Diffstat (limited to 'sys/arm64/arm64')
52 files changed, 26865 insertions, 0 deletions
diff --git a/sys/arm64/arm64/autoconf.c b/sys/arm64/arm64/autoconf.c new file mode 100644 index 000000000000..9788c789cfc4 --- /dev/null +++ b/sys/arm64/arm64/autoconf.c @@ -0,0 +1,92 @@ +/*- + * Copyright (c) 2015 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * Setup the system to run on the current machine. + * + * Configure() is called at boot time and initializes the vba + * device tables and the memory controller monitoring. Available + * devices are determined (from possibilities mentioned in ioconf.c), + * and the drivers are initialized. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/cons.h> +#include <sys/kernel.h> + +#include <machine/intr.h> + +static void configure_first(void *); +static void configure(void *); +static void configure_final(void *); + +SYSINIT(configure1, SI_SUB_CONFIGURE, SI_ORDER_FIRST, configure_first, NULL); +/* SI_ORDER_SECOND is hookable */ +SYSINIT(configure2, SI_SUB_CONFIGURE, SI_ORDER_THIRD, configure, NULL); +/* SI_ORDER_MIDDLE is hookable */ +SYSINIT(configure3, SI_SUB_CONFIGURE, SI_ORDER_ANY, configure_final, NULL); + +/* + * Determine i/o configuration for a machine. + */ +static void +configure_first(void *dummy) +{ + + /* nexus0 is the top of the device tree */ + device_add_child(root_bus, "nexus", 0); +} + +static void +configure(void *dummy) +{ + + /* initialize new bus architecture */ + root_bus_configure(); +} + +static void +configure_final(void *dummy) +{ + + /* Enable interrupt reception on this CPU */ + intr_enable(); + cninit_finish(); + + if (bootverbose) + printf("Device configuration finished.\n"); + + cold = 0; +} diff --git a/sys/arm64/arm64/bus_machdep.c b/sys/arm64/arm64/bus_machdep.c new file mode 100644 index 000000000000..1fabb91c575f --- /dev/null +++ b/sys/arm64/arm64/bus_machdep.c @@ -0,0 +1,230 @@ +/*- + * Copyright (c) 2014 Andrew Turner + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#define KCSAN_RUNTIME + +#include "opt_platform.h" + +#include <sys/param.h> +__FBSDID("$FreeBSD$"); + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/bus.h> + +uint8_t generic_bs_r_1(void *, bus_space_handle_t, bus_size_t); +uint16_t generic_bs_r_2(void *, bus_space_handle_t, bus_size_t); +uint32_t generic_bs_r_4(void *, bus_space_handle_t, bus_size_t); +uint64_t generic_bs_r_8(void *, bus_space_handle_t, bus_size_t); + +void generic_bs_rm_1(void *, bus_space_handle_t, bus_size_t, uint8_t *, + bus_size_t); +void generic_bs_rm_2(void *, bus_space_handle_t, bus_size_t, uint16_t *, + bus_size_t); +void generic_bs_rm_4(void *, bus_space_handle_t, bus_size_t, uint32_t *, + bus_size_t); +void generic_bs_rm_8(void *, bus_space_handle_t, bus_size_t, uint64_t *, + bus_size_t); + +void generic_bs_rr_1(void *, bus_space_handle_t, bus_size_t, uint8_t *, + bus_size_t); +void generic_bs_rr_2(void *, bus_space_handle_t, bus_size_t, uint16_t *, + bus_size_t); +void generic_bs_rr_4(void *, bus_space_handle_t, bus_size_t, uint32_t *, + bus_size_t); +void generic_bs_rr_8(void *, bus_space_handle_t, bus_size_t, uint64_t *, + bus_size_t); + +void generic_bs_w_1(void *, bus_space_handle_t, bus_size_t, uint8_t); +void generic_bs_w_2(void *, bus_space_handle_t, bus_size_t, uint16_t); +void generic_bs_w_4(void *, bus_space_handle_t, bus_size_t, uint32_t); +void generic_bs_w_8(void *, bus_space_handle_t, bus_size_t, uint64_t); + +void generic_bs_wm_1(void *, bus_space_handle_t, bus_size_t, const uint8_t *, + bus_size_t); +void generic_bs_wm_2(void *, bus_space_handle_t, bus_size_t, const uint16_t *, + bus_size_t); +void generic_bs_wm_4(void *, bus_space_handle_t, bus_size_t, const uint32_t *, + bus_size_t); +void generic_bs_wm_8(void *, bus_space_handle_t, bus_size_t, const uint64_t *, + bus_size_t); + +void generic_bs_wr_1(void *, bus_space_handle_t, bus_size_t, const uint8_t *, + bus_size_t); +void generic_bs_wr_2(void *, bus_space_handle_t, bus_size_t, const uint16_t *, + bus_size_t); +void generic_bs_wr_4(void *, bus_space_handle_t, bus_size_t, const uint32_t *, + bus_size_t); +void generic_bs_wr_8(void *, bus_space_handle_t, bus_size_t, const uint64_t *, + bus_size_t); + +static int +generic_bs_map(void *t, bus_addr_t bpa, bus_size_t size, int flags, + bus_space_handle_t *bshp) +{ + void *va; + + va = pmap_mapdev(bpa, size); + if (va == NULL) + return (ENOMEM); + *bshp = 
(bus_space_handle_t)va; + return (0); +} + +static void +generic_bs_unmap(void *t, bus_space_handle_t bsh, bus_size_t size) +{ + + pmap_unmapdev(bsh, size); +} + +static void +generic_bs_barrier(void *t, bus_space_handle_t bsh, bus_size_t offset, + bus_size_t size, int flags) +{ +} + +static int +generic_bs_subregion(void *t, bus_space_handle_t bsh, bus_size_t offset, + bus_size_t size, bus_space_handle_t *nbshp) +{ + + *nbshp = bsh + offset; + return (0); +} + +struct bus_space memmap_bus = { + /* cookie */ + .bs_cookie = NULL, + + /* mapping/unmapping */ + .bs_map = generic_bs_map, + .bs_unmap = generic_bs_unmap, + .bs_subregion = generic_bs_subregion, + + /* allocation/deallocation */ + .bs_alloc = NULL, + .bs_free = NULL, + + /* barrier */ + .bs_barrier = generic_bs_barrier, + + /* read single */ + .bs_r_1 = generic_bs_r_1, + .bs_r_2 = generic_bs_r_2, + .bs_r_4 = generic_bs_r_4, + .bs_r_8 = generic_bs_r_8, + + /* read multiple */ + .bs_rm_1 = generic_bs_rm_1, + .bs_rm_2 = generic_bs_rm_2, + .bs_rm_4 = generic_bs_rm_4, + .bs_rm_8 = generic_bs_rm_8, + + /* read region */ + .bs_rr_1 = generic_bs_rr_1, + .bs_rr_2 = generic_bs_rr_2, + .bs_rr_4 = generic_bs_rr_4, + .bs_rr_8 = generic_bs_rr_8, + + /* write single */ + .bs_w_1 = generic_bs_w_1, + .bs_w_2 = generic_bs_w_2, + .bs_w_4 = generic_bs_w_4, + .bs_w_8 = generic_bs_w_8, + + /* write multiple */ + .bs_wm_1 = generic_bs_wm_1, + .bs_wm_2 = generic_bs_wm_2, + .bs_wm_4 = generic_bs_wm_4, + .bs_wm_8 = generic_bs_wm_8, + + /* write region */ + .bs_wr_1 = generic_bs_wr_1, + .bs_wr_2 = generic_bs_wr_2, + .bs_wr_4 = generic_bs_wr_4, + .bs_wr_8 = generic_bs_wr_8, + + /* set multiple */ + .bs_sm_1 = NULL, + .bs_sm_2 = NULL, + .bs_sm_4 = NULL, + .bs_sm_8 = NULL, + + /* set region */ + .bs_sr_1 = NULL, + .bs_sr_2 = NULL, + .bs_sr_4 = NULL, + .bs_sr_8 = NULL, + + /* copy */ + .bs_c_1 = NULL, + .bs_c_2 = NULL, + .bs_c_4 = NULL, + .bs_c_8 = NULL, + + /* read single stream */ + .bs_r_1_s = NULL, + .bs_r_2_s = NULL, + .bs_r_4_s = NULL, + .bs_r_8_s = NULL, + + /* read multiple stream */ + .bs_rm_1_s = generic_bs_rm_1, + .bs_rm_2_s = generic_bs_rm_2, + .bs_rm_4_s = generic_bs_rm_4, + .bs_rm_8_s = generic_bs_rm_8, + + /* read region stream */ + .bs_rr_1_s = NULL, + .bs_rr_2_s = NULL, + .bs_rr_4_s = NULL, + .bs_rr_8_s = NULL, + + /* write single stream */ + .bs_w_1_s = NULL, + .bs_w_2_s = NULL, + .bs_w_4_s = NULL, + .bs_w_8_s = NULL, + + /* write multiple stream */ + .bs_wm_1_s = generic_bs_wm_1, + .bs_wm_2_s = generic_bs_wm_2, + .bs_wm_4_s = generic_bs_wm_4, + .bs_wm_8_s = generic_bs_wm_8, + + /* write region stream */ + .bs_wr_1_s = NULL, + .bs_wr_2_s = NULL, + .bs_wr_4_s = NULL, + .bs_wr_8_s = NULL, +}; + +#ifdef FDT +bus_space_tag_t fdtbus_bs_tag = &memmap_bus; +#endif diff --git a/sys/arm64/arm64/bus_space_asm.S b/sys/arm64/arm64/bus_space_asm.S new file mode 100644 index 000000000000..d919bd5c61b1 --- /dev/null +++ b/sys/arm64/arm64/bus_space_asm.S @@ -0,0 +1,399 @@ +/*- + * Copyright (c) 2014 Andrew Turner + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include <machine/asm.h> + +__FBSDID("$FreeBSD$"); + +ENTRY(generic_bs_r_1) + ldrb w0, [x1, x2] + ret +END(generic_bs_r_1) + +ENTRY(generic_bs_r_2) + ldrh w0, [x1, x2] + ret +END(generic_bs_r_2) + +ENTRY(generic_bs_r_4) + ldr w0, [x1, x2] + ret +END(generic_bs_r_4) + +ENTRY(generic_bs_r_8) + ldr x0, [x1, x2] + ret +END(generic_bs_r_8) + +ENTRY(generic_bs_rm_1) + /* If there is anything to read. */ + cbz x4, 2f + + /* Calculate the device address. */ + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Read the data. */ +1: ldrb w1, [x0] + strb w1, [x3], #1 + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_rm_1) + +ENTRY(generic_bs_rm_2) + /* If there is anything to read. */ + cbz x4, 2f + + /* Calculate the device address. */ + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Read the data. */ +1: ldrh w1, [x0] + strh w1, [x3], #2 + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_rm_2) + +ENTRY(generic_bs_rm_4) + /* If there is anything to read. */ + cbz x4, 2f + + /* Calculate the device address. */ + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Read the data. */ +1: ldr w1, [x0] + str w1, [x3], #4 + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_rm_4) + +ENTRY(generic_bs_rm_8) + /* If there is anything to read. */ + cbz x4, 2f + + /* Calculate the device address. */ + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Read the data. */ +1: ldr x1, [x0] + str x1, [x3], #8 + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_rm_8) + +ENTRY(generic_bs_rr_1) + /* Is there is anything to read. */ + cbz x4, 2f + + /* Calculate the device address. */ + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Read the data. */ +1: ldrb w1, [x0], #1 + strb w1, [x3], #1 + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_rr_1) + +ENTRY(generic_bs_rr_2) + /* Is there is anything to read. */ + cbz x4, 2f + + /* Calculate the device address. */ + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Read the data. */ +1: ldrh w1, [x0], #2 + strh w1, [x3], #2 + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_rr_2) + +ENTRY(generic_bs_rr_4) + /* Is there is anything to read. */ + cbz x4, 2f + + /* Calculate the device address. */ + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Read the data. 
*/ +1: ldr w1, [x0], #4 + str w1, [x3], #4 + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_rr_4) + +ENTRY(generic_bs_rr_8) + /* Is there is anything to read. */ + cbz x4, 2f + + /* Calculate the device address. */ + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Read the data. */ +1: ldr x1, [x0], #8 + str x1, [x3], #8 + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_rr_8) + + +ENTRY(generic_bs_w_1) + strb w3, [x1, x2] + ret +END(generic_bs_w_1) + +ENTRY(generic_bs_w_2) + strh w3, [x1, x2] + ret +END(generic_bs_w_2) + +ENTRY(generic_bs_w_4) + str w3, [x1, x2] + ret +END(generic_bs_w_4) + +ENTRY(generic_bs_w_8) + str x3, [x1, x2] + ret +END(generic_bs_w_8) + +ENTRY(generic_bs_wm_1) + /* If there is anything to write. */ + cbz x4, 2f + + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Write the data */ +1: ldrb w1, [x3], #1 + strb w1, [x0] + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_wm_1) + +ENTRY(generic_bs_wm_2) + /* If there is anything to write. */ + cbz x4, 2f + + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Write the data */ +1: ldrh w1, [x3], #2 + strh w1, [x0] + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_wm_2) + +ENTRY(generic_bs_wm_4) + /* If there is anything to write. */ + cbz x4, 2f + + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Write the data */ +1: ldr w1, [x3], #4 + str w1, [x0] + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_wm_4) + +ENTRY(generic_bs_wm_8) + /* If there is anything to write. */ + cbz x4, 2f + + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Write the data */ +1: ldr x1, [x3], #8 + str x1, [x0] + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_wm_8) + +ENTRY(generic_bs_wr_1) + /* Is there is anything to write. */ + cbz x4, 2f + + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Write the data */ +1: ldrb w1, [x3], #1 + strb w1, [x0], #1 + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_wr_1) + +ENTRY(generic_bs_wr_2) + /* Is there is anything to write. */ + cbz x4, 2f + + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Write the data */ +1: ldrh w1, [x3], #2 + strh w1, [x0], #2 + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_wr_2) + +ENTRY(generic_bs_wr_4) + /* Is there is anything to write. */ + cbz x4, 2f + + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Write the data */ +1: ldr w1, [x3], #4 + str w1, [x0], #4 + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_wr_4) + +ENTRY(generic_bs_wr_8) + /* Is there is anything to write. */ + cbz x4, 2f + + add x0, x1, x2 + /* + * x0 = The device address. + * x3 = The kernel address. + * x4 = Count + */ + + /* Write the data */ +1: ldr x1, [x3], #8 + str x1, [x0], #8 + subs x4, x4, #1 + b.ne 1b + +2: ret +END(generic_bs_wr_8) diff --git a/sys/arm64/arm64/busdma_bounce.c b/sys/arm64/arm64/busdma_bounce.c new file mode 100644 index 000000000000..9d737d5c9021 --- /dev/null +++ b/sys/arm64/arm64/busdma_bounce.c @@ -0,0 +1,1357 @@ +/*- + * Copyright (c) 1997, 1998 Justin T. Gibbs. + * Copyright (c) 2015-2016 The FreeBSD Foundation + * All rights reserved. 
+ * + * Portions of this software were developed by Andrew Turner + * under sponsorship of the FreeBSD Foundation. + * + * Portions of this software were developed by Semihalf + * under sponsorship of the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/bus.h> +#include <sys/interrupt.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/lock.h> +#include <sys/proc.h> +#include <sys/memdesc.h> +#include <sys/mutex.h> +#include <sys/sysctl.h> +#include <sys/uio.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> + +#include <machine/atomic.h> +#include <machine/bus.h> +#include <machine/md_var.h> +#include <arm64/include/bus_dma_impl.h> + +#define MAX_BPAGES 4096 + +enum { + BF_COULD_BOUNCE = 0x01, + BF_MIN_ALLOC_COMP = 0x02, + BF_KMEM_ALLOC = 0x04, + BF_COHERENT = 0x10, +}; + +struct bounce_zone; + +struct bus_dma_tag { + struct bus_dma_tag_common common; + int map_count; + int bounce_flags; + bus_dma_segment_t *segments; + struct bounce_zone *bounce_zone; +}; + +struct bounce_page { + vm_offset_t vaddr; /* kva of bounce buffer */ + bus_addr_t busaddr; /* Physical address */ + vm_offset_t datavaddr; /* kva of client data */ + vm_page_t datapage; /* physical page of client data */ + vm_offset_t dataoffs; /* page offset of client data */ + bus_size_t datacount; /* client data count */ + STAILQ_ENTRY(bounce_page) links; +}; + +int busdma_swi_pending; + +struct bounce_zone { + STAILQ_ENTRY(bounce_zone) links; + STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; + int total_bpages; + int free_bpages; + int reserved_bpages; + int active_bpages; + int total_bounced; + int total_deferred; + int map_count; + bus_size_t alignment; + bus_addr_t lowaddr; + char zoneid[8]; + char lowaddrid[20]; + struct sysctl_ctx_list sysctl_tree; + struct sysctl_oid *sysctl_tree_top; +}; + +static struct mtx bounce_lock; +static int total_bpages; +static int busdma_zonecount; +static STAILQ_HEAD(, bounce_zone) bounce_zone_list; + +static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "Busdma parameters"); 
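+
+/*
+ * Global bounce-page counters are exported under the hw.busdma sysctl
+ * node; alloc_bounce_zone() later attaches a per-zone subtree to it.
+ */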
+SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, + "Total bounce pages"); + +struct sync_list { + vm_offset_t vaddr; /* kva of client data */ + bus_addr_t paddr; /* physical address */ + vm_page_t pages; /* starting page of client data */ + bus_size_t datacount; /* client data count */ +}; + +struct bus_dmamap { + struct bp_list bpages; + int pagesneeded; + int pagesreserved; + bus_dma_tag_t dmat; + struct memdesc mem; + bus_dmamap_callback_t *callback; + void *callback_arg; + STAILQ_ENTRY(bus_dmamap) links; + u_int flags; +#define DMAMAP_COULD_BOUNCE (1 << 0) +#define DMAMAP_FROM_DMAMEM (1 << 1) + int sync_count; + struct sync_list slist[]; +}; + +static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; +static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; + +static void init_bounce_pages(void *dummy); +static int alloc_bounce_zone(bus_dma_tag_t dmat); +static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); +static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, + int commit); +static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, + vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); +static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); +int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); +static bool _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf, + bus_size_t buflen, int *pagesneeded); +static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, + pmap_t pmap, void *buf, bus_size_t buflen, int flags); +static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, + vm_paddr_t buf, bus_size_t buflen, int flags); +static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, + int flags); + +/* + * Allocate a device specific dma_tag. + */ +static int +bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, + bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, + bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, + int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, + void *lockfuncarg, bus_dma_tag_t *dmat) +{ + bus_dma_tag_t newtag; + int error; + + *dmat = NULL; + error = common_bus_dma_tag_create(parent != NULL ? 
&parent->common : + NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg, + maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, + sizeof (struct bus_dma_tag), (void **)&newtag); + if (error != 0) + return (error); + + newtag->common.impl = &bus_dma_bounce_impl; + newtag->map_count = 0; + newtag->segments = NULL; + + if ((flags & BUS_DMA_COHERENT) != 0) + newtag->bounce_flags |= BF_COHERENT; + + if (parent != NULL) { + if ((newtag->common.filter != NULL || + (parent->bounce_flags & BF_COULD_BOUNCE) != 0)) + newtag->bounce_flags |= BF_COULD_BOUNCE; + + /* Copy some flags from the parent */ + newtag->bounce_flags |= parent->bounce_flags & BF_COHERENT; + } + + if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) || + newtag->common.alignment > 1) + newtag->bounce_flags |= BF_COULD_BOUNCE; + + if (((newtag->bounce_flags & BF_COULD_BOUNCE) != 0) && + (flags & BUS_DMA_ALLOCNOW) != 0) { + struct bounce_zone *bz; + + /* Must bounce */ + if ((error = alloc_bounce_zone(newtag)) != 0) { + free(newtag, M_DEVBUF); + return (error); + } + bz = newtag->bounce_zone; + + if (ptoa(bz->total_bpages) < maxsize) { + int pages; + + pages = atop(round_page(maxsize)) - bz->total_bpages; + + /* Add pages to our bounce pool */ + if (alloc_bounce_pages(newtag, pages) < pages) + error = ENOMEM; + } + /* Performed initial allocation */ + newtag->bounce_flags |= BF_MIN_ALLOC_COMP; + } else + error = 0; + + if (error != 0) + free(newtag, M_DEVBUF); + else + *dmat = newtag; + CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", + __func__, newtag, (newtag != NULL ? newtag->common.flags : 0), + error); + return (error); +} + +static int +bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat) +{ + bus_dma_tag_t dmat_copy, parent; + int error; + + error = 0; + dmat_copy = dmat; + + if (dmat != NULL) { + if (dmat->map_count != 0) { + error = EBUSY; + goto out; + } + while (dmat != NULL) { + parent = (bus_dma_tag_t)dmat->common.parent; + atomic_subtract_int(&dmat->common.ref_count, 1); + if (dmat->common.ref_count == 0) { + if (dmat->segments != NULL) + free(dmat->segments, M_DEVBUF); + free(dmat, M_DEVBUF); + /* + * Last reference count, so + * release our reference + * count on our parent. + */ + dmat = parent; + } else + dmat = NULL; + } + } +out: + CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); + return (error); +} + +static bool +bounce_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen) +{ + + if ((dmat->bounce_flags & BF_COULD_BOUNCE) == 0) + return (true); + return (!_bus_dmamap_pagesneeded(dmat, buf, buflen, NULL)); +} + +static bus_dmamap_t +alloc_dmamap(bus_dma_tag_t dmat, int flags) +{ + u_long mapsize; + bus_dmamap_t map; + + mapsize = sizeof(*map); + mapsize += sizeof(struct sync_list) * dmat->common.nsegments; + map = malloc(mapsize, M_DEVBUF, flags | M_ZERO); + if (map == NULL) + return (NULL); + + /* Initialize the new map */ + STAILQ_INIT(&map->bpages); + + return (map); +} + +/* + * Allocate a handle for mapping from kva/uva/physical + * address space into bus device space. 
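+ * If the tag may require bouncing, bounce pages are also reserved up
+ * front here, so that a later bus_dmamap_load() is less likely to defer.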
+ */ +static int +bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) +{ + struct bounce_zone *bz; + int error, maxpages, pages; + + error = 0; + + if (dmat->segments == NULL) { + dmat->segments = (bus_dma_segment_t *)malloc( + sizeof(bus_dma_segment_t) * dmat->common.nsegments, + M_DEVBUF, M_NOWAIT); + if (dmat->segments == NULL) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", + __func__, dmat, ENOMEM); + return (ENOMEM); + } + } + + *mapp = alloc_dmamap(dmat, M_NOWAIT); + if (*mapp == NULL) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", + __func__, dmat, ENOMEM); + return (ENOMEM); + } + + /* + * Bouncing might be required if the driver asks for an active + * exclusion region, a data alignment that is stricter than 1, and/or + * an active address boundary. + */ + if (dmat->bounce_flags & BF_COULD_BOUNCE) { + /* Must bounce */ + if (dmat->bounce_zone == NULL) { + if ((error = alloc_bounce_zone(dmat)) != 0) { + free(*mapp, M_DEVBUF); + return (error); + } + } + bz = dmat->bounce_zone; + + (*mapp)->flags = DMAMAP_COULD_BOUNCE; + + /* + * Attempt to add pages to our pool on a per-instance + * basis up to a sane limit. + */ + if (dmat->common.alignment > 1) + maxpages = MAX_BPAGES; + else + maxpages = MIN(MAX_BPAGES, Maxmem - + atop(dmat->common.lowaddr)); + if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP) == 0 || + (bz->map_count > 0 && bz->total_bpages < maxpages)) { + pages = MAX(atop(dmat->common.maxsize), 1); + pages = MIN(maxpages - bz->total_bpages, pages); + pages = MAX(pages, 1); + if (alloc_bounce_pages(dmat, pages) < pages) + error = ENOMEM; + if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP) + == 0) { + if (error == 0) { + dmat->bounce_flags |= + BF_MIN_ALLOC_COMP; + } + } else + error = 0; + } + bz->map_count++; + } + if (error == 0) + dmat->map_count++; + else + free(*mapp, M_DEVBUF); + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->common.flags, error); + return (error); +} + +/* + * Destroy a handle for mapping from kva/uva/physical + * address space into bus device space. + */ +static int +bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + + /* Check we are destroying the correct map type */ + if ((map->flags & DMAMAP_FROM_DMAMEM) != 0) + panic("bounce_bus_dmamap_destroy: Invalid map freed\n"); + + if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); + return (EBUSY); + } + if (dmat->bounce_zone) { + KASSERT((map->flags & DMAMAP_COULD_BOUNCE) != 0, + ("%s: Bounce zone when cannot bounce", __func__)); + dmat->bounce_zone->map_count--; + } + free(map, M_DEVBUF); + dmat->map_count--; + CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); + return (0); +} + +/* + * Allocate a piece of memory that can be efficiently mapped into + * bus device space based on the constraints lited in the dma tag. + * A dmamap to for use with dmamap_load is also allocated. + */ +static int +bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, + bus_dmamap_t *mapp) +{ + /* + * XXX ARM64TODO: + * This bus_dma implementation requires IO-Coherent architecutre. + * If IO-Coherency is not guaranteed, the BUS_DMA_COHERENT flag has + * to be implented using non-cacheable memory. 
+ */ + + vm_memattr_t attr; + int mflags; + + if (flags & BUS_DMA_NOWAIT) + mflags = M_NOWAIT; + else + mflags = M_WAITOK; + + if (dmat->segments == NULL) { + dmat->segments = (bus_dma_segment_t *)malloc( + sizeof(bus_dma_segment_t) * dmat->common.nsegments, + M_DEVBUF, mflags); + if (dmat->segments == NULL) { + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->common.flags, ENOMEM); + return (ENOMEM); + } + } + if (flags & BUS_DMA_ZERO) + mflags |= M_ZERO; + if (flags & BUS_DMA_NOCACHE) + attr = VM_MEMATTR_UNCACHEABLE; + else if ((flags & BUS_DMA_COHERENT) != 0 && + (dmat->bounce_flags & BF_COHERENT) == 0) + /* + * If we have a non-coherent tag, and are trying to allocate + * a coherent block of memory it needs to be uncached. + */ + attr = VM_MEMATTR_UNCACHEABLE; + else + attr = VM_MEMATTR_DEFAULT; + + /* + * Create the map, but don't set the could bounce flag as + * this allocation should never bounce; + */ + *mapp = alloc_dmamap(dmat, mflags); + if (*mapp == NULL) { + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->common.flags, ENOMEM); + return (ENOMEM); + } + (*mapp)->flags = DMAMAP_FROM_DMAMEM; + + /* + * Allocate the buffer from the malloc(9) allocator if... + * - It's small enough to fit into a single power of two sized bucket. + * - The alignment is less than or equal to the maximum size + * - The low address requirement is fulfilled. + * else allocate non-contiguous pages if... + * - The page count that could get allocated doesn't exceed + * nsegments also when the maximum segment size is less + * than PAGE_SIZE. + * - The alignment constraint isn't larger than a page boundary. + * - There are no boundary-crossing constraints. + * else allocate a block of contiguous pages because one or more of the + * constraints is something that only the contig allocator can fulfill. + * + * NOTE: The (dmat->common.alignment <= dmat->maxsize) check + * below is just a quick hack. The exact alignment guarantees + * of malloc(9) need to be nailed down, and the code below + * should be rewritten to take that into account. + * + * In the meantime warn the user if malloc gets it wrong. + */ + if ((dmat->common.maxsize <= PAGE_SIZE) && + (dmat->common.alignment <= dmat->common.maxsize) && + dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) && + attr == VM_MEMATTR_DEFAULT) { + *vaddr = malloc(dmat->common.maxsize, M_DEVBUF, mflags); + } else if (dmat->common.nsegments >= + howmany(dmat->common.maxsize, MIN(dmat->common.maxsegsz, PAGE_SIZE)) && + dmat->common.alignment <= PAGE_SIZE && + (dmat->common.boundary % PAGE_SIZE) == 0) { + /* Page-based multi-segment allocations allowed */ + *vaddr = (void *)kmem_alloc_attr(dmat->common.maxsize, mflags, + 0ul, dmat->common.lowaddr, attr); + dmat->bounce_flags |= BF_KMEM_ALLOC; + } else { + *vaddr = (void *)kmem_alloc_contig(dmat->common.maxsize, mflags, + 0ul, dmat->common.lowaddr, dmat->common.alignment != 0 ? 
+	    dmat->common.alignment : 1ul, dmat->common.boundary, attr);
+		dmat->bounce_flags |= BF_KMEM_ALLOC;
+	}
+	if (*vaddr == NULL) {
+		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
+		    __func__, dmat, dmat->common.flags, ENOMEM);
+		free(*mapp, M_DEVBUF);
+		return (ENOMEM);
+	} else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) {
+		printf("bus_dmamem_alloc failed to align memory properly.\n");
+	}
+	dmat->map_count++;
+	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
+	    __func__, dmat, dmat->common.flags, 0);
+	return (0);
+}
+
+/*
+ * Free a piece of memory and its associated dmamap that were allocated
+ * via bus_dmamem_alloc.  Make the same choice for free/contigfree.
+ */
+static void
+bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
+{
+
+	/*
+	 * Check the map came from bounce_bus_dmamem_alloc, so the map
+	 * should be NULL and the BF_KMEM_ALLOC flag cleared if malloc()
+	 * was used and set if kmem_alloc_contig() was used.
+	 */
+	if ((map->flags & DMAMAP_FROM_DMAMEM) == 0)
+		panic("bus_dmamem_free: Invalid map freed\n");
+	if ((dmat->bounce_flags & BF_KMEM_ALLOC) == 0)
+		free(vaddr, M_DEVBUF);
+	else
+		kmem_free((vm_offset_t)vaddr, dmat->common.maxsize);
+	free(map, M_DEVBUF);
+	dmat->map_count--;
+	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat,
+	    dmat->bounce_flags);
+}
+
+static bool
+_bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen,
+    int *pagesneeded)
+{
+	bus_addr_t curaddr;
+	bus_size_t sgsize;
+	int count;
+
+	/*
+	 * Count the number of bounce pages needed in order to
+	 * complete this transfer
+	 */
+	count = 0;
+	curaddr = buf;
+	while (buflen != 0) {
+		sgsize = MIN(buflen, dmat->common.maxsegsz);
+		if (bus_dma_run_filter(&dmat->common, curaddr)) {
+			sgsize = MIN(sgsize,
+			    PAGE_SIZE - (curaddr & PAGE_MASK));
+			if (pagesneeded == NULL)
+				return (true);
+			count++;
+		}
+		curaddr += sgsize;
+		buflen -= sgsize;
+	}
+
+	if (pagesneeded != NULL)
+		*pagesneeded = count;
+	return (count != 0);
+}
+
+static void
+_bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
+    bus_size_t buflen, int flags)
+{
+
+	if ((map->flags & DMAMAP_COULD_BOUNCE) != 0 && map->pagesneeded == 0) {
+		_bus_dmamap_pagesneeded(dmat, buf, buflen, &map->pagesneeded);
+		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
+	}
+}
+
+static void
+_bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap,
+    void *buf, bus_size_t buflen, int flags)
+{
+	vm_offset_t vaddr;
+	vm_offset_t vendaddr;
+	bus_addr_t paddr;
+	bus_size_t sg_len;
+
+	if ((map->flags & DMAMAP_COULD_BOUNCE) != 0 && map->pagesneeded == 0) {
+		CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, "
+		    "alignment= %d", dmat->common.lowaddr,
+		    ptoa((vm_paddr_t)Maxmem),
+		    dmat->common.boundary, dmat->common.alignment);
+		CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map,
+		    map->pagesneeded);
+		/*
+		 * Count the number of bounce pages
+		 * needed in order to complete this transfer
+		 */
+		vaddr = (vm_offset_t)buf;
+		vendaddr = (vm_offset_t)buf + buflen;
+
+		while (vaddr < vendaddr) {
+			sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK);
+			if (pmap == kernel_pmap)
+				paddr = pmap_kextract(vaddr);
+			else
+				paddr = pmap_extract(pmap, vaddr);
+			if (bus_dma_run_filter(&dmat->common, paddr) != 0) {
+				sg_len = roundup2(sg_len,
+				    dmat->common.alignment);
+				map->pagesneeded++;
+			}
+			vaddr += sg_len;
+		}
+		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
+	}
+}
+
+static int
+_bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
int flags) +{ + + /* Reserve Necessary Bounce Pages */ + mtx_lock(&bounce_lock); + if (flags & BUS_DMA_NOWAIT) { + if (reserve_bounce_pages(dmat, map, 0) != 0) { + mtx_unlock(&bounce_lock); + return (ENOMEM); + } + } else { + if (reserve_bounce_pages(dmat, map, 1) != 0) { + /* Queue us for resources */ + STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); + mtx_unlock(&bounce_lock); + return (EINPROGRESS); + } + } + mtx_unlock(&bounce_lock); + + return (0); +} + +/* + * Add a single contiguous physical range to the segment list. + */ +static bus_size_t +_bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, + bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) +{ + bus_addr_t baddr, bmask; + int seg; + + /* + * Make sure we don't cross any boundaries. + */ + bmask = ~(dmat->common.boundary - 1); + if (dmat->common.boundary > 0) { + baddr = (curaddr + dmat->common.boundary) & bmask; + if (sgsize > (baddr - curaddr)) + sgsize = (baddr - curaddr); + } + + /* + * Insert chunk into a segment, coalescing with + * previous segment if possible. + */ + seg = *segp; + if (seg == -1) { + seg = 0; + segs[seg].ds_addr = curaddr; + segs[seg].ds_len = sgsize; + } else { + if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && + (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz && + (dmat->common.boundary == 0 || + (segs[seg].ds_addr & bmask) == (curaddr & bmask))) + segs[seg].ds_len += sgsize; + else { + if (++seg >= dmat->common.nsegments) + return (0); + segs[seg].ds_addr = curaddr; + segs[seg].ds_len = sgsize; + } + } + *segp = seg; + return (sgsize); +} + +/* + * Utility function to load a physical buffer. segp contains + * the starting segment on entrace, and the ending segment on exit. + */ +static int +bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, + vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, + int *segp) +{ + struct sync_list *sl; + bus_size_t sgsize; + bus_addr_t curaddr, sl_end; + int error; + + if (segs == NULL) + segs = dmat->segments; + + if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) { + _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); + if (map->pagesneeded != 0) { + error = _bus_dmamap_reserve_pages(dmat, map, flags); + if (error) + return (error); + } + } + + sl = map->slist + map->sync_count - 1; + sl_end = 0; + + while (buflen > 0) { + curaddr = buf; + sgsize = MIN(buflen, dmat->common.maxsegsz); + if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) && + map->pagesneeded != 0 && + bus_dma_run_filter(&dmat->common, curaddr)) { + sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); + curaddr = add_bounce_page(dmat, map, 0, curaddr, + sgsize); + } else if ((dmat->bounce_flags & BF_COHERENT) == 0) { + if (map->sync_count > 0) + sl_end = sl->paddr + sl->datacount; + + if (map->sync_count == 0 || curaddr != sl_end) { + if (++map->sync_count > dmat->common.nsegments) + break; + sl++; + sl->vaddr = 0; + sl->paddr = curaddr; + sl->datacount = sgsize; + sl->pages = PHYS_TO_VM_PAGE(curaddr); + KASSERT(sl->pages != NULL, + ("%s: page at PA:0x%08lx is not in " + "vm_page_array", __func__, curaddr)); + } else + sl->datacount += sgsize; + } + sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, + segp); + if (sgsize == 0) + break; + buf += sgsize; + buflen -= sgsize; + } + + /* + * Did we fit? + */ + return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ +} + +/* + * Utility function to load a linear buffer. 
segp contains + * the starting segment on entrace, and the ending segment on exit. + */ +static int +bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, + bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, + int *segp) +{ + struct sync_list *sl; + bus_size_t sgsize, max_sgsize; + bus_addr_t curaddr, sl_pend; + vm_offset_t kvaddr, vaddr, sl_vend; + int error; + + if (segs == NULL) + segs = dmat->segments; + + if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) { + _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); + if (map->pagesneeded != 0) { + error = _bus_dmamap_reserve_pages(dmat, map, flags); + if (error) + return (error); + } + } + + sl = map->slist + map->sync_count - 1; + vaddr = (vm_offset_t)buf; + sl_pend = 0; + sl_vend = 0; + + while (buflen > 0) { + /* + * Get the physical address for this segment. + */ + if (pmap == kernel_pmap) { + curaddr = pmap_kextract(vaddr); + kvaddr = vaddr; + } else { + curaddr = pmap_extract(pmap, vaddr); + kvaddr = 0; + } + + /* + * Compute the segment size, and adjust counts. + */ + max_sgsize = MIN(buflen, dmat->common.maxsegsz); + sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); + if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) && + map->pagesneeded != 0 && + bus_dma_run_filter(&dmat->common, curaddr)) { + sgsize = roundup2(sgsize, dmat->common.alignment); + sgsize = MIN(sgsize, max_sgsize); + curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, + sgsize); + } else if ((dmat->bounce_flags & BF_COHERENT) == 0) { + sgsize = MIN(sgsize, max_sgsize); + if (map->sync_count > 0) { + sl_pend = sl->paddr + sl->datacount; + sl_vend = sl->vaddr + sl->datacount; + } + + if (map->sync_count == 0 || + (kvaddr != 0 && kvaddr != sl_vend) || + (curaddr != sl_pend)) { + if (++map->sync_count > dmat->common.nsegments) + goto cleanup; + sl++; + sl->vaddr = kvaddr; + sl->paddr = curaddr; + if (kvaddr != 0) { + sl->pages = NULL; + } else { + sl->pages = PHYS_TO_VM_PAGE(curaddr); + KASSERT(sl->pages != NULL, + ("%s: page at PA:0x%08lx is not " + "in vm_page_array", __func__, + curaddr)); + } + sl->datacount = sgsize; + } else + sl->datacount += sgsize; + } else { + sgsize = MIN(sgsize, max_sgsize); + } + sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, + segp); + if (sgsize == 0) + break; + vaddr += sgsize; + buflen -= sgsize; + } + +cleanup: + /* + * Did we fit? + */ + return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ +} + +static void +bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, + struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) +{ + + if ((map->flags & DMAMAP_COULD_BOUNCE) == 0) + return; + map->mem = *mem; + map->dmat = dmat; + map->callback = callback; + map->callback_arg = callback_arg; +} + +static bus_dma_segment_t * +bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, + bus_dma_segment_t *segs, int nsegs, int error) +{ + + if (segs == NULL) + segs = dmat->segments; + return (segs); +} + +/* + * Release the mapping held by map. + */ +static void +bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + struct bounce_page *bpage; + + while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { + STAILQ_REMOVE_HEAD(&map->bpages, links); + free_bounce_page(dmat, bpage); + } + + map->sync_count = 0; +} + +static void +dma_preread_safe(vm_offset_t va, vm_size_t size) +{ + /* + * Write back any partial cachelines immediately before and + * after the DMA region. 
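+	 * Otherwise the invalidate below could discard unrelated dirty
+	 * data that shares a cacheline with either edge of the buffer.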
+ */ + if (va & (dcache_line_size - 1)) + cpu_dcache_wb_range(va, 1); + if ((va + size) & (dcache_line_size - 1)) + cpu_dcache_wb_range(va + size, 1); + + cpu_dcache_inv_range(va, size); +} + +static void +dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op) +{ + uint32_t len, offset; + vm_page_t m; + vm_paddr_t pa; + vm_offset_t va, tempva; + bus_size_t size; + + offset = sl->paddr & PAGE_MASK; + m = sl->pages; + size = sl->datacount; + pa = sl->paddr; + + for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) { + tempva = 0; + if (sl->vaddr == 0) { + len = min(PAGE_SIZE - offset, size); + tempva = pmap_quick_enter_page(m); + va = tempva | offset; + KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset), + ("unexpected vm_page_t phys: 0x%16lx != 0x%16lx", + VM_PAGE_TO_PHYS(m) | offset, pa)); + } else { + len = sl->datacount; + va = sl->vaddr; + } + + switch (op) { + case BUS_DMASYNC_PREWRITE: + case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD: + cpu_dcache_wb_range(va, len); + break; + case BUS_DMASYNC_PREREAD: + /* + * An mbuf may start in the middle of a cacheline. There + * will be no cpu writes to the beginning of that line + * (which contains the mbuf header) while dma is in + * progress. Handle that case by doing a writeback of + * just the first cacheline before invalidating the + * overall buffer. Any mbuf in a chain may have this + * misalignment. Buffers which are not mbufs bounce if + * they are not aligned to a cacheline. + */ + dma_preread_safe(va, len); + break; + case BUS_DMASYNC_POSTREAD: + case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE: + cpu_dcache_inv_range(va, len); + break; + default: + panic("unsupported combination of sync operations: " + "0x%08x\n", op); + } + + if (tempva != 0) + pmap_quick_remove_page(tempva); + } +} + +static void +bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, + bus_dmasync_op_t op) +{ + struct bounce_page *bpage; + struct sync_list *sl, *end; + vm_offset_t datavaddr, tempvaddr; + + if (op == BUS_DMASYNC_POSTWRITE) + return; + + if ((op & BUS_DMASYNC_POSTREAD) != 0) { + /* + * Wait for any DMA operations to complete before the bcopy. 
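+		 * (dsb(sy) is the AArch64 full-system data synchronization
+		 * barrier.)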
+ */ + dsb(sy); + } + + if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " + "performing bounce", __func__, dmat, dmat->common.flags, + op); + + if ((op & BUS_DMASYNC_PREWRITE) != 0) { + while (bpage != NULL) { + tempvaddr = 0; + datavaddr = bpage->datavaddr; + if (datavaddr == 0) { + tempvaddr = pmap_quick_enter_page( + bpage->datapage); + datavaddr = tempvaddr | bpage->dataoffs; + } + + bcopy((void *)datavaddr, + (void *)bpage->vaddr, bpage->datacount); + if (tempvaddr != 0) + pmap_quick_remove_page(tempvaddr); + if ((dmat->bounce_flags & BF_COHERENT) == 0) + cpu_dcache_wb_range(bpage->vaddr, + bpage->datacount); + bpage = STAILQ_NEXT(bpage, links); + } + dmat->bounce_zone->total_bounced++; + } else if ((op & BUS_DMASYNC_PREREAD) != 0) { + while (bpage != NULL) { + if ((dmat->bounce_flags & BF_COHERENT) == 0) + cpu_dcache_wbinv_range(bpage->vaddr, + bpage->datacount); + bpage = STAILQ_NEXT(bpage, links); + } + } + + if ((op & BUS_DMASYNC_POSTREAD) != 0) { + while (bpage != NULL) { + if ((dmat->bounce_flags & BF_COHERENT) == 0) + cpu_dcache_inv_range(bpage->vaddr, + bpage->datacount); + tempvaddr = 0; + datavaddr = bpage->datavaddr; + if (datavaddr == 0) { + tempvaddr = pmap_quick_enter_page( + bpage->datapage); + datavaddr = tempvaddr | bpage->dataoffs; + } + + bcopy((void *)bpage->vaddr, + (void *)datavaddr, bpage->datacount); + + if (tempvaddr != 0) + pmap_quick_remove_page(tempvaddr); + bpage = STAILQ_NEXT(bpage, links); + } + dmat->bounce_zone->total_bounced++; + } + } + + /* + * Cache maintenance for normal (non-COHERENT non-bounce) buffers. + */ + if (map->sync_count != 0) { + sl = &map->slist[0]; + end = &map->slist[map->sync_count]; + CTR3(KTR_BUSDMA, "%s: tag %p op 0x%x " + "performing sync", __func__, dmat, op); + + for ( ; sl != end; ++sl) + dma_dcache_sync(sl, op); + } + + if ((op & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) != 0) { + /* + * Wait for the bcopy to complete before any DMA operations. 
+ */ + dsb(sy); + } +} + +static void +init_bounce_pages(void *dummy __unused) +{ + + total_bpages = 0; + STAILQ_INIT(&bounce_zone_list); + STAILQ_INIT(&bounce_map_waitinglist); + STAILQ_INIT(&bounce_map_callbacklist); + mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); +} +SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); + +static struct sysctl_ctx_list * +busdma_sysctl_tree(struct bounce_zone *bz) +{ + + return (&bz->sysctl_tree); +} + +static struct sysctl_oid * +busdma_sysctl_tree_top(struct bounce_zone *bz) +{ + + return (bz->sysctl_tree_top); +} + +static int +alloc_bounce_zone(bus_dma_tag_t dmat) +{ + struct bounce_zone *bz; + + /* Check to see if we already have a suitable zone */ + STAILQ_FOREACH(bz, &bounce_zone_list, links) { + if ((dmat->common.alignment <= bz->alignment) && + (dmat->common.lowaddr >= bz->lowaddr)) { + dmat->bounce_zone = bz; + return (0); + } + } + + if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, + M_NOWAIT | M_ZERO)) == NULL) + return (ENOMEM); + + STAILQ_INIT(&bz->bounce_page_list); + bz->free_bpages = 0; + bz->reserved_bpages = 0; + bz->active_bpages = 0; + bz->lowaddr = dmat->common.lowaddr; + bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE); + bz->map_count = 0; + snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); + busdma_zonecount++; + snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); + STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); + dmat->bounce_zone = bz; + + sysctl_ctx_init(&bz->sysctl_tree); + bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, + SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, + CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); + if (bz->sysctl_tree_top == NULL) { + sysctl_ctx_free(&bz->sysctl_tree); + return (0); /* XXX error code? 
*/ + } + + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, + "Total bounce pages"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, + "Free bounce pages"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, + "Reserved bounce pages"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, + "Active bounce pages"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, + "Total bounce requests"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, + "Total bounce requests that were deferred"); + SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); + SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "alignment", CTLFLAG_RD, &bz->alignment, ""); + + return (0); +} + +static int +alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) +{ + struct bounce_zone *bz; + int count; + + bz = dmat->bounce_zone; + count = 0; + while (numpages > 0) { + struct bounce_page *bpage; + + bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, + M_NOWAIT | M_ZERO); + + if (bpage == NULL) + break; + bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, + M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); + if (bpage->vaddr == 0) { + free(bpage, M_DEVBUF); + break; + } + bpage->busaddr = pmap_kextract(bpage->vaddr); + mtx_lock(&bounce_lock); + STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); + total_bpages++; + bz->total_bpages++; + bz->free_bpages++; + mtx_unlock(&bounce_lock); + count++; + numpages--; + } + return (count); +} + +static int +reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) +{ + struct bounce_zone *bz; + int pages; + + mtx_assert(&bounce_lock, MA_OWNED); + bz = dmat->bounce_zone; + pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); + if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) + return (map->pagesneeded - (map->pagesreserved + pages)); + bz->free_bpages -= pages; + bz->reserved_bpages += pages; + map->pagesreserved += pages; + pages = map->pagesneeded - map->pagesreserved; + + return (pages); +} + +static bus_addr_t +add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, + bus_addr_t addr, bus_size_t size) +{ + struct bounce_zone *bz; + struct bounce_page *bpage; + + KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); + KASSERT((map->flags & DMAMAP_COULD_BOUNCE) != 0, + ("add_bounce_page: bad map %p", map)); + + bz = dmat->bounce_zone; + if (map->pagesneeded == 0) + panic("add_bounce_page: map doesn't need any pages"); + map->pagesneeded--; + + if (map->pagesreserved == 0) + panic("add_bounce_page: map doesn't need any pages"); + map->pagesreserved--; + + mtx_lock(&bounce_lock); + bpage = STAILQ_FIRST(&bz->bounce_page_list); + if (bpage == NULL) + panic("add_bounce_page: free page list is empty"); + + STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); + 
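	/* The page moves from the reserved pool to this map's active set. */ +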
bz->reserved_bpages--; + bz->active_bpages++; + mtx_unlock(&bounce_lock); + + if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { + /* Page offset needs to be preserved. */ + bpage->vaddr |= addr & PAGE_MASK; + bpage->busaddr |= addr & PAGE_MASK; + } + bpage->datavaddr = vaddr; + bpage->datapage = PHYS_TO_VM_PAGE(addr); + bpage->dataoffs = addr & PAGE_MASK; + bpage->datacount = size; + STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); + return (bpage->busaddr); +} + +static void +free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) +{ + struct bus_dmamap *map; + struct bounce_zone *bz; + + bz = dmat->bounce_zone; + bpage->datavaddr = 0; + bpage->datacount = 0; + if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { + /* + * Reset the bounce page to start at offset 0. Other uses + * of this bounce page may need to store a full page of + * data and/or assume it starts on a page boundary. + */ + bpage->vaddr &= ~PAGE_MASK; + bpage->busaddr &= ~PAGE_MASK; + } + + mtx_lock(&bounce_lock); + STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); + bz->free_bpages++; + bz->active_bpages--; + if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { + if (reserve_bounce_pages(map->dmat, map, 1) == 0) { + STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); + STAILQ_INSERT_TAIL(&bounce_map_callbacklist, + map, links); + busdma_swi_pending = 1; + bz->total_deferred++; + swi_sched(vm_ih, 0); + } + } + mtx_unlock(&bounce_lock); +} + +void +busdma_swi(void) +{ + bus_dma_tag_t dmat; + struct bus_dmamap *map; + + mtx_lock(&bounce_lock); + while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { + STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); + mtx_unlock(&bounce_lock); + dmat = map->dmat; + (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK); + bus_dmamap_load_mem(map->dmat, map, &map->mem, + map->callback, map->callback_arg, BUS_DMA_WAITOK); + (dmat->common.lockfunc)(dmat->common.lockfuncarg, + BUS_DMA_UNLOCK); + mtx_lock(&bounce_lock); + } + mtx_unlock(&bounce_lock); +} + +struct bus_dma_impl bus_dma_bounce_impl = { + .tag_create = bounce_bus_dma_tag_create, + .tag_destroy = bounce_bus_dma_tag_destroy, + .id_mapped = bounce_bus_dma_id_mapped, + .map_create = bounce_bus_dmamap_create, + .map_destroy = bounce_bus_dmamap_destroy, + .mem_alloc = bounce_bus_dmamem_alloc, + .mem_free = bounce_bus_dmamem_free, + .load_phys = bounce_bus_dmamap_load_phys, + .load_buffer = bounce_bus_dmamap_load_buffer, + .load_ma = bus_dmamap_load_ma_triv, + .map_waitok = bounce_bus_dmamap_waitok, + .map_complete = bounce_bus_dmamap_complete, + .map_unload = bounce_bus_dmamap_unload, + .map_sync = bounce_bus_dmamap_sync +}; diff --git a/sys/arm64/arm64/busdma_machdep.c b/sys/arm64/arm64/busdma_machdep.c new file mode 100644 index 000000000000..1a5ac67a2a4f --- /dev/null +++ b/sys/arm64/arm64/busdma_machdep.c @@ -0,0 +1,285 @@ +/*- + * Copyright (c) 1997, 1998 Justin T. Gibbs. + * Copyright (c) 2013, 2015 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov <kib@FreeBSD.org> + * under sponsorship from the FreeBSD Foundation. + * + * Portions of this software were developed by Semihalf + * under sponsorship of the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification, immediately at the beginning of the file.
+ * 2. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/uio.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+#include <arm64/include/bus_dma_impl.h>
+
+/*
+ * Convenience function for manipulating driver locks from busdma (during
+ * busdma_swi, for example).  Drivers that don't provide their own locks
+ * should specify &Giant to dmat->lockfuncarg.  Drivers that use their own
+ * non-mutex locking scheme don't have to use this at all.
+ */
+void
+busdma_lock_mutex(void *arg, bus_dma_lock_op_t op)
+{
+	struct mtx *dmtx;
+
+	dmtx = (struct mtx *)arg;
+	switch (op) {
+	case BUS_DMA_LOCK:
+		mtx_lock(dmtx);
+		break;
+	case BUS_DMA_UNLOCK:
+		mtx_unlock(dmtx);
+		break;
+	default:
+		panic("Unknown operation 0x%x for busdma_lock_mutex!", op);
+	}
+}
+
+/*
+ * dflt_lock should never get called.  It gets put into the dma tag when
+ * lockfunc == NULL, which is only valid if the maps that are associated
+ * with the tag are never meant to be deferred.
+ * XXX Should have a way to identify which driver is responsible here.
+ */
+void
+bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op)
+{
+
+	panic("driver error: busdma dflt_lock called");
+}
+
+/*
+ * Return true if a match is made.
+ *
+ * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'.
+ *
+ * If paddr is within the bounds of the dma tag, then call the filter
+ * callback to check for a match; if there is no filter callback, assume
+ * a match.
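+ *
+ * For example, a tag with lowaddr == BUS_SPACE_MAXADDR and an alignment
+ * of 1 can never match, so transfers under such a tag never bounce.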
+ */ +int +bus_dma_run_filter(struct bus_dma_tag_common *tc, bus_addr_t paddr) +{ + int retval; + + retval = 0; + do { + if (((paddr > tc->lowaddr && paddr <= tc->highaddr) || + ((paddr & (tc->alignment - 1)) != 0)) && + (tc->filter == NULL || + (*tc->filter)(tc->filterarg, paddr) != 0)) + retval = 1; + + tc = tc->parent; + } while (retval == 0 && tc != NULL); + return (retval); +} + +int +common_bus_dma_tag_create(struct bus_dma_tag_common *parent, + bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, + bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, + bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, + bus_dma_lock_t *lockfunc, void *lockfuncarg, size_t sz, void **dmat) +{ + void *newtag; + struct bus_dma_tag_common *common; + + KASSERT(sz >= sizeof(struct bus_dma_tag_common), ("sz")); + /* Return a NULL tag on failure */ + *dmat = NULL; + /* Basic sanity checking */ + if (boundary != 0 && boundary < maxsegsz) + maxsegsz = boundary; + if (maxsegsz == 0) + return (EINVAL); + + newtag = malloc(sz, M_DEVBUF, M_ZERO | M_NOWAIT); + if (newtag == NULL) { + CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", + __func__, newtag, 0, ENOMEM); + return (ENOMEM); + } + + common = newtag; + common->impl = &bus_dma_bounce_impl; + common->parent = parent; + common->alignment = alignment; + common->boundary = boundary; + common->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); + common->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); + common->filter = filter; + common->filterarg = filterarg; + common->maxsize = maxsize; + common->nsegments = nsegments; + common->maxsegsz = maxsegsz; + common->flags = flags; + common->ref_count = 1; /* Count ourself */ + if (lockfunc != NULL) { + common->lockfunc = lockfunc; + common->lockfuncarg = lockfuncarg; + } else { + common->lockfunc = bus_dma_dflt_lock; + common->lockfuncarg = NULL; + } + + /* Take into account any restrictions imposed by our parent tag */ + if (parent != NULL) { + common->impl = parent->impl; + common->lowaddr = MIN(parent->lowaddr, common->lowaddr); + common->highaddr = MAX(parent->highaddr, common->highaddr); + if (common->boundary == 0) + common->boundary = parent->boundary; + else if (parent->boundary != 0) { + common->boundary = MIN(parent->boundary, + common->boundary); + } + if (common->filter == NULL) { + /* + * Short circuit looking at our parent directly + * since we have encapsulated all of its information + */ + common->filter = parent->filter; + common->filterarg = parent->filterarg; + common->parent = parent->parent; + } + atomic_add_int(&parent->ref_count, 1); + } + *dmat = common; + return (0); +} + +/* + * Allocate a device specific dma_tag. 
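+ *
+ * A sketch of typical driver usage (sc->dma_tag and the constraint values
+ * are illustrative, not taken from a real driver):
+ *
+ *	error = bus_dma_tag_create(
+ *	    bus_get_dma_tag(dev),	parent tag from the bus
+ *	    4, 0,			4-byte alignment, no boundary
+ *	    BUS_SPACE_MAXADDR_32BIT,	lowaddr: a 32-bit device
+ *	    BUS_SPACE_MAXADDR,		highaddr
+ *	    NULL, NULL,			no filter
+ *	    MCLBYTES, 1, MCLBYTES,	maxsize, nsegments, maxsegsz
+ *	    0, NULL, NULL, &sc->dma_tag);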
+ */ +int +bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, + bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, + bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, + int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, + void *lockfuncarg, bus_dma_tag_t *dmat) +{ + struct bus_dma_tag_common *tc; + int error; + + if (parent == NULL) { + error = bus_dma_bounce_impl.tag_create(parent, alignment, + boundary, lowaddr, highaddr, filter, filterarg, maxsize, + nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat); + } else { + tc = (struct bus_dma_tag_common *)parent; + error = tc->impl->tag_create(parent, alignment, + boundary, lowaddr, highaddr, filter, filterarg, maxsize, + nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat); + } + return (error); +} + +void +bus_dma_template_init(bus_dma_tag_template_t *t, bus_dma_tag_t parent) +{ + + if (t == NULL) + return; + + t->parent = parent; + t->alignment = 1; + t->boundary = 0; + t->lowaddr = t->highaddr = BUS_SPACE_MAXADDR; + t->maxsize = t->maxsegsize = BUS_SPACE_MAXSIZE; + t->nsegments = BUS_SPACE_UNRESTRICTED; + t->lockfunc = NULL; + t->lockfuncarg = NULL; + t->flags = 0; +} + +int +bus_dma_template_tag(bus_dma_tag_template_t *t, bus_dma_tag_t *dmat) +{ + + if (t == NULL || dmat == NULL) + return (EINVAL); + + return (bus_dma_tag_create(t->parent, t->alignment, t->boundary, + t->lowaddr, t->highaddr, NULL, NULL, t->maxsize, + t->nsegments, t->maxsegsize, t->flags, t->lockfunc, t->lockfuncarg, + dmat)); +} + +void +bus_dma_template_clone(bus_dma_tag_template_t *t, bus_dma_tag_t dmat) +{ + struct bus_dma_tag_common *common; + + if (t == NULL || dmat == NULL) + return; + + common = (struct bus_dma_tag_common *)dmat; + + t->parent = (bus_dma_tag_t)common->parent; + t->alignment = common->alignment; + t->boundary = common->boundary; + t->lowaddr = common->lowaddr; + t->highaddr = common->highaddr; + t->maxsize = common->maxsize; + t->nsegments = common->nsegments; + t->maxsegsize = common->maxsegsz; + t->flags = common->flags; + t->lockfunc = common->lockfunc; + t->lockfuncarg = common->lockfuncarg; +} + +int +bus_dma_tag_destroy(bus_dma_tag_t dmat) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->tag_destroy(dmat)); +} + +int +bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain) +{ + + return (0); +} diff --git a/sys/arm64/arm64/bzero.S b/sys/arm64/arm64/bzero.S new file mode 100644 index 000000000000..6c7f1fef1494 --- /dev/null +++ b/sys/arm64/arm64/bzero.S @@ -0,0 +1,206 @@ +/*- + * Copyright (C) 2016 Cavium Inc. + * All rights reserved. + * + * Developed by Semihalf. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+
+#include "assym.inc"
+
+	/*
+	 * void bzero(void *p, size_t size)
+	 *
+	 *  x0 - p
+	 *  x1 - size
+	 */
+ENTRY(bzero)
+	cbz	x1, ending
+
+	/*
+	 * x5 is the number of cache lines to zero - calculated later and
+	 * will become non-zero if the buffer is long enough to zero by
+	 * cache lines (and if it is allowed).
+	 * We need to zero it before proceeding with buffers of size
+	 * smaller than 16 bytes - otherwise x5 will not be calculated
+	 * and will retain a random value.
+	 * "normal" is used for buffers <= 16 bytes and to align the buffer
+	 * to a cache line for buffers bigger than a cache line; a non-zero
+	 * x5 after "normal" has completed indicates that it has been used
+	 * to align the buffer to a cache line and zeroing by cache lines
+	 * will now be performed, with x5 holding the number of cache lines
+	 * to loop through.
+	 */
+	mov	x5, xzr
+
+	/* No use of cache assisted zero for buffers with size <= 16 */
+	cmp	x1, #0x10
+	b.le	normal
+
+	/*
+	 * Load the size of the line that will be cleaned by the dc zva
+	 * call. 0 means that the instruction is not allowed.
+	 */
+	ldr	x7, =dczva_line_size
+	ldr	x7, [x7]
+	cbz	x7, normal
+
+	/*
+	 * Buffer must be larger than cache line for using cache zeroing
+	 * (and cache line aligned but this is checked after jump)
+	 */
+	cmp	x1, x7
+	b.lt	normal
+
+	/*
+	 * Calculate the number of bytes to the cache aligned address (x4)
+	 * and the number of full cache lines (x5). x6 is the final address
+	 * to zero.
+	 */
+	sub	x2, x7, #0x01
+	mov	x3, -1
+	eor	x3, x3, x2
+	add	x4, x0, x2
+	and	x4, x4, x3
+	subs	x4, x4, x0
+	b.eq	normal
+
+	/* Calculate number of "lines" in buffer */
+	sub	x5, x1, x4
+	rbit	x2, x7
+	clz	x2, x2
+	lsr	x5, x5, x2
+
+	/*
+	 * If the number of cache lines is 0, we will not be able to zero
+	 * by cache lines, so go the normal way.
+	 */
+	cbz	x5, normal
+	/* x6 is final address to zero */
+	add	x6, x0, x1
+
+	/*
+	 * We are here because x5 is non-0 so normal will be used to
+	 * align the buffer before cache zeroing. x4 holds the number of
+	 * bytes needed for alignment.
+	 */
+	mov	x1, x4
+
+	/* When jumping here: x0 holds pointer, x1 holds size */
+normal:
+	/*
+	 * Get buffer offset into 16 byte aligned address; 0 means pointer
+	 * is aligned.
+	 */
+	ands	x2, x0, #0x0f
+	b.eq	aligned_to_16
+	/* Calculate one-byte loop runs to the 8 byte aligned address. */
+	ands	x2, x2, #0x07
+	mov	x3, #0x08
+	sub	x2, x3, x2
+	/* x2 is number of bytes missing for alignment, x1 is buffer size */
+	cmp	x1, x2
+	csel	x2, x1, x2, le
+	sub	x1, x1, x2
+
+	/*
+	 * The byte-by-byte loop will zero at least enough bytes to align
+	 * the pointer and at most "size".
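+	 *
+	 * In rough C terms (an illustrative sketch, not generated code),
+	 * for a pointer ending in 0xd:
+	 *
+	 *	misalign = p & 0x7;			0xd & 7 = 5
+	 *	head = 8 - misalign;			3 single-byte stores
+	 *	head = (size < head) ? size : head;	never overshoot
+	 *	size -= head;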
+	 */
+align:
+	strb	wzr, [x0], #0x01
+	subs	x2, x2, #0x01
+	b.ne	align
+
+	/* Now pointer is aligned to 8 bytes */
+	cmp	x1, #0x10
+	b.lt	lead_out
+	/*
+	 * Check if zeroing another 8 bytes is needed to align to a 16 byte
+	 * address and do it
+	 */
+	tbz	x0, #0x03, aligned_to_16
+	str	xzr, [x0], #0x08
+	sub	x1, x1, #0x08
+
+	/* When jumping here: x0 is a 16 byte aligned address, x1 is size */
+aligned_to_16:
+	/* If size is less than 16 bytes, use lead_out to zero what remains */
+	cmp	x1, #0x10
+	b.lt	lead_out
+
+	lsr	x2, x1, #0x04
+zero_by_16:
+	stp	xzr, xzr, [x0], #0x10
+	subs	x2, x2, #0x01
+	b.ne	zero_by_16
+
+	/*
+	 * Lead out requires addresses to be aligned to 8 bytes. It is used
+	 * to zero buffers with sizes < 16 and whatever cannot be zeroed by
+	 * the zero_by_16 loop.
+	 */
+	ands	x1, x1, #0x0f
+	b.eq	lead_out_end
+lead_out:
+	tbz	x1, #0x03, lead_out_dword
+	str	xzr, [x0], #0x08
+lead_out_dword:
+	tbz	x1, #0x02, lead_out_word
+	str	wzr, [x0], #0x04
+lead_out_word:
+	tbz	x1, #0x01, lead_out_byte
+	strh	wzr, [x0], #0x02
+lead_out_byte:
+	tbz	x1, #0x00, lead_out_end
+	strb	wzr, [x0], #0x01
+
+lead_out_end:
+	/*
+	 * If x5 is non-zero, this means that normal has been used as
+	 * a lead-in to align the buffer address to the cache line size.
+	 */
+	cbz	x5, ending
+
+	/*
+	 * Here x5 holds the number of lines to zero; x6 is the final
+	 * address of the buffer. x0 is a cache line aligned pointer. x7
+	 * is the cache line size in bytes.
+	 */
+cache_line_zero:
+	dc	zva, x0
+	add	x0, x0, x7
+	subs	x5, x5, #0x01
+	b.ne	cache_line_zero
+
+	/* Need to zero remaining bytes? */
+	subs	x1, x6, x0
+	b.ne	normal
+
+ending:
+	ret
+
+END(bzero)
+
diff --git a/sys/arm64/arm64/clock.c b/sys/arm64/arm64/clock.c
new file mode 100644
index 000000000000..ef68ea4d7e7b
--- /dev/null
+++ b/sys/arm64/arm64/clock.c
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2014 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/systm.h> + +void +cpu_initclocks(void) +{ + + cpu_initclocks_bsp(); +} diff --git a/sys/arm64/arm64/copyinout.S b/sys/arm64/arm64/copyinout.S new file mode 100644 index 000000000000..5c523d11ed00 --- /dev/null +++ b/sys/arm64/arm64/copyinout.S @@ -0,0 +1,226 @@ +/*- + * Copyright (c) 2015 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include <machine/asm.h> +__FBSDID("$FreeBSD$"); + +#include <sys/errno.h> + +#include <machine/vmparam.h> + +#include "assym.inc" + +/* + * Fault handler for the copy{in,out} functions below. 
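+ *
+ * A rough sketch of the protocol (assuming SET_FAULT_HANDLER stores the
+ * handler address in the current thread's pcb, pcb_onfault-style):
+ *
+ *	pcb->pcb_onfault = copyio_fault;	arm the handler
+ *	...user-space loads/stores; a fault branches to copyio_fault...
+ *	pcb->pcb_onfault = 0;			disarm on success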
+ */
+ENTRY(copyio_fault)
+	SET_FAULT_HANDLER(xzr, x1)	/* Clear the handler */
+	EXIT_USER_ACCESS_CHECK(w0, x1)
+copyio_fault_nopcb:
+	mov	x0, #EFAULT
+	ret
+END(copyio_fault)
+
+/*
+ * Copies from a kernel to a user address
+ *
+ * int copyout(const void *kaddr, void *udaddr, size_t len)
+ */
+ENTRY(copyout)
+	cbz	x2, 1f
+	adds	x3, x1, x2
+	b.cs	copyio_fault_nopcb
+	ldr	x4, =VM_MAXUSER_ADDRESS
+	cmp	x3, x4
+	b.hi	copyio_fault_nopcb
+
+	b	copycommon
+
+1:	mov	x0, xzr		/* return 0 */
+	ret
+
+END(copyout)
+
+/*
+ * Copies from a user to a kernel address
+ *
+ * int copyin(const void *uaddr, void *kdaddr, size_t len)
+ */
+ENTRY(copyin)
+	cbz	x2, 1f
+	adds	x3, x0, x2
+	b.cs	copyio_fault_nopcb
+	ldr	x4, =VM_MAXUSER_ADDRESS
+	cmp	x3, x4
+	b.hi	copyio_fault_nopcb
+
+	b	copycommon
+
+1:	mov	x0, xzr		/* return 0 */
+	ret
+
+END(copyin)
+
+/*
+ * Copies a string from a user to a kernel address
+ *
+ * int copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *done)
+ */
+ENTRY(copyinstr)
+	mov	x5, xzr		/* count = 0 */
+	mov	w4, #1		/* If zero, return failure */
+	cbz	x2, 3f		/* If len == 0 then skip loop */
+
+	adr	x6, copyio_fault /* Get the handler address */
+	SET_FAULT_HANDLER(x6, x7) /* Set the handler */
+	ENTER_USER_ACCESS(w6, x7)
+
+	ldr	x7, =VM_MAXUSER_ADDRESS
+1:	cmp	x0, x7
+	b.cs	copyio_fault
+	ldtrb	w4, [x0]	/* Load from uaddr */
+	add	x0, x0, #1	/* Next char */
+	strb	w4, [x1], #1	/* Store in kaddr */
+	add	x5, x5, #1	/* count++ */
+	cbz	w4, 2f		/* Break when NUL-terminated */
+	sub	x2, x2, #1	/* len-- */
+	cbnz	x2, 1b
+
+2:	EXIT_USER_ACCESS(w6)
+	SET_FAULT_HANDLER(xzr, x7) /* Clear the handler */
+
+
+3:	cbz	x3, 4f		/* Check if done != NULL */
+	str	x5, [x3]	/* done = count */
+
+4:	mov	w1, #ENAMETOOLONG /* Load ENAMETOOLONG to return if failed */
+	cmp	w4, #0		/* Check if we saved the NUL-terminator */
+	csel	w0, wzr, w1, eq	/* If so return success, else failure */
+	ret
+END(copyinstr)
+
+/*
+ * Local helper
+ *
+ * x0 - src pointer
+ * x1 - dst pointer
+ * x2 - size
+ * lr - the return address, so jump here instead of calling
+ *
+ * This function is optimized to minimize concurrent memory accesses. In
+ * its present form it is suited for cores with a single memory prefetching
+ * unit.
+ * ARM64TODO:
+ * Consider using separate functions for each ARM64 core. Adding memory
+ * access interleaving might increase total throughput on A57 or A72.
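+ *
+ * An illustrative outline (not generated code): the copy runs in three
+ * stages - 64-byte blocks (four ldp/stp pairs), 16-byte pairs, then a
+ * lead-out that decodes the remaining length straight from its bits,
+ * e.g. size 13 (0b1101) copies 8 + 4 + 1 bytes:
+ *
+ *	if (size & 8) { copy 8 bytes }
+ *	if (size & 4) { copy 4 bytes }
+ *	if (size & 2) { copy 2 bytes }
+ *	if (size & 1) { copy 1 byte  }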
+ */ + .text + .align 4 + .local copycommon + .type copycommon,@function + +copycommon: + adr x6, copyio_fault /* Get the handler address */ + SET_FAULT_HANDLER(x6, x7) /* Set the handler */ + ENTER_USER_ACCESS(w6, x7) + + /* Check alignment */ + orr x3, x0, x1 + ands x3, x3, 0x07 + b.eq aligned + + /* Unaligned is byte by byte copy */ +byte_by_byte: + ldrb w3, [x0], #0x01 + strb w3, [x1], #0x01 + subs x2, x2, #0x01 + b.ne byte_by_byte + b ending + +aligned: + cmp x2, #0x10 + b.lt lead_out + cmp x2, #0x40 + b.lt by_dwords_start + + /* Block copy */ + lsr x15, x2, #0x06 +by_blocks: + ldp x3, x4, [x0], #0x10 + ldp x5, x6, [x0], #0x10 + ldp x7, x8, [x0], #0x10 + ldp x9, x10, [x0], #0x10 + stp x3, x4, [x1], #0x10 + stp x5, x6, [x1], #0x10 + stp x7, x8, [x1], #0x10 + stp x9, x10, [x1], #0x10 + + subs x15, x15, #0x01 + b.ne by_blocks + + and x2, x2, #0x3f + +by_dwords_start: + lsr x15, x2, #0x04 + cbz x15, lead_out +by_dwords: + ldp x3, x4, [x0], #0x10 + stp x3, x4, [x1], #0x10 + subs x15, x15, #0x01 + b.ne by_dwords + + /* Less than 16 bytes to copy */ +lead_out: + tbz x2, #0x03, last_word + ldr x3, [x0], #0x08 + str x3, [x1], #0x08 + +last_word: + tbz x2, #0x02, last_hword + ldr w3, [x0], #0x04 + str w3, [x1], #0x04 + +last_hword: + tbz x2, #0x01, last_byte + ldrh w3, [x0], #0x02 + strh w3, [x1], #0x02 + +last_byte: + tbz x2, #0x00, ending + ldrb w3, [x0] + strb w3, [x1] + +ending: + EXIT_USER_ACCESS_CHECK(w6, x7) + SET_FAULT_HANDLER(xzr, x7) /* Clear the handler */ + + mov x0, xzr /* return 0 */ + ret + .size copycommon, . - copycommon diff --git a/sys/arm64/arm64/cpu_errata.c b/sys/arm64/arm64/cpu_errata.c new file mode 100644 index 000000000000..9879e645b827 --- /dev/null +++ b/sys/arm64/arm64/cpu_errata.c @@ -0,0 +1,192 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2018 Andrew Turner + * All rights reserved. + * + * This software was developed by SRI International and the University of + * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 + * ("CTSRD"), as part of the DARPA CRASH research programme. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "opt_platform.h" + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/pcpu.h> +#include <sys/systm.h> + +#include <machine/cpu.h> + +#include <dev/psci/smccc.h> + +typedef void (cpu_quirk_install)(void); +struct cpu_quirks { + cpu_quirk_install *quirk_install; + u_int midr_mask; + u_int midr_value; +}; + +static enum { + SSBD_FORCE_ON, + SSBD_FORCE_OFF, + SSBD_KERNEL, +} ssbd_method = SSBD_KERNEL; + +static cpu_quirk_install install_psci_bp_hardening; +static cpu_quirk_install install_ssbd_workaround; +static cpu_quirk_install install_thunderx_bcast_tlbi_workaround; + +static struct cpu_quirks cpu_quirks[] = { + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A57,0,0), + .quirk_install = install_psci_bp_hardening, + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72,0,0), + .quirk_install = install_psci_bp_hardening, + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A73,0,0), + .quirk_install = install_psci_bp_hardening, + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A75,0,0), + .quirk_install = install_psci_bp_hardening, + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = + CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX2, 0,0), + .quirk_install = install_psci_bp_hardening, + }, + { + .midr_mask = 0, + .midr_value = 0, + .quirk_install = install_ssbd_workaround, + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = + CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX, 0, 0), + .quirk_install = install_thunderx_bcast_tlbi_workaround, + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = + CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX_81XX, 0, 0), + .quirk_install = install_thunderx_bcast_tlbi_workaround, + }, +}; + +static void +install_psci_bp_hardening(void) +{ + + if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_1) != SMCCC_RET_SUCCESS) + return; + + PCPU_SET(bp_harden, smccc_arch_workaround_1); +} + +static void +install_ssbd_workaround(void) +{ + char *env; + + if (PCPU_GET(cpuid) == 0) { + env = kern_getenv("kern.cfg.ssbd"); + if (env != NULL) { + if (strcmp(env, "force-on") == 0) { + ssbd_method = SSBD_FORCE_ON; + } else if (strcmp(env, "force-off") == 0) { + ssbd_method = SSBD_FORCE_OFF; + } + } + } + + /* Enable the workaround on this CPU if it's enabled in the firmware */ + if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_2) != SMCCC_RET_SUCCESS) + return; + + switch(ssbd_method) { + case SSBD_FORCE_ON: + smccc_arch_workaround_2(1); + break; + case SSBD_FORCE_OFF: + smccc_arch_workaround_2(0); + break; + case SSBD_KERNEL: + default: + PCPU_SET(ssbd, smccc_arch_workaround_2); + break; + } +} + +/* + * Workaround Cavium erratum 27456. + * + * Invalidate the local icache when changing address spaces. 
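+ *
+ * This function only sets a per-CPU flag; a sketch of the expected
+ * consumer (the exact call site lives elsewhere in the tree) is:
+ *
+ *	if (PCPU_GET(bcast_tlbi_workaround) != 0)
+ *		...invalidate the local icache after the broadcast TLBI...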
+ */ +static void +install_thunderx_bcast_tlbi_workaround(void) +{ + u_int midr; + + midr = get_midr(); + if (CPU_PART(midr) == CPU_PART_THUNDERX_81XX) + PCPU_SET(bcast_tlbi_workaround, 1); + else if (CPU_PART(midr) == CPU_PART_THUNDERX) { + if (CPU_VAR(midr) == 0) { + /* ThunderX 1.x */ + PCPU_SET(bcast_tlbi_workaround, 1); + } else if (CPU_VAR(midr) == 1 && CPU_REV(midr) <= 1) { + /* ThunderX 2.0 - 2.1 */ + PCPU_SET(bcast_tlbi_workaround, 1); + } + } +} + +void +install_cpu_errata(void) +{ + u_int midr; + size_t i; + + midr = get_midr(); + + for (i = 0; i < nitems(cpu_quirks); i++) { + if ((midr & cpu_quirks[i].midr_mask) == + cpu_quirks[i].midr_value) { + cpu_quirks[i].quirk_install(); + } + } +} diff --git a/sys/arm64/arm64/cpufunc_asm.S b/sys/arm64/arm64/cpufunc_asm.S new file mode 100644 index 000000000000..2f28c4f68271 --- /dev/null +++ b/sys/arm64/arm64/cpufunc_asm.S @@ -0,0 +1,182 @@ +/*- + * Copyright (c) 2014 Robin Randhawa + * Copyright (c) 2015 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by Andrew Turner + * under sponsorship from the FreeBSD Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include <sys/errno.h> +#include <machine/asm.h> +#include <machine/param.h> + +#include "assym.inc" + +__FBSDID("$FreeBSD$"); + +/* + * FIXME: + * Need big.LITTLE awareness at some point. + * Using arm64_p[id]cache_line_size may not be the best option. + * Need better SMP awareness. + */ + .text + .align 2 + +.Lpage_mask: + .word PAGE_MASK + +/* + * Macro to handle the cache. This takes the start address in x0, length + * in x1. It will corrupt x0, x1, x2, x3, and x4. 
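+ *
+ * An illustrative walk-through in C terms (assuming 64-byte lines and a
+ * request of addr 0x1038, len 0x10):
+ *
+ *	mask = line_size - 1;
+ *	len += addr & mask;		len 0x10 -> 0x48
+ *	addr &= ~mask;			addr 0x1038 -> 0x1000
+ *	do { dc(op, addr); addr += line_size; len -= line_size; }
+ *	while (len > 0);		touches lines 0x1000 and 0x1040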
+ */ +.macro cache_handle_range dcop = 0, ic = 0, icop = 0 +.if \ic == 0 + ldr x3, =dcache_line_size /* Load the D cache line size */ +.else + ldr x3, =idcache_line_size /* Load the I & D cache line size */ +.endif + ldr x3, [x3] + sub x4, x3, #1 /* Get the address mask */ + and x2, x0, x4 /* Get the low bits of the address */ + add x1, x1, x2 /* Add these to the size */ + bic x0, x0, x4 /* Clear the low bit of the address */ +.if \ic != 0 + mov x2, x0 /* Save the address */ + mov x4, x1 /* Save the size */ +.endif +1: + dc \dcop, x0 + add x0, x0, x3 /* Move to the next line */ + subs x1, x1, x3 /* Reduce the size */ + b.hi 1b /* Check if we are done */ + dsb ish +.if \ic != 0 +2: + ic \icop, x2 + add x2, x2, x3 /* Move to the next line */ + subs x4, x4, x3 /* Reduce the size */ + b.hi 2b /* Check if we are done */ + dsb ish + isb +.endif +.endm + +ENTRY(arm64_nullop) + ret +END(arm64_nullop) + +/* + * Generic functions to read/modify/write the internal coprocessor registers + */ + +ENTRY(arm64_tlb_flushID) + dsb ishst +#ifdef SMP + tlbi vmalle1is +#else + tlbi vmalle1 +#endif + dsb ish + isb + ret +END(arm64_tlb_flushID) + +/* + * void arm64_dcache_wb_range(vm_offset_t, vm_size_t) + */ +ENTRY(arm64_dcache_wb_range) + cache_handle_range dcop = cvac + ret +END(arm64_dcache_wb_range) + +/* + * void arm64_dcache_wbinv_range(vm_offset_t, vm_size_t) + */ +ENTRY(arm64_dcache_wbinv_range) + cache_handle_range dcop = civac + ret +END(arm64_dcache_wbinv_range) + +/* + * void arm64_dcache_inv_range(vm_offset_t, vm_size_t) + * + * Note, we must not invalidate everything. If the range is too big we + * must use wb-inv of the entire cache. + */ +ENTRY(arm64_dcache_inv_range) + cache_handle_range dcop = ivac + ret +END(arm64_dcache_inv_range) + +/* + * void arm64_dic_idc_icache_sync_range(vm_offset_t, vm_size_t) + * When the CTR_EL0.IDC bit is set cleaning to PoU becomes a dsb. + * When the CTR_EL0.DIC bit is set icache invalidation becomes an isb. + */ +ENTRY(arm64_dic_idc_icache_sync_range) + dsb ishst + isb + ret +END(arm64_dic_idc_icache_sync_range) + +/* + * void arm64_aliasing_icache_sync_range(vm_offset_t, vm_size_t) + */ +ENTRY(arm64_aliasing_icache_sync_range) + /* + * XXX Temporary solution - I-cache flush should be range based for + * PIPT cache or IALLUIS for VIVT or VIPT caches + */ +/* cache_handle_range dcop = cvau, ic = 1, icop = ivau */ + cache_handle_range dcop = cvau + ic ialluis + dsb ish + isb + ret +END(arm64_aliasing_icache_sync_range) + +/* + * int arm64_icache_sync_range_checked(vm_offset_t, vm_size_t) + */ +ENTRY(arm64_icache_sync_range_checked) + adr x5, cache_maint_fault + SET_FAULT_HANDLER(x5, x6) + /* XXX: See comment in arm64_icache_sync_range */ + cache_handle_range dcop = cvau + ic ialluis + dsb ish + isb + SET_FAULT_HANDLER(xzr, x6) + mov x0, #0 + ret +END(arm64_icache_sync_range_checked) + +ENTRY(cache_maint_fault) + SET_FAULT_HANDLER(xzr, x1) + mov x0, #EFAULT + ret +END(cache_maint_fault) diff --git a/sys/arm64/arm64/db_disasm.c b/sys/arm64/arm64/db_disasm.c new file mode 100644 index 000000000000..73efca0bdee9 --- /dev/null +++ b/sys/arm64/arm64/db_disasm.c @@ -0,0 +1,70 @@ +/*- + * Copyright (c) 2015 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Semihalf under + * the sponsorship of the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); +#include <sys/param.h> +#include <ddb/ddb.h> +#include <ddb/db_access.h> +#include <ddb/db_sym.h> + +#include <machine/disassem.h> + +static u_int db_disasm_read_word(vm_offset_t); +static void db_disasm_printaddr(vm_offset_t); + +/* Glue code to interface db_disasm to the generic ARM disassembler */ +static const struct disasm_interface db_disasm_interface = { + .di_readword = db_disasm_read_word, + .di_printaddr = db_disasm_printaddr, + .di_printf = db_printf, +}; + +static u_int +db_disasm_read_word(vm_offset_t address) +{ + + return (db_get_value(address, INSN_SIZE, 0)); +} + +static void +db_disasm_printaddr(vm_offset_t address) +{ + + db_printsym((db_addr_t)address, DB_STGY_ANY); +} + +vm_offset_t +db_disasm(vm_offset_t loc, bool altfmt) +{ + + return (disasm(&db_disasm_interface, loc, altfmt)); +} + +/* End of db_disasm.c */ diff --git a/sys/arm64/arm64/db_interface.c b/sys/arm64/arm64/db_interface.c new file mode 100644 index 000000000000..5138bf3f1cab --- /dev/null +++ b/sys/arm64/arm64/db_interface.c @@ -0,0 +1,194 @@ +/*- + * Copyright (c) 2015 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Semihalf under + * the sponsorship of the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); +#include <sys/param.h> +#include <sys/proc.h> +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> + +#ifdef KDB +#include <sys/kdb.h> +#endif + +#include <ddb/ddb.h> +#include <ddb/db_variables.h> + +#include <machine/cpu.h> +#include <machine/pcb.h> +#include <machine/stack.h> +#include <machine/vmparam.h> + +static int +db_frame(struct db_variable *vp, db_expr_t *valuep, int op) +{ + long *reg; + + if (kdb_frame == NULL) + return (0); + + reg = (long *)((uintptr_t)kdb_frame + (db_expr_t)vp->valuep); + if (op == DB_VAR_GET) + *valuep = *reg; + else + *reg = *valuep; + return (1); +} + +#define DB_OFFSET(x) (db_expr_t *)offsetof(struct trapframe, x) +struct db_variable db_regs[] = { + { "spsr", DB_OFFSET(tf_spsr), db_frame }, + { "x0", DB_OFFSET(tf_x[0]), db_frame }, + { "x1", DB_OFFSET(tf_x[1]), db_frame }, + { "x2", DB_OFFSET(tf_x[2]), db_frame }, + { "x3", DB_OFFSET(tf_x[3]), db_frame }, + { "x4", DB_OFFSET(tf_x[4]), db_frame }, + { "x5", DB_OFFSET(tf_x[5]), db_frame }, + { "x6", DB_OFFSET(tf_x[6]), db_frame }, + { "x7", DB_OFFSET(tf_x[7]), db_frame }, + { "x8", DB_OFFSET(tf_x[8]), db_frame }, + { "x9", DB_OFFSET(tf_x[9]), db_frame }, + { "x10", DB_OFFSET(tf_x[10]), db_frame }, + { "x11", DB_OFFSET(tf_x[11]), db_frame }, + { "x12", DB_OFFSET(tf_x[12]), db_frame }, + { "x13", DB_OFFSET(tf_x[13]), db_frame }, + { "x14", DB_OFFSET(tf_x[14]), db_frame }, + { "x15", DB_OFFSET(tf_x[15]), db_frame }, + { "x16", DB_OFFSET(tf_x[16]), db_frame }, + { "x17", DB_OFFSET(tf_x[17]), db_frame }, + { "x18", DB_OFFSET(tf_x[18]), db_frame }, + { "x19", DB_OFFSET(tf_x[19]), db_frame }, + { "x20", DB_OFFSET(tf_x[20]), db_frame }, + { "x21", DB_OFFSET(tf_x[21]), db_frame }, + { "x22", DB_OFFSET(tf_x[22]), db_frame }, + { "x23", DB_OFFSET(tf_x[23]), db_frame }, + { "x24", DB_OFFSET(tf_x[24]), db_frame }, + { "x25", DB_OFFSET(tf_x[25]), db_frame }, + { "x26", DB_OFFSET(tf_x[26]), db_frame }, + { "x27", DB_OFFSET(tf_x[27]), db_frame }, + { "x28", DB_OFFSET(tf_x[28]), db_frame }, + { "x29", DB_OFFSET(tf_x[29]), db_frame }, + { "lr", DB_OFFSET(tf_lr), db_frame }, + { "elr", DB_OFFSET(tf_elr), db_frame }, + { "sp", DB_OFFSET(tf_sp), db_frame }, +}; + +struct db_variable *db_eregs = db_regs + nitems(db_regs); + +void +db_show_mdpcpu(struct pcpu *pc) +{ +} + +/* + * Read bytes from kernel address space for debugger. 
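+ *
+ * As the body below shows, the access is guarded by the debugger's jump
+ * buffer, so a fault on a bad address unwinds to the setjmp instead of
+ * re-entering the fault handler; in outline:
+ *
+ *	prev_jb = kdb_jmpbuf(jb);	install a recovery point
+ *	if (setjmp(jb) == 0)
+ *		...touch the memory...
+ *	(void)kdb_jmpbuf(prev_jb);	restore; returns 0 on success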
+ */
+int
+db_read_bytes(vm_offset_t addr, size_t size, char *data)
+{
+	jmp_buf jb;
+	void *prev_jb;
+	const char *src;
+	int ret;
+	uint64_t tmp64;
+	uint32_t tmp32;
+	uint16_t tmp16;
+
+	prev_jb = kdb_jmpbuf(jb);
+	ret = setjmp(jb);
+
+	if (ret == 0) {
+		src = (const char *)addr;
+		if (size == 8 && (addr & 7) == 0) {
+			tmp64 = *((const uint64_t *)src);
+			src = (const char *)&tmp64;
+		} else if (size == 4 && (addr & 3) == 0) {
+			tmp32 = *((const uint32_t *)src);
+			src = (const char *)&tmp32;
+		} else if (size == 2 && (addr & 1) == 0) {
+			tmp16 = *((const uint16_t *)src);
+			src = (const char *)&tmp16;
+		}
+		while (size-- > 0)
+			*data++ = *src++;
+	}
+	(void)kdb_jmpbuf(prev_jb);
+
+	return (ret);
+}
+
+/*
+ * Write bytes to kernel address space for debugger.
+ */
+int
+db_write_bytes(vm_offset_t addr, size_t size, char *data)
+{
+	jmp_buf jb;
+	void *prev_jb;
+	char *dst;
+	size_t osize;
+	int ret;
+	uint64_t tmp64;
+	uint32_t tmp32;
+	uint16_t tmp16;
+
+	/* The copy loops below consume size; save it for the cache sync. */
+	osize = size;
+	prev_jb = kdb_jmpbuf(jb);
+	ret = setjmp(jb);
+	if (ret == 0) {
+		if (size == 8 && (addr & 7) == 0) {
+			dst = (char *)&tmp64;
+			while (size-- > 0)
+				*dst++ = *data++;
+			*((uint64_t *)addr) = tmp64;
+		} else if (size == 4 && (addr & 3) == 0) {
+			dst = (char *)&tmp32;
+			while (size-- > 0)
+				*dst++ = *data++;
+			*((uint32_t *)addr) = tmp32;
+		} else if (size == 2 && (addr & 1) == 0) {
+			dst = (char *)&tmp16;
+			while (size-- > 0)
+				*dst++ = *data++;
+			*((uint16_t *)addr) = tmp16;
+		} else {
+			dst = (char *)addr;
+			while (size-- > 0)
+				*dst++ = *data++;
+		}
+		dsb(ish);
+
+		/* Clean D-cache and invalidate I-cache */
+		cpu_dcache_wb_range(addr, (vm_size_t)osize);
+		cpu_icache_sync_range(addr, (vm_size_t)osize);
+	}
+	(void)kdb_jmpbuf(prev_jb);
+
+	return (ret);
+}
diff --git a/sys/arm64/arm64/db_trace.c b/sys/arm64/arm64/db_trace.c
new file mode 100644
index 000000000000..f892935cd13a
--- /dev/null
+++ b/sys/arm64/arm64/db_trace.c
@@ -0,0 +1,133 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */ + +#include "opt_ddb.h" + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/kdb.h> + +#include <machine/pcb.h> +#include <ddb/ddb.h> +#include <ddb/db_sym.h> + +#include <machine/armreg.h> +#include <machine/debug_monitor.h> +#include <machine/stack.h> + +void +db_md_list_watchpoints() +{ + + dbg_show_watchpoint(); +} + +int +db_md_clr_watchpoint(db_expr_t addr, db_expr_t size) +{ + + return (dbg_remove_watchpoint(NULL, addr, size)); +} + +int +db_md_set_watchpoint(db_expr_t addr, db_expr_t size) +{ + + return (dbg_setup_watchpoint(NULL, addr, size, HW_BREAKPOINT_RW)); +} + +static void +db_stack_trace_cmd(struct unwind_state *frame) +{ + c_db_sym_t sym; + const char *name; + db_expr_t value; + db_expr_t offset; + + while (1) { + uint64_t pc = frame->pc; + int ret; + + ret = unwind_frame(frame); + if (ret < 0) + break; + + sym = db_search_symbol(pc, DB_STGY_ANY, &offset); + if (sym == C_DB_SYM_NULL) { + value = 0; + name = "(null)"; + } else + db_symbol_values(sym, &name, &value); + + db_printf("%s() at ", name); + db_printsym(frame->pc, DB_STGY_PROC); + db_printf("\n"); + + db_printf("\t pc = 0x%016lx lr = 0x%016lx\n", pc, + frame->pc); + db_printf("\t sp = 0x%016lx fp = 0x%016lx\n", frame->sp, + frame->fp); + /* TODO: Show some more registers */ + db_printf("\n"); + } +} + +int +db_trace_thread(struct thread *thr, int count) +{ + struct unwind_state frame; + struct pcb *ctx; + + if (thr != curthread) { + ctx = kdb_thr_ctx(thr); + + frame.sp = (uint64_t)ctx->pcb_sp; + frame.fp = (uint64_t)ctx->pcb_x[29]; + frame.pc = (uint64_t)ctx->pcb_x[30]; + db_stack_trace_cmd(&frame); + } else + db_trace_self(); + return (0); +} + +void +db_trace_self(void) +{ + struct unwind_state frame; + uint64_t sp; + + __asm __volatile("mov %0, sp" : "=&r" (sp)); + + frame.sp = sp; + frame.fp = (uint64_t)__builtin_frame_address(0); + frame.pc = (uint64_t)db_trace_self; + db_stack_trace_cmd(&frame); +} diff --git a/sys/arm64/arm64/debug_monitor.c b/sys/arm64/arm64/debug_monitor.c new file mode 100644 index 000000000000..dcb3645cf5d4 --- /dev/null +++ b/sys/arm64/arm64/debug_monitor.c @@ -0,0 +1,565 @@ +/*- + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Semihalf under + * the sponsorship of the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "opt_ddb.h" + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/kdb.h> +#include <sys/pcpu.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/sysent.h> + +#include <machine/armreg.h> +#include <machine/cpu.h> +#include <machine/debug_monitor.h> +#include <machine/kdb.h> + +#ifdef DDB +#include <ddb/ddb.h> +#include <ddb/db_sym.h> +#endif + +enum dbg_t { + DBG_TYPE_BREAKPOINT = 0, + DBG_TYPE_WATCHPOINT = 1, +}; + +static int dbg_watchpoint_num; +static int dbg_breakpoint_num; +static struct debug_monitor_state kernel_monitor = { + .dbg_flags = DBGMON_KERNEL +}; + +/* Called from the exception handlers */ +void dbg_monitor_enter(struct thread *); +void dbg_monitor_exit(struct thread *, struct trapframe *); + +/* Watchpoints/breakpoints control register bitfields */ +#define DBG_WATCH_CTRL_LEN_1 (0x1 << 5) +#define DBG_WATCH_CTRL_LEN_2 (0x3 << 5) +#define DBG_WATCH_CTRL_LEN_4 (0xf << 5) +#define DBG_WATCH_CTRL_LEN_8 (0xff << 5) +#define DBG_WATCH_CTRL_LEN_MASK(x) ((x) & (0xff << 5)) +#define DBG_WATCH_CTRL_EXEC (0x0 << 3) +#define DBG_WATCH_CTRL_LOAD (0x1 << 3) +#define DBG_WATCH_CTRL_STORE (0x2 << 3) +#define DBG_WATCH_CTRL_ACCESS_MASK(x) ((x) & (0x3 << 3)) + +/* Common for breakpoint and watchpoint */ +#define DBG_WB_CTRL_EL1 (0x1 << 1) +#define DBG_WB_CTRL_EL0 (0x2 << 1) +#define DBG_WB_CTRL_ELX_MASK(x) ((x) & (0x3 << 1)) +#define DBG_WB_CTRL_E (0x1 << 0) + +#define DBG_REG_BASE_BVR 0 +#define DBG_REG_BASE_BCR (DBG_REG_BASE_BVR + 16) +#define DBG_REG_BASE_WVR (DBG_REG_BASE_BCR + 16) +#define DBG_REG_BASE_WCR (DBG_REG_BASE_WVR + 16) + +/* Watchpoint/breakpoint helpers */ +#define DBG_WB_WVR "wvr" +#define DBG_WB_WCR "wcr" +#define DBG_WB_BVR "bvr" +#define DBG_WB_BCR "bcr" + +#define DBG_WB_READ(reg, num, val) do { \ + __asm __volatile("mrs %0, dbg" reg #num "_el1" : "=r" (val)); \ +} while (0) + +#define DBG_WB_WRITE(reg, num, val) do { \ + __asm __volatile("msr dbg" reg #num "_el1, %0" :: "r" (val)); \ +} while (0) + +#define READ_WB_REG_CASE(reg, num, offset, val) \ + case (num + offset): \ + DBG_WB_READ(reg, num, val); \ + break + +#define WRITE_WB_REG_CASE(reg, num, offset, val) \ + case (num + offset): \ + DBG_WB_WRITE(reg, num, val); \ + break + +#define SWITCH_CASES_READ_WB_REG(reg, offset, val) \ + READ_WB_REG_CASE(reg, 0, offset, val); \ + READ_WB_REG_CASE(reg, 1, offset, val); \ + READ_WB_REG_CASE(reg, 2, offset, val); \ + READ_WB_REG_CASE(reg, 3, offset, val); \ + READ_WB_REG_CASE(reg, 4, offset, val); \ + READ_WB_REG_CASE(reg, 5, offset, val); \ + READ_WB_REG_CASE(reg, 6, offset, val); \ + READ_WB_REG_CASE(reg, 7, offset, val); \ + READ_WB_REG_CASE(reg, 8, offset, val); \ + READ_WB_REG_CASE(reg, 9, offset, val); \ + READ_WB_REG_CASE(reg, 10, offset, val); \ + READ_WB_REG_CASE(reg, 11, offset, val); \ + READ_WB_REG_CASE(reg, 12, offset, val); \ + READ_WB_REG_CASE(reg, 13, offset, val); \ + READ_WB_REG_CASE(reg, 14, offset, val); \ + READ_WB_REG_CASE(reg, 15, 
offset, val)

+#define SWITCH_CASES_WRITE_WB_REG(reg, offset, val) \
+	WRITE_WB_REG_CASE(reg, 0, offset, val); \
+	WRITE_WB_REG_CASE(reg, 1, offset, val); \
+	WRITE_WB_REG_CASE(reg, 2, offset, val); \
+	WRITE_WB_REG_CASE(reg, 3, offset, val); \
+	WRITE_WB_REG_CASE(reg, 4, offset, val); \
+	WRITE_WB_REG_CASE(reg, 5, offset, val); \
+	WRITE_WB_REG_CASE(reg, 6, offset, val); \
+	WRITE_WB_REG_CASE(reg, 7, offset, val); \
+	WRITE_WB_REG_CASE(reg, 8, offset, val); \
+	WRITE_WB_REG_CASE(reg, 9, offset, val); \
+	WRITE_WB_REG_CASE(reg, 10, offset, val); \
+	WRITE_WB_REG_CASE(reg, 11, offset, val); \
+	WRITE_WB_REG_CASE(reg, 12, offset, val); \
+	WRITE_WB_REG_CASE(reg, 13, offset, val); \
+	WRITE_WB_REG_CASE(reg, 14, offset, val); \
+	WRITE_WB_REG_CASE(reg, 15, offset, val)
+
+#ifdef DDB
+static uint64_t
+dbg_wb_read_reg(int reg, int n)
+{
+	uint64_t val = 0;
+
+	switch (reg + n) {
+	SWITCH_CASES_READ_WB_REG(DBG_WB_WVR, DBG_REG_BASE_WVR, val);
+	SWITCH_CASES_READ_WB_REG(DBG_WB_WCR, DBG_REG_BASE_WCR, val);
+	SWITCH_CASES_READ_WB_REG(DBG_WB_BVR, DBG_REG_BASE_BVR, val);
+	SWITCH_CASES_READ_WB_REG(DBG_WB_BCR, DBG_REG_BASE_BCR, val);
+	default:
+		printf("trying to read from wrong debug register %d\n", n);
+	}
+
+	return (val);
+}
+#endif /* DDB */
+
+static void
+dbg_wb_write_reg(int reg, int n, uint64_t val)
+{
+	switch (reg + n) {
+	SWITCH_CASES_WRITE_WB_REG(DBG_WB_WVR, DBG_REG_BASE_WVR, val);
+	SWITCH_CASES_WRITE_WB_REG(DBG_WB_WCR, DBG_REG_BASE_WCR, val);
+	SWITCH_CASES_WRITE_WB_REG(DBG_WB_BVR, DBG_REG_BASE_BVR, val);
+	SWITCH_CASES_WRITE_WB_REG(DBG_WB_BCR, DBG_REG_BASE_BCR, val);
+	default:
+		printf("trying to write to wrong debug register %d\n", n);
+		return;
+	}
+	isb();
+}
+
+#ifdef DDB
+void
+kdb_cpu_set_singlestep(void)
+{
+
+	kdb_frame->tf_spsr |= DBG_SPSR_SS;
+	WRITE_SPECIALREG(mdscr_el1, READ_SPECIALREG(mdscr_el1) |
+	    DBG_MDSCR_SS | DBG_MDSCR_KDE);
+
+	/*
+	 * Disable breakpoints and watchpoints; otherwise, stepping over a
+	 * watched instruction will trigger a breakpoint exception instead
+	 * of a single-step exception and will lock the CPU on that
+	 * instruction forever.
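+	 *
+	 * The register recipe used here, in outline (taken from the
+	 * surrounding code):
+	 *
+	 *	spsr |= DBG_SPSR_SS;			arm the step logic
+	 *	mdscr_el1 |= DBG_MDSCR_SS | DBG_MDSCR_KDE;
+	 *	mdscr_el1 &= ~DBG_MDSCR_MDE;		mask break/watchpoints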
+	 */
+	if ((kernel_monitor.dbg_flags & DBGMON_ENABLED) != 0) {
+		WRITE_SPECIALREG(mdscr_el1,
+		    READ_SPECIALREG(mdscr_el1) & ~DBG_MDSCR_MDE);
+	}
+}
+
+void
+kdb_cpu_clear_singlestep(void)
+{
+
+	WRITE_SPECIALREG(mdscr_el1, READ_SPECIALREG(mdscr_el1) &
+	    ~(DBG_MDSCR_SS | DBG_MDSCR_KDE));
+
+	/* Restore breakpoints and watchpoints */
+	if ((kernel_monitor.dbg_flags & DBGMON_ENABLED) != 0) {
+		WRITE_SPECIALREG(mdscr_el1,
+		    READ_SPECIALREG(mdscr_el1) | DBG_MDSCR_MDE);
+
+		if ((kernel_monitor.dbg_flags & DBGMON_KERNEL) != 0) {
+			WRITE_SPECIALREG(mdscr_el1,
+			    READ_SPECIALREG(mdscr_el1) | DBG_MDSCR_KDE);
+		}
+	}
+}
+
+static const char *
+dbg_watchtype_str(uint32_t type)
+{
+	switch (type) {
+	case DBG_WATCH_CTRL_EXEC:
+		return ("execute");
+	case DBG_WATCH_CTRL_STORE:
+		return ("write");
+	case DBG_WATCH_CTRL_LOAD:
+		return ("read");
+	case DBG_WATCH_CTRL_LOAD | DBG_WATCH_CTRL_STORE:
+		return ("read/write");
+	default:
+		return ("invalid");
+	}
+}
+
+static int
+dbg_watchtype_len(uint32_t len)
+{
+	switch (len) {
+	case DBG_WATCH_CTRL_LEN_1:
+		return (1);
+	case DBG_WATCH_CTRL_LEN_2:
+		return (2);
+	case DBG_WATCH_CTRL_LEN_4:
+		return (4);
+	case DBG_WATCH_CTRL_LEN_8:
+		return (8);
+	default:
+		return (0);
+	}
+}
+
+void
+dbg_show_watchpoint(void)
+{
+	uint32_t wcr, len, type;
+	uint64_t addr;
+	int i;
+
+	db_printf("\nhardware watchpoints:\n");
+	db_printf("  watch    status        type  len     address              symbol\n");
+	db_printf("  -----   --------  ----------  ---  ------------------  ------------------\n");
+	for (i = 0; i < dbg_watchpoint_num; i++) {
+		wcr = dbg_wb_read_reg(DBG_REG_BASE_WCR, i);
+		if ((wcr & DBG_WB_CTRL_E) != 0) {
+			type = DBG_WATCH_CTRL_ACCESS_MASK(wcr);
+			len = DBG_WATCH_CTRL_LEN_MASK(wcr);
+			addr = dbg_wb_read_reg(DBG_REG_BASE_WVR, i);
+			db_printf("  %-5d  %-8s  %10s  %3d  0x%16lx  ",
+			    i, "enabled", dbg_watchtype_str(type),
+			    dbg_watchtype_len(len), addr);
+			db_printsym((db_addr_t)addr, DB_STGY_ANY);
+			db_printf("\n");
+		} else {
+			db_printf("  %-5d  disabled\n", i);
+		}
+	}
+}
+#endif /* DDB */
+
+static int
+dbg_find_free_slot(struct debug_monitor_state *monitor, enum dbg_t type)
+{
+	uint64_t *reg;
+	u_int max, i;
+
+	switch (type) {
+	case DBG_TYPE_BREAKPOINT:
+		max = dbg_breakpoint_num;
+		reg = monitor->dbg_bcr;
+		break;
+	case DBG_TYPE_WATCHPOINT:
+		max = dbg_watchpoint_num;
+		reg = monitor->dbg_wcr;
+		break;
+	default:
+		printf("Unsupported debug type\n");
+		return (-1);
+	}
+
+	for (i = 0; i < max; i++) {
+		if ((reg[i] & DBG_WB_CTRL_E) == 0)
+			return (i);
+	}
+
+	return (-1);
+}
+
+static int
+dbg_find_slot(struct debug_monitor_state *monitor, enum dbg_t type,
+    vm_offset_t addr)
+{
+	uint64_t *reg_addr, *reg_ctrl;
+	u_int max, i;
+
+	switch (type) {
+	case DBG_TYPE_BREAKPOINT:
+		max = dbg_breakpoint_num;
+		reg_addr = monitor->dbg_bvr;
+		reg_ctrl = monitor->dbg_bcr;
+		break;
+	case DBG_TYPE_WATCHPOINT:
+		max = dbg_watchpoint_num;
+		reg_addr = monitor->dbg_wvr;
+		reg_ctrl = monitor->dbg_wcr;
+		break;
+	default:
+		printf("Unsupported debug type\n");
+		return (-1);
+	}
+
+	for (i = 0; i < max; i++) {
+		if (reg_addr[i] == addr &&
+		    (reg_ctrl[i] & DBG_WB_CTRL_E) != 0)
+			return (i);
+	}
+
+	return (-1);
+}
+
+int
+dbg_setup_watchpoint(struct debug_monitor_state *monitor, vm_offset_t addr,
+    vm_size_t size, enum dbg_access_t access)
+{
+	uint64_t wcr_size, wcr_priv, wcr_access;
+	int i;
+
+	if (monitor == NULL)
+		monitor = &kernel_monitor;
+
+	i = dbg_find_free_slot(monitor, DBG_TYPE_WATCHPOINT);
+	if (i == -1) {
+		printf("Cannot find slot for watchpoint, max %d"
+		    " watchpoints supported\n",
dbg_watchpoint_num);
+		return (-1);
+	}
+
+	switch (size) {
+	case 1:
+		wcr_size = DBG_WATCH_CTRL_LEN_1;
+		break;
+	case 2:
+		wcr_size = DBG_WATCH_CTRL_LEN_2;
+		break;
+	case 4:
+		wcr_size = DBG_WATCH_CTRL_LEN_4;
+		break;
+	case 8:
+		wcr_size = DBG_WATCH_CTRL_LEN_8;
+		break;
+	default:
+		printf("Unsupported address size for watchpoint\n");
+		return (-1);
+	}
+
+	if ((monitor->dbg_flags & DBGMON_KERNEL) == 0)
+		wcr_priv = DBG_WB_CTRL_EL0;
+	else
+		wcr_priv = DBG_WB_CTRL_EL1;
+
+	switch (access) {
+	case HW_BREAKPOINT_X:
+		wcr_access = DBG_WATCH_CTRL_EXEC;
+		break;
+	case HW_BREAKPOINT_R:
+		wcr_access = DBG_WATCH_CTRL_LOAD;
+		break;
+	case HW_BREAKPOINT_W:
+		wcr_access = DBG_WATCH_CTRL_STORE;
+		break;
+	case HW_BREAKPOINT_RW:
+		wcr_access = DBG_WATCH_CTRL_LOAD | DBG_WATCH_CTRL_STORE;
+		break;
+	default:
+		printf("Unsupported access type for watchpoint\n");
+		return (-1);
+	}
+
+	monitor->dbg_wvr[i] = addr;
+	monitor->dbg_wcr[i] = wcr_size | wcr_access | wcr_priv | DBG_WB_CTRL_E;
+	monitor->dbg_enable_count++;
+	monitor->dbg_flags |= DBGMON_ENABLED;
+
+	dbg_register_sync(monitor);
+	return (0);
+}
+
+int
+dbg_remove_watchpoint(struct debug_monitor_state *monitor, vm_offset_t addr,
+    vm_size_t size)
+{
+	int i;
+
+	if (monitor == NULL)
+		monitor = &kernel_monitor;
+
+	i = dbg_find_slot(monitor, DBG_TYPE_WATCHPOINT, addr);
+	if (i == -1) {
+		printf("Cannot find watchpoint for address 0x%lx\n", addr);
+		return (-1);
+	}
+
+	monitor->dbg_wvr[i] = 0;
+	monitor->dbg_wcr[i] = 0;
+	monitor->dbg_enable_count--;
+	if (monitor->dbg_enable_count == 0)
+		monitor->dbg_flags &= ~DBGMON_ENABLED;
+
+	dbg_register_sync(monitor);
+	return (0);
+}
+
+void
+dbg_register_sync(struct debug_monitor_state *monitor)
+{
+	uint64_t mdscr;
+	int i;
+
+	if (monitor == NULL)
+		monitor = &kernel_monitor;
+
+	mdscr = READ_SPECIALREG(mdscr_el1);
+	if ((monitor->dbg_flags & DBGMON_ENABLED) == 0) {
+		mdscr &= ~(DBG_MDSCR_MDE | DBG_MDSCR_KDE);
+	} else {
+		for (i = 0; i < dbg_breakpoint_num; i++) {
+			dbg_wb_write_reg(DBG_REG_BASE_BCR, i,
+			    monitor->dbg_bcr[i]);
+			dbg_wb_write_reg(DBG_REG_BASE_BVR, i,
+			    monitor->dbg_bvr[i]);
+		}
+
+		for (i = 0; i < dbg_watchpoint_num; i++) {
+			dbg_wb_write_reg(DBG_REG_BASE_WCR, i,
+			    monitor->dbg_wcr[i]);
+			dbg_wb_write_reg(DBG_REG_BASE_WVR, i,
+			    monitor->dbg_wvr[i]);
+		}
+		mdscr |= DBG_MDSCR_MDE;
+		if ((monitor->dbg_flags & DBGMON_KERNEL) == DBGMON_KERNEL)
+			mdscr |= DBG_MDSCR_KDE;
+	}
+	WRITE_SPECIALREG(mdscr_el1, mdscr);
+	isb();
+}
+
+void
+dbg_monitor_init(void)
+{
+	u_int i;
+
+	/* Find out how many breakpoints and watchpoints we can use */
+	dbg_watchpoint_num = ((READ_SPECIALREG(id_aa64dfr0_el1) >> 20) & 0xf) + 1;
+	dbg_breakpoint_num = ((READ_SPECIALREG(id_aa64dfr0_el1) >> 12) & 0xf) + 1;
+
+	if (bootverbose && PCPU_GET(cpuid) == 0) {
+		printf("%d watchpoints and %d breakpoints supported\n",
+		    dbg_watchpoint_num, dbg_breakpoint_num);
+	}
+
+	/*
+	 * We have a limited number of {watch,break}points, each consisting
+	 * of two registers:
+	 * - the wcr/bcr register configures the corresponding
+	 *   {watch,break}point's behaviour
+	 * - the wvr/bvr register holds the address we are watching for
+	 *
+	 * Reset all breakpoints and watchpoints.
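+	 *
+	 * The counts above come from ID_AA64DFR0_EL1, whose WRPs and BRPs
+	 * fields encode (number of registers - 1), hence the "+ 1":
+	 *
+	 *	wrps = ((id_aa64dfr0_el1 >> 20) & 0xf) + 1;
+	 *	brps = ((id_aa64dfr0_el1 >> 12) & 0xf) + 1;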
+	 */
+	for (i = 0; i < dbg_watchpoint_num; i++) {
+		dbg_wb_write_reg(DBG_REG_BASE_WCR, i, 0);
+		dbg_wb_write_reg(DBG_REG_BASE_WVR, i, 0);
+	}
+
+	for (i = 0; i < dbg_breakpoint_num; i++) {
+		dbg_wb_write_reg(DBG_REG_BASE_BCR, i, 0);
+		dbg_wb_write_reg(DBG_REG_BASE_BVR, i, 0);
+	}
+
+	dbg_enable();
+}
+
+void
+dbg_monitor_enter(struct thread *thread)
+{
+	int i;
+
+	if ((kernel_monitor.dbg_flags & DBGMON_ENABLED) != 0) {
+		/* Install the kernel version of the registers */
+		dbg_register_sync(&kernel_monitor);
+	} else if ((thread->td_pcb->pcb_dbg_regs.dbg_flags & DBGMON_ENABLED) != 0) {
+		/* Disable the user breakpoints until we return to userspace */
+		for (i = 0; i < dbg_watchpoint_num; i++) {
+			dbg_wb_write_reg(DBG_REG_BASE_WCR, i, 0);
+			dbg_wb_write_reg(DBG_REG_BASE_WVR, i, 0);
+		}
+
+		for (i = 0; i < dbg_breakpoint_num; ++i) {
+			dbg_wb_write_reg(DBG_REG_BASE_BCR, i, 0);
+			dbg_wb_write_reg(DBG_REG_BASE_BVR, i, 0);
+		}
+		WRITE_SPECIALREG(mdscr_el1,
+		    READ_SPECIALREG(mdscr_el1) &
+		    ~(DBG_MDSCR_MDE | DBG_MDSCR_KDE));
+		isb();
+	}
+}
+
+void
+dbg_monitor_exit(struct thread *thread, struct trapframe *frame)
+{
+	int i;
+
+	/*
+	 * PSR_D is an aarch64-only flag. On aarch32, the corresponding bit
+	 * switches the processor to big-endian, so avoid setting it for
+	 * 32-bit binaries.
+	 */
+	if (!(SV_PROC_FLAG(thread->td_proc, SV_ILP32)))
+		frame->tf_spsr |= PSR_D;
+	if ((thread->td_pcb->pcb_dbg_regs.dbg_flags & DBGMON_ENABLED) != 0) {
+		/* Install the thread's version of the registers */
+		dbg_register_sync(&thread->td_pcb->pcb_dbg_regs);
+		frame->tf_spsr &= ~PSR_D;
+	} else if ((kernel_monitor.dbg_flags & DBGMON_ENABLED) != 0) {
+		/* Disable the kernel breakpoints until we reenter the kernel */
+		for (i = 0; i < dbg_watchpoint_num; i++) {
+			dbg_wb_write_reg(DBG_REG_BASE_WCR, i, 0);
+			dbg_wb_write_reg(DBG_REG_BASE_WVR, i, 0);
+		}
+
+		for (i = 0; i < dbg_breakpoint_num; ++i) {
+			dbg_wb_write_reg(DBG_REG_BASE_BCR, i, 0);
+			dbg_wb_write_reg(DBG_REG_BASE_BVR, i, 0);
+		}
+		WRITE_SPECIALREG(mdscr_el1,
+		    READ_SPECIALREG(mdscr_el1) &
+		    ~(DBG_MDSCR_MDE | DBG_MDSCR_KDE));
+		isb();
+	}
+}
diff --git a/sys/arm64/arm64/disassem.c b/sys/arm64/arm64/disassem.c
new file mode 100644
index 000000000000..ce0bf7660b02
--- /dev/null
+++ b/sys/arm64/arm64/disassem.c
@@ -0,0 +1,545 @@
+/*-
+ * Copyright (c) 2016 Cavium
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); +#include <sys/param.h> + +#include <sys/systm.h> +#include <machine/disassem.h> +#include <machine/armreg.h> +#include <ddb/ddb.h> + +#define ARM64_MAX_TOKEN_LEN 8 +#define ARM64_MAX_TOKEN_CNT 10 + +#define ARM_INSN_SIZE_OFFSET 30 +#define ARM_INSN_SIZE_MASK 0x3 + +/* Special options for instruction printing */ +#define OP_SIGN_EXT (1UL << 0) /* Sign-extend immediate value */ +#define OP_LITERAL (1UL << 1) /* Use literal (memory offset) */ +#define OP_MULT_4 (1UL << 2) /* Multiply immediate by 4 */ +#define OP_SF32 (1UL << 3) /* Force 32-bit access */ +#define OP_SF_INV (1UL << 6) /* SF is inverted (1 means 32 bit access) */ + +static const char *w_reg[] = { + "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", + "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", + "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", + "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wSP", +}; + +static const char *x_reg[] = { + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28", "x29", "LR", "SP", +}; + +static const char *shift_2[] = { + "LSL", "LSR", "ASR", "RSV" +}; + +/* + * Structure representing single token (operand) inside instruction. + * name - name of operand + * pos - position within the instruction (in bits) + * len - operand length (in bits) + */ +struct arm64_insn_token { + char name[ARM64_MAX_TOKEN_LEN]; + int pos; + int len; +}; + +/* + * Define generic types for instruction printing. + */ +enum arm64_format_type { + TYPE_01, /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64 + OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */ + TYPE_02, /* OP <RT>, [<RN>, #<imm>]{!}] SF32/64 + OP <RT>, [<RN>], #<imm>{!} SF32/64 + OP <RT>, <RN>, <RM> {, EXTEND AMOUNT } */ + TYPE_03, /* OP <RT>, #imm SF32/64 */ +}; + +/* + * Structure representing single parsed instruction format. + * name - opcode name + * format - opcode format in a human-readable way + * type - syntax type for printing + * special_ops - special options passed to a printer (if any) + * mask - bitmask for instruction matching + * pattern - pattern to look for + * tokens - array of tokens (operands) inside instruction + */ +struct arm64_insn { + char* name; + char* format; + enum arm64_format_type type; + uint64_t special_ops; + uint32_t mask; + uint32_t pattern; + struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT]; +}; + +/* + * Specify instruction opcode format in a human-readable way. Use notation + * obtained from ARM Architecture Reference Manual for ARMv8-A. + * + * Format string description: + * Each group must be separated by "|". Group made of 0/1 is used to + * generate mask and pattern for instruction matching. Groups containing + * an operand token (in format NAME(length_bits)) are used to retrieve any + * operand data from the instruction. 
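For example,
+ * "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)" (the "add"
+ * register form below) contributes its fixed 0/1 bits to the mask and
+ * pattern and yields the tokens SF, SHIFT, RM, IMM, RN and RD.
+ *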
Names here must be meaningful + * and match the one described in the Manual. + * + * Token description: + * SF - "0" represents 32-bit access, "1" represents 64-bit access + * SHIFT - type of shift (instruction dependent) + * IMM - immediate value + * Rx - register number + * OPTION - command specific options + * SCALE - scaling of immediate value + */ +static struct arm64_insn arm64_i[] = { + { "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)", + TYPE_01, 0 }, + { "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)", + TYPE_01, 0 }, + { "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)", + TYPE_01, 0 }, + { "ldr", "1|SF(1)|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", + TYPE_02, OP_SIGN_EXT }, /* ldr immediate post/pre index */ + { "ldr", "1|SF(1)|11100101|IMM(12)|RN(5)|RT(5)", + TYPE_02, 0 }, /* ldr immediate unsigned */ + { "ldr", "1|SF(1)|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", + TYPE_02, 0 }, /* ldr register */ + { "ldr", "0|SF(1)|011000|IMM(19)|RT(5)", + TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */ + { "ldrb", "00|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", + TYPE_02, OP_SIGN_EXT | OP_SF32 }, /* ldrb immediate post/pre index */ + { "ldrb", "00|11100101|IMM(12)|RN(5)|RT(5)", + TYPE_02, OP_SF32 }, /* ldrb immediate unsigned */ + { "ldrb", "00|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", + TYPE_02, OP_SF32 }, /* ldrb register */ + { "ldrh", "01|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", TYPE_02, + OP_SIGN_EXT | OP_SF32 }, /* ldrh immediate post/pre index */ + { "ldrh", "01|11100101|IMM(12)|RN(5)|RT(5)", + TYPE_02, OP_SF32 }, /* ldrh immediate unsigned */ + { "ldrh", "01|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", + TYPE_02, OP_SF32 }, /* ldrh register */ + { "ldrsb", "001110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)", + TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsb immediate post/pre index */ + { "ldrsb", "001110011|SF(1)|IMM(12)|RN(5)|RT(5)",\ + TYPE_02, OP_SF_INV}, /* ldrsb immediate unsigned */ + { "ldrsb", "001110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", + TYPE_02, OP_SF_INV }, /* ldrsb register */ + { "ldrsh", "011110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)", + TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsh immediate post/pre index */ + { "ldrsh", "011110011|SF(1)|IMM(12)|RN(5)|RT(5)", + TYPE_02, OP_SF_INV}, /* ldrsh immediate unsigned */ + { "ldrsh", "011110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", + TYPE_02, OP_SF_INV }, /* ldrsh register */ + { "ldrsw", "10111000100|IMM(9)|OPTION(2)|RN(5)|RT(5)", + TYPE_02, OP_SIGN_EXT }, /* ldrsw immediate post/pre index */ + { "ldrsw", "1011100110|IMM(12)|RN(5)|RT(5)", + TYPE_02, 0 }, /* ldrsw immediate unsigned */ + { "ldrsw", "10111000101|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", + TYPE_02, 0 }, /* ldrsw register */ + { "ldrsw", "10011000|IMM(19)|RT(5)", + TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */ + { NULL, NULL } +}; + +static void +arm64_disasm_generate_masks(struct arm64_insn *tab) +{ + uint32_t mask, val; + int a, i; + int len, ret; + int token = 0; + char *format; + int error; + + while (tab->name != NULL) { + mask = 0; + val = 0; + format = tab->format; + token = 0; + error = 0; + + /* + * For each entry analyze format strings from the + * left (i.e. from the MSB). 
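+		 *
+		 * For the "mov" entry ("SF(1)|001000100000000000000|RN(5)|RD(5)"),
+		 * for instance, this produces mask 0x7ffffc00 (bits 30-10
+		 * are fixed) and pattern 0x11000000.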
+		 */
+		a = (INSN_SIZE * NBBY) - 1;
+		while (*format != '\0' && (a >= 0)) {
+			switch(*format) {
+			case '0':
+				/* Bit is 0, add to mask and pattern */
+				mask |= (1 << a);
+				a--;
+				format++;
+				break;
+			case '1':
+				/* Bit is 1, add to mask and pattern */
+				mask |= (1 << a);
+				val |= (1 << a);
+				a--;
+				format++;
+				break;
+			case '|':
+				/* skip */
+				format++;
+				break;
+			default:
+				/* Token found, copy the name */
+				memset(tab->tokens[token].name, 0,
+				    sizeof(tab->tokens[token].name));
+				i = 0;
+				while (*format != '(') {
+					tab->tokens[token].name[i] = *format;
+					i++;
+					format++;
+					if (i >= ARM64_MAX_TOKEN_LEN) {
+						printf("ERROR: token too long in op %s\n",
+						    tab->name);
+						error = 1;
+						break;
+					}
+				}
+				if (error != 0)
+					break;
+
+				/* Read the length value */
+				ret = sscanf(format, "(%d)", &len);
+				if (ret == 1) {
+					if (token >= ARM64_MAX_TOKEN_CNT) {
+						printf("ERROR: too many tokens in op %s\n",
+						    tab->name);
+						error = 1;
+						break;
+					}
+
+					a -= len;
+					tab->tokens[token].pos = a + 1;
+					tab->tokens[token].len = len;
+					token++;
+				}
+
+				/* Skip to the end of the token */
+				while (*format != 0 && *format != '|')
+					format++;
+			}
+		}
+
+		/* Write mask and pattern to the instruction array */
+		tab->mask = mask;
+		tab->pattern = val;
+
+		/*
+		 * If we got here, the format string must have been fully
+		 * parsed and "a" should point to -1. If it does not, the
+		 * format string carries the wrong number of bits. Mark such
+		 * an entry as invalid and prevent it from being matched.
+		 */
+		if (*format != 0 || (a != -1) || (error != 0)) {
+			tab->mask = 0;
+			tab->pattern = 0xffffffff;
+			printf("ERROR: skipping instruction op %s\n",
+			    tab->name);
+		}
+
+		tab++;
+	}
+}
+
+static int
+arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode,
+    const char *token, int *val)
+{
+	int i;
+
+	for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
+		if (strcmp(insn->tokens[i].name, token) == 0) {
+			*val = (opcode >> insn->tokens[i].pos &
+			    ((1 << insn->tokens[i].len) - 1));
+			return (0);
+		}
+	}
+
+	return (EINVAL);
+}
+
+static int
+arm64_disasm_read_token_sign_ext(struct arm64_insn *insn, u_int opcode,
+    const char *token, int *val)
+{
+	int i;
+	int msk;
+
+	for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
+		if (strcmp(insn->tokens[i].name, token) == 0) {
+			msk = (1 << insn->tokens[i].len) - 1;
+			*val = ((opcode >> insn->tokens[i].pos) & msk);
+
+			/* If the last bit is 1, sign-extend the value */
+			if (*val & (1 << (insn->tokens[i].len - 1)))
+				*val |= ~msk;
+
+			return (0);
+		}
+	}
+
+	return (EINVAL);
+}
+
+static const char *
+arm64_reg(int b64, int num)
+{
+
+	if (b64 != 0)
+		return (x_reg[num]);
+
+	return (w_reg[num]);
+}
+
+vm_offset_t
+disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt)
+{
+	struct arm64_insn *i_ptr = arm64_i;
+	uint32_t insn;
+	int matchp;
+	int ret;
+	int shift, rm, rt, rd, rn, imm, sf, idx, option, scale, amount;
+	int sign_ext;
+	int rm_absent;
+	/* Indicates whether the immediate belongs outside or inside brackets */
+	int inside;
+	/* Print an exclamation mark if pre-incremented */
+	int pre;
+
+	/* Initialize defaults, all are 0 except SF indicating 64-bit access */
+	shift = rd = rm = rn = imm = idx = option = amount = scale = 0;
+	sign_ext = 0;
+	sf = 1;
+
+	matchp = 0;
+	insn = di->di_readword(loc);
+	while (i_ptr->name) {
+		/* If the mask is 0 then the parser was not initialized yet */
+		if ((i_ptr->mask != 0) &&
+		    ((insn & i_ptr->mask) == i_ptr->pattern)) {
+			matchp = 1;
+			break;
+		}
+		i_ptr++;
+	}
+	if (matchp == 0)
+		goto undefined;
+
+	/* Global options */
+	if (i_ptr->special_ops & OP_SF32)
+		sf = 0;
+
+	/* Global optional tokens */
+	
arm64_disasm_read_token(i_ptr, insn, "SF", &sf); + if (i_ptr->special_ops & OP_SF_INV) + sf = 1 - sf; + if (arm64_disasm_read_token(i_ptr, insn, "SIGN", &sign_ext) == 0) + sign_ext = 1 - sign_ext; + if (i_ptr->special_ops & OP_SIGN_EXT) + sign_ext = 1; + if (sign_ext != 0) + arm64_disasm_read_token_sign_ext(i_ptr, insn, "IMM", &imm); + else + arm64_disasm_read_token(i_ptr, insn, "IMM", &imm); + if (i_ptr->special_ops & OP_MULT_4) + imm <<= 2; + + /* Print opcode by type */ + switch (i_ptr->type) { + case TYPE_01: + /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64 + OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */ + + /* Mandatory tokens */ + ret = arm64_disasm_read_token(i_ptr, insn, "RD", &rd); + ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn); + if (ret != 0) { + printf("ERROR: Missing mandatory token for op %s type %d\n", + i_ptr->name, i_ptr->type); + goto undefined; + } + + /* Optional tokens */ + arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift); + rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm); + + di->di_printf("%s\t%s, %s", i_ptr->name, arm64_reg(sf, rd), + arm64_reg(sf, rn)); + + /* If RM is present use it, otherwise use immediate notation */ + if (rm_absent == 0) { + di->di_printf(", %s", arm64_reg(sf, rm)); + if (imm != 0) + di->di_printf(", %s #%d", shift_2[shift], imm); + } else { + if (imm != 0 || shift != 0) + di->di_printf(", #0x%x", imm); + if (shift != 0) + di->di_printf(" LSL #12"); + } + break; + case TYPE_02: + /* OP <RT>, [<RN>, #<imm>]{!}] SF32/64 + OP <RT>, [<RN>], #<imm>{!} SF32/64 + OP <RT>, <RN>, <RM> {, EXTEND AMOUNT } */ + + /* Mandatory tokens */ + ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt); + ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn); + if (ret != 0) { + printf("ERROR: Missing mandatory token for op %s type %d\n", + i_ptr->name, i_ptr->type); + goto undefined; + } + + /* Optional tokens */ + arm64_disasm_read_token(i_ptr, insn, "OPTION", &option); + arm64_disasm_read_token(i_ptr, insn, "SCALE", &scale); + rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm); + + if (rm_absent) { + /* + * In unsigned operation, shift immediate value + * and reset options to default. 
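+			 * The shift amount is the access size taken from
+			 * insn[31:30]; e.g. a 64-bit "ldr" (size 0b11)
+			 * scales an unsigned IMM12 of 1 to a byte offset
+			 * of 8.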
+			 */
+			if (sign_ext == 0) {
+				imm = imm << ((insn >> ARM_INSN_SIZE_OFFSET) &
+				    ARM_INSN_SIZE_MASK);
+				option = 0;
+			}
+			switch (option) {
+			case 0x0:
+				pre = 0;
+				inside = 1;
+				break;
+			case 0x1:
+				pre = 0;
+				inside = 0;
+				break;
+			case 0x2:
+			default:
+				pre = 1;
+				inside = 1;
+				break;
+			}
+
+			di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt));
+			if (inside != 0) {
+				di->di_printf("[%s", arm64_reg(1, rn));
+				if (imm != 0)
+					di->di_printf(", #%d", imm);
+				di->di_printf("]");
+			} else {
+				di->di_printf("[%s]", arm64_reg(1, rn));
+				if (imm != 0)
+					di->di_printf(", #%d", imm);
+			}
+			if (pre != 0)
+				di->di_printf("!");
+		} else {
+			/* The last bit of the option field selects the 32/64-bit offset */
+			di->di_printf("%s\t%s, [%s, %s", i_ptr->name,
+			    arm64_reg(sf, rt), arm64_reg(1, rn),
+			    arm64_reg(option & 1, rm));
+
+			/* Calculate the amount, it's op(31:30) */
+			amount = (insn >> ARM_INSN_SIZE_OFFSET) &
+			    ARM_INSN_SIZE_MASK;
+
+			switch (option) {
+			case 0x2:
+				di->di_printf(", uxtw #%d", amount);
+				break;
+			case 0x3:
+				if (scale != 0)
+					di->di_printf(", lsl #%d", amount);
+				break;
+			case 0x6:
+				di->di_printf(", sxtw #%d", amount);
+				break;
+			case 0x7:
+				di->di_printf(", sxtx #%d", amount);
+				break;
+			default:
+				di->di_printf(", RSVD");
+				break;
+			}
+			di->di_printf("]");
+		}
+
+		break;
+
+	case TYPE_03:
+		/* OP <RT>, #imm SF32/64 */
+
+		/* Mandatory tokens */
+		ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
+		if (ret != 0) {
+			printf("ERROR: Missing mandatory token for op %s type %d\n",
+			    i_ptr->name, i_ptr->type);
+			goto undefined;
+		}
+
+		di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt));
+		if (i_ptr->special_ops & OP_LITERAL)
+			di->di_printf("0x%lx", loc + imm);
+		else
+			di->di_printf("#%d", imm);
+
+		break;
+	default:
+		goto undefined;
+	}
+
+	di->di_printf("\n");
+	return (loc + INSN_SIZE);
+
+undefined:
+	di->di_printf("undefined\t%08x\n", insn);
+	return (loc + INSN_SIZE);
+}
+
+/* Parse format strings at the very beginning */
+SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES,
+    SI_ORDER_FIRST, arm64_disasm_generate_masks, arm64_i);
diff --git a/sys/arm64/arm64/dump_machdep.c b/sys/arm64/arm64/dump_machdep.c
new file mode 100644
index 000000000000..d92777fea051
--- /dev/null
+++ b/sys/arm64/arm64/dump_machdep.c
@@ -0,0 +1,73 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/kerneldump.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+
+#include <machine/dump.h>
+
+int do_minidump = 1;
+TUNABLE_INT("debug.minidump", &do_minidump);
+SYSCTL_INT(_debug, OID_AUTO, minidump, CTLFLAG_RW, &do_minidump, 0,
+    "Enable mini crash dumps");
+
+void
+dumpsys_wbinv_all(void)
+{
+
+	printf("dumpsys_wbinv_all\n");
+}
+
+void
+dumpsys_map_chunk(vm_paddr_t pa, size_t chunk __unused, void **va)
+{
+
+	printf("dumpsys_map_chunk\n");
+	while(1);
+}
+
+/*
+ * Add a header to be used by libkvm to get the va to pa delta
+ */
+int
+dumpsys_write_aux_headers(struct dumperinfo *di)
+{
+
+	printf("dumpsys_write_aux_headers\n");
+	return (0);
+}
diff --git a/sys/arm64/arm64/efirt_machdep.c b/sys/arm64/arm64/efirt_machdep.c
new file mode 100644
index 000000000000..cd4e5d7bae00
--- /dev/null
+++ b/sys/arm64/arm64/efirt_machdep.c
@@ -0,0 +1,280 @@
+/*-
+ * Copyright (c) 2004 Marcel Moolenaar
+ * Copyright (c) 2001 Doug Rabson
+ * Copyright (c) 2016 The FreeBSD Foundation
+ * Copyright (c) 2017 Andrew Turner
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
+ * ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/efi.h> +#include <sys/kernel.h> +#include <sys/linker.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/rwlock.h> +#include <sys/systm.h> +#include <sys/vmmeter.h> + +#include <machine/metadata.h> +#include <machine/pcb.h> +#include <machine/pte.h> +#include <machine/vfp.h> +#include <machine/vmparam.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_pager.h> + +static vm_object_t obj_1t1_pt; +static vm_pindex_t efi_1t1_idx; +static pd_entry_t *efi_l0; +static uint64_t efi_ttbr0; + +void +efi_destroy_1t1_map(void) +{ + vm_page_t m; + + if (obj_1t1_pt != NULL) { + VM_OBJECT_RLOCK(obj_1t1_pt); + TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq) + m->ref_count = VPRC_OBJREF; + vm_wire_sub(obj_1t1_pt->resident_page_count); + VM_OBJECT_RUNLOCK(obj_1t1_pt); + vm_object_deallocate(obj_1t1_pt); + } + + obj_1t1_pt = NULL; + efi_1t1_idx = 0; + efi_l0 = NULL; + efi_ttbr0 = 0; +} + +static vm_page_t +efi_1t1_page(void) +{ + + return (vm_page_grab(obj_1t1_pt, efi_1t1_idx++, VM_ALLOC_NOBUSY | + VM_ALLOC_WIRED | VM_ALLOC_ZERO)); +} + +static pt_entry_t * +efi_1t1_l3(vm_offset_t va) +{ + pd_entry_t *l0, *l1, *l2; + pt_entry_t *l3; + vm_pindex_t l0_idx, l1_idx, l2_idx; + vm_page_t m; + vm_paddr_t mphys; + + l0_idx = pmap_l0_index(va); + l0 = &efi_l0[l0_idx]; + if (*l0 == 0) { + m = efi_1t1_page(); + mphys = VM_PAGE_TO_PHYS(m); + *l0 = mphys | L0_TABLE; + } else { + mphys = *l0 & ~ATTR_MASK; + } + + l1 = (pd_entry_t *)PHYS_TO_DMAP(mphys); + l1_idx = pmap_l1_index(va); + l1 += l1_idx; + if (*l1 == 0) { + m = efi_1t1_page(); + mphys = VM_PAGE_TO_PHYS(m); + *l1 = mphys | L1_TABLE; + } else { + mphys = *l1 & ~ATTR_MASK; + } + + l2 = (pd_entry_t *)PHYS_TO_DMAP(mphys); + l2_idx = pmap_l2_index(va); + l2 += l2_idx; + if (*l2 == 0) { + m = efi_1t1_page(); + mphys = VM_PAGE_TO_PHYS(m); + *l2 = mphys | L2_TABLE; + } else { + mphys = *l2 & ~ATTR_MASK; + } + + l3 = (pt_entry_t *)PHYS_TO_DMAP(mphys); + l3 += pmap_l3_index(va); + KASSERT(*l3 == 0, ("%s: Already mapped: va %#jx *pt %#jx", __func__, + va, *l3)); + + return (l3); +} + +/* + * Map a physical address from EFI runtime space into KVA space. Returns 0 to + * indicate a failed mapping so that the caller may handle error. 
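+ * On arm64 this relies on the direct map, so a physical address outside
+ * the DMAP range cannot be translated and yields 0.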
+ */
+vm_offset_t
+efi_phys_to_kva(vm_paddr_t paddr)
+{
+
+	if (!PHYS_IN_DMAP(paddr))
+		return (0);
+	return (PHYS_TO_DMAP(paddr));
+}
+
+/*
+ * Create the 1:1 virtual to physical map for EFI
+ */
+bool
+efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz)
+{
+	struct efi_md *p;
+	pt_entry_t *l3, l3_attr;
+	vm_offset_t va;
+	vm_page_t efi_l0_page;
+	uint64_t idx;
+	int i, mode;
+
+	obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, L0_ENTRIES +
+	    L0_ENTRIES * Ln_ENTRIES + L0_ENTRIES * Ln_ENTRIES * Ln_ENTRIES +
+	    L0_ENTRIES * Ln_ENTRIES * Ln_ENTRIES * Ln_ENTRIES,
+	    VM_PROT_ALL, 0, NULL);
+	VM_OBJECT_WLOCK(obj_1t1_pt);
+	efi_l0_page = efi_1t1_page();
+	VM_OBJECT_WUNLOCK(obj_1t1_pt);
+	efi_l0 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_l0_page));
+	efi_ttbr0 = ASID_TO_OPERAND(ASID_RESERVED_FOR_EFI) |
+	    VM_PAGE_TO_PHYS(efi_l0_page);
+
+	for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p,
+	    descsz)) {
+		if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
+			continue;
+		if (p->md_virt != NULL && (uint64_t)p->md_virt != p->md_phys) {
+			if (bootverbose)
+				printf("EFI Runtime entry %d is mapped\n", i);
+			goto fail;
+		}
+		if ((p->md_phys & EFI_PAGE_MASK) != 0) {
+			if (bootverbose)
+				printf("EFI Runtime entry %d is not aligned\n",
+				    i);
+			goto fail;
+		}
+		if (p->md_phys + p->md_pages * EFI_PAGE_SIZE < p->md_phys ||
+		    p->md_phys + p->md_pages * EFI_PAGE_SIZE >=
+		    VM_MAXUSER_ADDRESS) {
+			printf("EFI Runtime entry %d is not mappable for RT: "
+			    "base %#016jx %#jx pages\n",
+			    i, (uintmax_t)p->md_phys,
+			    (uintmax_t)p->md_pages);
+			goto fail;
+		}
+		if ((p->md_attr & EFI_MD_ATTR_WB) != 0)
+			mode = VM_MEMATTR_WRITE_BACK;
+		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
+			mode = VM_MEMATTR_WRITE_THROUGH;
+		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
+			mode = VM_MEMATTR_WRITE_COMBINING;
+		else
+			mode = VM_MEMATTR_DEVICE;
+
+		if (bootverbose)
+			printf("MAP %lx mode %x pages %lu\n", p->md_phys,
+			    mode, p->md_pages);
+
+		l3_attr = ATTR_DEFAULT | ATTR_S1_IDX(mode) |
+		    ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_nG | L3_PAGE;
+		if (mode == VM_MEMATTR_DEVICE || p->md_attr & EFI_MD_ATTR_XP)
+			l3_attr |= ATTR_S1_XN;
+
+		VM_OBJECT_WLOCK(obj_1t1_pt);
+		for (va = p->md_phys, idx = 0; idx < p->md_pages; idx++,
+		    va += PAGE_SIZE) {
+			l3 = efi_1t1_l3(va);
+			*l3 = va | l3_attr;
+		}
+		VM_OBJECT_WUNLOCK(obj_1t1_pt);
+	}
+
+	return (true);
+fail:
+	efi_destroy_1t1_map();
+	return (false);
+}
+
+int
+efi_arch_enter(void)
+{
+
+	CRITICAL_ASSERT(curthread);
+
+	/*
+	 * Temporarily switch to EFI's page table. However, we leave curpmap
+	 * unchanged in order to prevent its ASID from being reclaimed before
+	 * we switch back to its page table in efi_arch_leave().
+	 */
+	set_ttbr0(efi_ttbr0);
+	if (PCPU_GET(bcast_tlbi_workaround) != 0)
+		invalidate_local_icache();
+
+	return (0);
+}
+
+void
+efi_arch_leave(void)
+{
+
+	/*
+	 * Restore the pcpu pointer. Some UEFI implementations trash it and
+	 * we don't store it before calling into them. To fix this we need
+	 * to restore it after returning to the kernel context. As reading
+	 * curpmap will access x18, we need to restore it before loading
+	 * the pmap pointer.
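+	 * (On FreeBSD/arm64 x18 holds the per-CPU pointer; tpidr_el1
+	 * still has the correct value, so x18 is reloaded from there.)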
+ */ + __asm __volatile( + "mrs x18, tpidr_el1 \n" + ); + set_ttbr0(pmap_to_ttbr0(PCPU_GET(curpmap))); + if (PCPU_GET(bcast_tlbi_workaround) != 0) + invalidate_local_icache(); +} + +int +efi_rt_arch_call(struct efirt_callinfo *ec) +{ + + panic("not implemented"); +} diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c new file mode 100644 index 000000000000..f99523cb6362 --- /dev/null +++ b/sys/arm64/arm64/elf32_machdep.c @@ -0,0 +1,261 @@ +/*- + * Copyright (c) 2014, 2015 The FreeBSD Foundation. + * Copyright (c) 2014, 2017 Andrew Turner. + * Copyright (c) 2018 Olivier Houchard + * All rights reserved. + * + * This software was developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. + * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#define __ELF_WORD_SIZE 32 + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/exec.h> +#include <sys/imgact.h> +#include <sys/linker.h> +#include <sys/proc.h> +#include <sys/sysent.h> +#include <sys/imgact_elf.h> +#include <sys/syscall.h> +#include <sys/signalvar.h> +#include <sys/vnode.h> + +#include <machine/elf.h> + +#include <compat/freebsd32/freebsd32_util.h> + +#define FREEBSD32_MINUSER 0x00001000 +#define FREEBSD32_MAXUSER ((1ul << 32) - PAGE_SIZE) +#define FREEBSD32_SHAREDPAGE (FREEBSD32_MAXUSER - PAGE_SIZE) +#define FREEBSD32_USRSTACK FREEBSD32_SHAREDPAGE + +extern const char *freebsd32_syscallnames[]; + +extern char aarch32_sigcode[]; +extern int sz_aarch32_sigcode; + +static int freebsd32_fetch_syscall_args(struct thread *td); +static void freebsd32_setregs(struct thread *td, struct image_params *imgp, + u_long stack); +static void freebsd32_set_syscall_retval(struct thread *, int); + +static boolean_t elf32_arm_abi_supported(struct image_params *, int32_t *, + uint32_t *); + +extern void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); + +static struct sysentvec elf32_freebsd_sysvec = { + .sv_size = SYS_MAXSYSCALL, + .sv_table = freebsd32_sysent, + .sv_errsize = 0, + .sv_errtbl = NULL, + .sv_transtrap = NULL, + .sv_fixup = elf32_freebsd_fixup, + .sv_sendsig = freebsd32_sendsig, + .sv_sigcode = aarch32_sigcode, + .sv_szsigcode = &sz_aarch32_sigcode, + .sv_name = "FreeBSD ELF32", + .sv_coredump = elf32_coredump, + .sv_imgact_try = NULL, + .sv_minsigstksz = MINSIGSTKSZ, + .sv_minuser = FREEBSD32_MINUSER, + .sv_maxuser = FREEBSD32_MAXUSER, + .sv_usrstack = FREEBSD32_USRSTACK, + .sv_psstrings = FREEBSD32_PS_STRINGS, + .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, + .sv_copyout_auxargs = elf32_freebsd_copyout_auxargs, + .sv_copyout_strings = freebsd32_copyout_strings, + .sv_setregs = freebsd32_setregs, + .sv_fixlimit = NULL, // XXX + .sv_maxssiz = NULL, + .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP | SV_TIMEKEEP, + .sv_set_syscall_retval = freebsd32_set_syscall_retval, + .sv_fetch_syscall_args = freebsd32_fetch_syscall_args, + .sv_syscallnames = freebsd32_syscallnames, + .sv_shared_page_base = FREEBSD32_SHAREDPAGE, + .sv_shared_page_len = PAGE_SIZE, + .sv_schedtail = NULL, + .sv_thread_detach = NULL, + .sv_trap = NULL, +}; +INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); + +static Elf32_Brandinfo freebsd32_brand_info = { + .brand = ELFOSABI_FREEBSD, + .machine = EM_ARM, + .compat_3_brand = "FreeBSD", + .emul_path = NULL, + .interp_path = "/libexec/ld-elf.so.1", + .sysvec = &elf32_freebsd_sysvec, + .interp_newpath = "/libexec/ld-elf32.so.1", + .brand_note = &elf32_freebsd_brandnote, + .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, + .header_supported= elf32_arm_abi_supported, +}; + +SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, + (sysinit_cfunc_t)elf32_insert_brand_entry, &freebsd32_brand_info); + +static boolean_t +elf32_arm_abi_supported(struct image_params *imgp, int32_t *osrel __unused, + uint32_t *fctl0 __unused) +{ + const Elf32_Ehdr *hdr; + + /* Check if we support AArch32 */ + if (ID_AA64PFR0_EL0_VAL(READ_SPECIALREG(id_aa64pfr0_el1)) != + ID_AA64PFR0_EL0_64_32) + return (FALSE); + +#define EF_ARM_EABI_VERSION(x) (((x) & EF_ARM_EABIMASK) >> 24) +#define EF_ARM_EABI_FREEBSD_MIN 4 + hdr = (const Elf32_Ehdr *)imgp->image_header; + if (EF_ARM_EABI_VERSION(hdr->e_flags) < EF_ARM_EABI_FREEBSD_MIN) { + if (bootverbose) + uprintf("Attempting to execute non EABI binary " + 
"(rev %d) image %s", + EF_ARM_EABI_VERSION(hdr->e_flags), + imgp->args->fname); + return (FALSE); + } + + return (TRUE); +} + +static int +freebsd32_fetch_syscall_args(struct thread *td) +{ + struct proc *p; + register_t *ap; + struct syscall_args *sa; + int error, i, nap; + unsigned int args[4]; + + nap = 4; + p = td->td_proc; + ap = td->td_frame->tf_x; + sa = &td->td_sa; + + /* r7 is the syscall id */ + sa->code = td->td_frame->tf_x[7]; + + if (sa->code == SYS_syscall) { + sa->code = *ap++; + nap--; + } else if (sa->code == SYS___syscall) { + sa->code = ap[1]; + nap -= 2; + ap += 2; + } + + if (sa->code >= p->p_sysent->sv_size) + sa->callp = &p->p_sysent->sv_table[0]; + else + sa->callp = &p->p_sysent->sv_table[sa->code]; + + sa->narg = sa->callp->sy_narg; + for (i = 0; i < nap; i++) + sa->args[i] = ap[i]; + if (sa->narg > nap) { + if ((sa->narg - nap) > nitems(args)) + panic("Too many system call arguiments"); + error = copyin((void *)td->td_frame->tf_x[13], args, + (sa->narg - nap) * sizeof(int)); + for (i = 0; i < (sa->narg - nap); i++) + sa->args[i + nap] = args[i]; + } + + td->td_retval[0] = 0; + td->td_retval[1] = 0; + + return (0); +} + +static void +freebsd32_set_syscall_retval(struct thread *td, int error) +{ + struct trapframe *frame; + + frame = td->td_frame; + switch (error) { + case 0: + frame->tf_x[0] = td->td_retval[0]; + frame->tf_x[1] = td->td_retval[1]; + frame->tf_spsr &= ~PSR_C; + break; + case ERESTART: + /* + * Reconstruct the pc to point at the swi. + */ + if ((frame->tf_spsr & PSR_T) != 0) + frame->tf_elr -= 2; //THUMB_INSN_SIZE; + else + frame->tf_elr -= 4; //INSN_SIZE; + break; + case EJUSTRETURN: + /* nothing to do */ + break; + default: + frame->tf_x[0] = error; + frame->tf_spsr |= PSR_C; + break; + } +} + +static void +freebsd32_setregs(struct thread *td, struct image_params *imgp, + uintptr_t stack) +{ + struct trapframe *tf = td->td_frame; + + memset(tf, 0, sizeof(struct trapframe)); + + /* + * We need to set x0 for init as it doesn't call + * cpu_set_syscall_retval to copy the value. We also + * need to set td_retval for the cases where we do. + */ + tf->tf_x[0] = stack; + /* SP_usr is mapped to x13 */ + tf->tf_x[13] = stack; + /* LR_usr is mapped to x14 */ + tf->tf_x[14] = imgp->entry_addr; + tf->tf_elr = imgp->entry_addr; + tf->tf_spsr = PSR_M_32; +} + +void +elf32_dump_thread(struct thread *td, void *dst, size_t *off) +{ + /* XXX: VFP */ +} diff --git a/sys/arm64/arm64/elf_machdep.c b/sys/arm64/arm64/elf_machdep.c new file mode 100644 index 000000000000..392cdfaee246 --- /dev/null +++ b/sys/arm64/arm64/elf_machdep.c @@ -0,0 +1,284 @@ +/*- + * Copyright (c) 2014, 2015 The FreeBSD Foundation. + * Copyright (c) 2014 Andrew Turner. + * All rights reserved. + * + * This software was developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. + * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/exec.h> +#include <sys/imgact.h> +#include <sys/linker.h> +#include <sys/proc.h> +#include <sys/sysent.h> +#include <sys/imgact_elf.h> +#include <sys/syscall.h> +#include <sys/signalvar.h> +#include <sys/vnode.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> + +#include <machine/elf.h> +#include <machine/md_var.h> + +#include "linker_if.h" + +u_long elf_hwcap; + +static struct sysentvec elf64_freebsd_sysvec = { + .sv_size = SYS_MAXSYSCALL, + .sv_table = sysent, + .sv_errsize = 0, + .sv_errtbl = NULL, + .sv_transtrap = NULL, + .sv_fixup = __elfN(freebsd_fixup), + .sv_sendsig = sendsig, + .sv_sigcode = sigcode, + .sv_szsigcode = &szsigcode, + .sv_name = "FreeBSD ELF64", + .sv_coredump = __elfN(coredump), + .sv_imgact_try = NULL, + .sv_minsigstksz = MINSIGSTKSZ, + .sv_minuser = VM_MIN_ADDRESS, + .sv_maxuser = VM_MAXUSER_ADDRESS, + .sv_usrstack = USRSTACK, + .sv_psstrings = PS_STRINGS, + .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, + .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), + .sv_copyout_strings = exec_copyout_strings, + .sv_setregs = exec_setregs, + .sv_fixlimit = NULL, + .sv_maxssiz = NULL, + .sv_flags = SV_SHP | SV_TIMEKEEP | SV_ABI_FREEBSD | SV_LP64 | + SV_ASLR, + .sv_set_syscall_retval = cpu_set_syscall_retval, + .sv_fetch_syscall_args = cpu_fetch_syscall_args, + .sv_syscallnames = syscallnames, + .sv_shared_page_base = SHAREDPAGE, + .sv_shared_page_len = PAGE_SIZE, + .sv_schedtail = NULL, + .sv_thread_detach = NULL, + .sv_trap = NULL, + .sv_hwcap = &elf_hwcap, +}; +INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); + +static Elf64_Brandinfo freebsd_brand_info = { + .brand = ELFOSABI_FREEBSD, + .machine = EM_AARCH64, + .compat_3_brand = "FreeBSD", + .emul_path = NULL, + .interp_path = "/libexec/ld-elf.so.1", + .sysvec = &elf64_freebsd_sysvec, + .interp_newpath = NULL, + .brand_note = &elf64_freebsd_brandnote, + .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE +}; + +SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, + (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info); + +void +elf64_dump_thread(struct thread *td __unused, void *dst __unused, + size_t *off __unused) +{ + +} + +bool +elf_is_ifunc_reloc(Elf_Size r_info __unused) +{ + + return (ELF_R_TYPE(r_info) == R_AARCH64_IRELATIVE); +} + +static int +reloc_instr_imm(Elf32_Addr *where, Elf_Addr val, u_int msb, u_int lsb) +{ + + /* Check bounds: upper bits must be all ones or all zeros. */ + if ((uint64_t)((int64_t)val >> (msb + 1)) + 1 > 1) + return (-1); + val >>= lsb; + val &= (1 << (msb - lsb + 1)) - 1; + *where |= (Elf32_Addr)val; + return (0); +} + +/* + * Process a relocation. 
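The AArch64 branch relocations
+ * (R_AARCH64_TSTBR14, CONDBR19, JUMP26 and CALL26) pack a PC-relative
+ * word displacement into the instruction via reloc_instr_imm() above;
+ * R_AARCH64_CALL26, for example, stores (S + A - P) >> 2 in bits 25:0.
+ *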
Support for some static relocations is required + * in order for the -zifunc-noplt optimization to work. + */ +static int +elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, + int type, int flags, elf_lookup_fn lookup) +{ +#define ARM64_ELF_RELOC_LOCAL (1 << 0) +#define ARM64_ELF_RELOC_LATE_IFUNC (1 << 1) + Elf_Addr *where, addr, addend, val; + Elf_Word rtype, symidx; + const Elf_Rel *rel; + const Elf_Rela *rela; + int error; + + switch (type) { + case ELF_RELOC_REL: + rel = (const Elf_Rel *)data; + where = (Elf_Addr *) (relocbase + rel->r_offset); + addend = *where; + rtype = ELF_R_TYPE(rel->r_info); + symidx = ELF_R_SYM(rel->r_info); + break; + case ELF_RELOC_RELA: + rela = (const Elf_Rela *)data; + where = (Elf_Addr *) (relocbase + rela->r_offset); + addend = rela->r_addend; + rtype = ELF_R_TYPE(rela->r_info); + symidx = ELF_R_SYM(rela->r_info); + break; + default: + panic("unknown reloc type %d\n", type); + } + + if ((flags & ARM64_ELF_RELOC_LATE_IFUNC) != 0) { + KASSERT(type == ELF_RELOC_RELA, + ("Only RELA ifunc relocations are supported")); + if (rtype != R_AARCH64_IRELATIVE) + return (0); + } + + if ((flags & ARM64_ELF_RELOC_LOCAL) != 0) { + if (rtype == R_AARCH64_RELATIVE) + *where = elf_relocaddr(lf, relocbase + addend); + return (0); + } + + error = 0; + switch (rtype) { + case R_AARCH64_NONE: + case R_AARCH64_RELATIVE: + break; + case R_AARCH64_TSTBR14: + error = lookup(lf, symidx, 1, &addr); + if (error != 0) + return (-1); + error = reloc_instr_imm((Elf32_Addr *)where, + addr + addend - (Elf_Addr)where, 15, 2); + break; + case R_AARCH64_CONDBR19: + error = lookup(lf, symidx, 1, &addr); + if (error != 0) + return (-1); + error = reloc_instr_imm((Elf32_Addr *)where, + addr + addend - (Elf_Addr)where, 20, 2); + break; + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + error = lookup(lf, symidx, 1, &addr); + if (error != 0) + return (-1); + error = reloc_instr_imm((Elf32_Addr *)where, + addr + addend - (Elf_Addr)where, 27, 2); + break; + case R_AARCH64_ABS64: + case R_AARCH64_GLOB_DAT: + case R_AARCH64_JUMP_SLOT: + error = lookup(lf, symidx, 1, &addr); + if (error != 0) + return (-1); + *where = addr + addend; + break; + case R_AARCH64_IRELATIVE: + addr = relocbase + addend; + val = ((Elf64_Addr (*)(void))addr)(); + if (*where != val) + *where = val; + break; + default: + printf("kldload: unexpected relocation type %d\n", rtype); + return (-1); + } + return (error); +} + +int +elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, + int type, elf_lookup_fn lookup) +{ + + return (elf_reloc_internal(lf, relocbase, data, type, + ARM64_ELF_RELOC_LOCAL, lookup)); +} + +/* Process one elf relocation with addend. 
*/ +int +elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, + elf_lookup_fn lookup) +{ + + return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); +} + +int +elf_reloc_late(linker_file_t lf, Elf_Addr relocbase, const void *data, + int type, elf_lookup_fn lookup) +{ + + return (elf_reloc_internal(lf, relocbase, data, type, + ARM64_ELF_RELOC_LATE_IFUNC, lookup)); +} + +int +elf_cpu_load_file(linker_file_t lf) +{ + + if (lf->id != 1) + cpu_icache_sync_range((vm_offset_t)lf->address, lf->size); + return (0); +} + +int +elf_cpu_unload_file(linker_file_t lf __unused) +{ + + return (0); +} + +int +elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) +{ + + return (0); +} diff --git a/sys/arm64/arm64/exception.S b/sys/arm64/arm64/exception.S new file mode 100644 index 000000000000..123f73b49734 --- /dev/null +++ b/sys/arm64/arm64/exception.S @@ -0,0 +1,255 @@ +/*- + * Copyright (c) 2014 Andrew Turner + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include <machine/asm.h> +#include <machine/armreg.h> +__FBSDID("$FreeBSD$"); + +#include "assym.inc" + + .text + +.macro save_registers el +.if \el == 1 + mov x18, sp + sub sp, sp, #128 +.endif + sub sp, sp, #(TF_SIZE + 16) + stp x29, x30, [sp, #(TF_SIZE)] + stp x28, x29, [sp, #(TF_X + 28 * 8)] + stp x26, x27, [sp, #(TF_X + 26 * 8)] + stp x24, x25, [sp, #(TF_X + 24 * 8)] + stp x22, x23, [sp, #(TF_X + 22 * 8)] + stp x20, x21, [sp, #(TF_X + 20 * 8)] + stp x18, x19, [sp, #(TF_X + 18 * 8)] + stp x16, x17, [sp, #(TF_X + 16 * 8)] + stp x14, x15, [sp, #(TF_X + 14 * 8)] + stp x12, x13, [sp, #(TF_X + 12 * 8)] + stp x10, x11, [sp, #(TF_X + 10 * 8)] + stp x8, x9, [sp, #(TF_X + 8 * 8)] + stp x6, x7, [sp, #(TF_X + 6 * 8)] + stp x4, x5, [sp, #(TF_X + 4 * 8)] + stp x2, x3, [sp, #(TF_X + 2 * 8)] + stp x0, x1, [sp, #(TF_X + 0 * 8)] + mrs x10, elr_el1 + mrs x11, spsr_el1 + mrs x12, esr_el1 +.if \el == 0 + mrs x18, sp_el0 +.endif + str x10, [sp, #(TF_ELR)] + stp w11, w12, [sp, #(TF_SPSR)] + stp x18, lr, [sp, #(TF_SP)] + mrs x18, tpidr_el1 + add x29, sp, #(TF_SIZE) +.if \el == 0 + /* Apply the SSBD (CVE-2018-3639) workaround if needed */ + ldr x1, [x18, #PC_SSBD] + cbz x1, 1f + mov w0, #1 + blr x1 +1: + + ldr x0, [x18, #(PC_CURTHREAD)] + bl dbg_monitor_enter +.endif + msr daifclr, #8 /* Enable the debug exception */ +.endm + +.macro restore_registers el +.if \el == 1 + /* + * Disable interrupts and debug exceptions, x18 may change in the + * interrupt exception handler. For EL0 exceptions, do_ast already + * did this. + */ + msr daifset, #10 +.endif +.if \el == 0 + ldr x0, [x18, #PC_CURTHREAD] + mov x1, sp + bl dbg_monitor_exit + + /* Remove the SSBD (CVE-2018-3639) workaround if needed */ + ldr x1, [x18, #PC_SSBD] + cbz x1, 1f + mov w0, #0 + blr x1 +1: +.endif + ldp x18, lr, [sp, #(TF_SP)] + ldp x10, x11, [sp, #(TF_ELR)] +.if \el == 0 + msr sp_el0, x18 +.endif + msr spsr_el1, x11 + msr elr_el1, x10 + ldp x0, x1, [sp, #(TF_X + 0 * 8)] + ldp x2, x3, [sp, #(TF_X + 2 * 8)] + ldp x4, x5, [sp, #(TF_X + 4 * 8)] + ldp x6, x7, [sp, #(TF_X + 6 * 8)] + ldp x8, x9, [sp, #(TF_X + 8 * 8)] + ldp x10, x11, [sp, #(TF_X + 10 * 8)] + ldp x12, x13, [sp, #(TF_X + 12 * 8)] + ldp x14, x15, [sp, #(TF_X + 14 * 8)] + ldp x16, x17, [sp, #(TF_X + 16 * 8)] +.if \el == 0 + /* + * We only restore the callee saved registers when returning to + * userland as they may have been updated by a system call or signal. 
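+	 * For EL1 only x29 is reloaded below; the other callee-saved
+	 * registers are still live, and x18 is refetched from tpidr_el1
+	 * rather than from the frame.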
+	 */
+	ldp	x18, x19, [sp, #(TF_X + 18 * 8)]
+	ldp	x20, x21, [sp, #(TF_X + 20 * 8)]
+	ldp	x22, x23, [sp, #(TF_X + 22 * 8)]
+	ldp	x24, x25, [sp, #(TF_X + 24 * 8)]
+	ldp	x26, x27, [sp, #(TF_X + 26 * 8)]
+	ldp	x28, x29, [sp, #(TF_X + 28 * 8)]
+.else
+	ldr	x29, [sp, #(TF_X + 29 * 8)]
+.endif
+.if \el == 0
+	add	sp, sp, #(TF_SIZE + 16)
+.else
+	mov	sp, x18
+	mrs	x18, tpidr_el1
+.endif
+.endm

+.macro do_ast
+	mrs	x19, daif
+	/* Make sure the IRQs are enabled before calling ast() */
+	bic	x19, x19, #PSR_I
+1:
+	/* Disable interrupts */
+	msr	daifset, #10
+
+	/* Read the current thread flags */
+	ldr	x1, [x18, #PC_CURTHREAD]	/* Load curthread */
+	ldr	x2, [x1, #TD_FLAGS]
+
+	/* Check if we have either bits set */
+	mov	x3, #((TDF_ASTPENDING|TDF_NEEDRESCHED) >> 8)
+	lsl	x3, x3, #8
+	and	x2, x2, x3
+	cbz	x2, 2f
+
+	/* Restore interrupts */
+	msr	daif, x19
+
+	/* handle the ast */
+	mov	x0, sp
+	bl	_C_LABEL(ast)
+
+	/* Re-check for new ast scheduled */
+	b	1b
+2:
+.endm
+
+ENTRY(handle_el1h_sync)
+	save_registers 1
+	ldr	x0, [x18, #PC_CURTHREAD]
+	mov	x1, sp
+	bl	do_el1h_sync
+	restore_registers 1
+	ERET
+END(handle_el1h_sync)
+
+ENTRY(handle_el1h_irq)
+	save_registers 1
+	mov	x0, sp
+	bl	intr_irq_handler
+	restore_registers 1
+	ERET
+END(handle_el1h_irq)
+
+ENTRY(handle_el0_sync)
+	save_registers 0
+	ldr	x0, [x18, #PC_CURTHREAD]
+	mov	x1, sp
+	str	x1, [x0, #TD_FRAME]
+	bl	do_el0_sync
+	do_ast
+	restore_registers 0
+	ERET
+END(handle_el0_sync)
+
+ENTRY(handle_el0_irq)
+	save_registers 0
+	mov	x0, sp
+	bl	intr_irq_handler
+	do_ast
+	restore_registers 0
+	ERET
+END(handle_el0_irq)
+
+ENTRY(handle_serror)
+	save_registers 0
+	mov	x0, sp
+1:	bl	do_serror
+	b	1b
+END(handle_serror)
+
+ENTRY(handle_empty_exception)
+	save_registers 0
+	mov	x0, sp
+1:	bl	unhandled_exception
+	b	1b
+END(handle_empty_exception)
+
+.macro	vempty
+	.align 7
+	b	handle_empty_exception
+.endm
+
+.macro	vector	name
+	.align 7
+	b	handle_\name
+.endm
+
+	.align 11
+	.globl exception_vectors
+exception_vectors:
+	vempty			/* Synchronous EL1t */
+	vempty			/* IRQ EL1t */
+	vempty			/* FIQ EL1t */
+	vempty			/* Error EL1t */
+
+	vector	el1h_sync	/* Synchronous EL1h */
+	vector	el1h_irq	/* IRQ EL1h */
+	vempty			/* FIQ EL1h */
+	vector	serror		/* Error EL1h */
+
+	vector	el0_sync	/* Synchronous 64-bit EL0 */
+	vector	el0_irq		/* IRQ 64-bit EL0 */
+	vempty			/* FIQ 64-bit EL0 */
+	vector	serror		/* Error 64-bit EL0 */
+
+	vector	el0_sync	/* Synchronous 32-bit EL0 */
+	vector	el0_irq		/* IRQ 32-bit EL0 */
+	vempty			/* FIQ 32-bit EL0 */
+	vector	serror		/* Error 32-bit EL0 */
+
diff --git a/sys/arm64/arm64/freebsd32_machdep.c b/sys/arm64/arm64/freebsd32_machdep.c
new file mode 100644
index 000000000000..b1e070feb4f6
--- /dev/null
+++ b/sys/arm64/arm64/freebsd32_machdep.c
@@ -0,0 +1,438 @@
+/*-
+ * Copyright (c) 2018 Olivier Houchard
+ * Copyright (c) 2017 Nuxi, https://nuxi.nl/
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/proc.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/syscallsubr.h> +#include <sys/ktr.h> +#include <sys/sysent.h> +#include <sys/sysproto.h> +#include <machine/armreg.h> +#ifdef VFP +#include <machine/vfp.h> +#endif +#include <compat/freebsd32/freebsd32_proto.h> +#include <compat/freebsd32/freebsd32_signal.h> + +extern void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); + +/* + * The first two fields of a ucontext_t are the signal mask and the machine + * context. The next field is uc_link; we want to avoid destroying the link + * when copying out contexts. + */ +#define UC32_COPY_SIZE offsetof(ucontext32_t, uc_link) + +#ifdef VFP +static void get_fpcontext32(struct thread *td, mcontext32_vfp_t *); +#endif + +/* + * Stubs for machine dependent 32-bits system calls. + */ + +int +freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap) +{ + int error; + +#define ARM_SYNC_ICACHE 0 +#define ARM_DRAIN_WRITEBUF 1 +#define ARM_SET_TP 2 +#define ARM_GET_TP 3 +#define ARM_GET_VFPSTATE 4 + + switch(uap->op) { + case ARM_SET_TP: + WRITE_SPECIALREG(tpidr_el0, uap->parms); + WRITE_SPECIALREG(tpidrro_el0, uap->parms); + return 0; + case ARM_SYNC_ICACHE: + { + struct { + uint32_t addr; + uint32_t size; + } args; + + if ((error = copyin(uap->parms, &args, sizeof(args))) != 0) + return (error); + if ((uint64_t)args.addr + (uint64_t)args.size > 0xffffffff) + return (EINVAL); + cpu_icache_sync_range_checked(args.addr, args.size); + return 0; + } + case ARM_GET_VFPSTATE: + { + mcontext32_vfp_t mcontext_vfp; + + struct { + uint32_t mc_vfp_size; + uint32_t mc_vfp; + } args; + if ((error = copyin(uap->parms, &args, sizeof(args))) != 0) + return (error); + if (args.mc_vfp_size != sizeof(mcontext_vfp)) + return (EINVAL); +#ifdef VFP + get_fpcontext32(td, &mcontext_vfp); +#else + bzero(&mcontext_vfp, sizeof(mcontext_vfp)); +#endif + error = copyout(&mcontext_vfp, + (void *)(uintptr_t)args.mc_vfp, + sizeof(mcontext_vfp)); + return error; + } + } + + return (EINVAL); +} + +#ifdef VFP +static void +get_fpcontext32(struct thread *td, mcontext32_vfp_t *mcp) +{ + struct pcb *curpcb; + int i; + + critical_enter(); + curpcb = curthread->td_pcb; + + if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) { + /* + * If we have just been running VFP instructions we will + * need to save the state to memcpy it below. 
+ */ + vfp_save_state(td, curpcb); + + KASSERT(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate, + ("Called get_fpcontext while the kernel is using the VFP")); + KASSERT((curpcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0, + ("Non-userspace FPU flags set in get_fpcontext")); + for (i = 0; i < 32; i++) + mcp->mcv_reg[i] = (uint64_t)curpcb->pcb_fpustate.vfp_regs[i]; + mcp->mcv_fpscr = VFP_FPSCR_FROM_SRCR(curpcb->pcb_fpustate.vfp_fpcr, + curpcb->pcb_fpustate.vfp_fpsr); + } + critical_exit(); +} + +static void +set_fpcontext32(struct thread *td, mcontext32_vfp_t *mcp) +{ + struct pcb *pcb; + int i; + + critical_enter(); + pcb = td->td_pcb; + if (td == curthread) + vfp_discard(td); + for (i = 0; i < 32; i++) + pcb->pcb_fpustate.vfp_regs[i] = mcp->mcv_reg[i]; + pcb->pcb_fpustate.vfp_fpsr = VFP_FPSR_FROM_FPSCR(mcp->mcv_fpscr); + pcb->pcb_fpustate.vfp_fpcr = VFP_FPSR_FROM_FPSCR(mcp->mcv_fpscr); + critical_exit(); +} +#endif +static void +get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags) +{ + struct pcb *pcb; + struct trapframe *tf; + int i; + + pcb = td->td_pcb; + tf = td->td_frame; + + if ((flags & GET_MC_CLEAR_RET) != 0) { + mcp->mc_gregset[0] = 0; + mcp->mc_gregset[16] = tf->tf_spsr & ~PSR_C; + } else { + mcp->mc_gregset[0] = tf->tf_x[0]; + mcp->mc_gregset[16] = tf->tf_spsr; + } + for (i = 1; i < 15; i++) + mcp->mc_gregset[i] = tf->tf_x[i]; + mcp->mc_gregset[15] = tf->tf_elr; + + mcp->mc_vfp_size = 0; + mcp->mc_vfp_ptr = 0; + + memset(mcp->mc_spare, 0, sizeof(mcp->mc_spare)); +} + +static int +set_mcontext32(struct thread *td, mcontext32_t *mcp) +{ + struct trapframe *tf; + mcontext32_vfp_t mc_vfp; + int i; + + tf = td->td_frame; + + for (i = 0; i < 15; i++) + tf->tf_x[i] = mcp->mc_gregset[i]; + tf->tf_elr = mcp->mc_gregset[15]; + tf->tf_spsr = mcp->mc_gregset[16]; +#ifdef VFP + if (mcp->mc_vfp_size == sizeof(mc_vfp) && mcp->mc_vfp_ptr != 0) { + if (copyin((void *)(uintptr_t)mcp->mc_vfp_ptr, &mc_vfp, + sizeof(mc_vfp)) != 0) + return (EFAULT); + set_fpcontext32(td, &mc_vfp); + } +#endif + + return (0); +} + +#define UC_COPY_SIZE offsetof(ucontext32_t, uc_link) + +int +freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap) +{ + ucontext32_t uc; + int ret; + + if (uap->ucp == NULL) + ret = EINVAL; + else { + memset(&uc, 0, sizeof(uc)); + get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); + PROC_LOCK(td->td_proc); + uc.uc_sigmask = td->td_sigmask; + PROC_UNLOCK(td->td_proc); + ret = copyout(&uc, uap->ucp, UC_COPY_SIZE); + } + return (ret); +} + +int +freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap) +{ + ucontext32_t uc; + int ret; + + if (uap->ucp == NULL) + ret = EINVAL; + else { + ret = copyin(uap->ucp, &uc, UC_COPY_SIZE); + if (ret == 0) { + ret = set_mcontext32(td, &uc.uc_mcontext); + if (ret == 0) + kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, + NULL, 0); + } + } + return (ret); +} + +int +freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap) +{ + ucontext32_t uc; + int error; + + if (uap == NULL) + return (EFAULT); + if (copyin(uap->sigcntxp, &uc, sizeof(uc))) + return (EFAULT); + error = set_mcontext32(td, &uc.uc_mcontext); + if (error != 0) + return (0); + + /* Restore signal mask. 
*/ + kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); + + return (EJUSTRETURN); + +} + +int +freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap) +{ + ucontext32_t uc; + int ret; + + if (uap->oucp == NULL || uap->ucp == NULL) + ret = EINVAL; + else { + bzero(&uc, sizeof(uc)); + get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); + PROC_LOCK(td->td_proc); + uc.uc_sigmask = td->td_sigmask; + PROC_UNLOCK(td->td_proc); + ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE); + if (ret == 0) { + ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); + if (ret == 0) { + ret = set_mcontext32(td, &uc.uc_mcontext); + kern_sigprocmask(td, SIG_SETMASK, + &uc.uc_sigmask, NULL, 0); + } + } + } + return (ret); +} + +void +freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) +{ + struct thread *td; + struct proc *p; + struct trapframe *tf; + struct sigframe32 *fp, frame; + struct sigacts *psp; + struct siginfo32 siginfo; + struct sysentvec *sysent; + int onstack; + int sig; + int code; + + siginfo_to_siginfo32(&ksi->ksi_info, &siginfo); + td = curthread; + p = td->td_proc; + PROC_LOCK_ASSERT(p, MA_OWNED); + sig = ksi->ksi_signo; + code = ksi->ksi_code; + psp = p->p_sigacts; + mtx_assert(&psp->ps_mtx, MA_OWNED); + tf = td->td_frame; + onstack = sigonstack(tf->tf_x[13]); + + CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, + catcher, sig); + + /* Allocate and validate space for the signal handler context. */ + if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) && + SIGISMEMBER(psp->ps_sigonstack, sig)) { + fp = (struct sigframe32 *)((uintptr_t)td->td_sigstk.ss_sp + + td->td_sigstk.ss_size); +#if defined(COMPAT_43) + td->td_sigstk.ss_flags |= SS_ONSTACK; +#endif + } else + fp = (struct sigframe32 *)td->td_frame->tf_x[13]; + + /* make room on the stack */ + fp--; + + /* make the stack aligned */ + fp = (struct sigframe32 *)((unsigned long)(fp) &~ (8 - 1)); + /* Populate the siginfo frame. */ + get_mcontext32(td, &frame.sf_uc.uc_mcontext, 0); +#ifdef VFP + get_fpcontext32(td, &frame.sf_vfp); + frame.sf_uc.uc_mcontext.mc_vfp_size = sizeof(fp->sf_vfp); + frame.sf_uc.uc_mcontext.mc_vfp_ptr = (uint32_t)(uintptr_t)&fp->sf_vfp; +#else + frame.sf_uc.uc_mcontext.mc_vfp_size = 0; + frame.sf_uc.uc_mcontext.mc_vfp_ptr = (uint32_t)NULL; +#endif + frame.sf_si = siginfo; + frame.sf_uc.uc_sigmask = *mask; + frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK ) + ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; + frame.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp; + frame.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size; + + mtx_unlock(&psp->ps_mtx); + PROC_UNLOCK(td->td_proc); + + /* Copy the sigframe out to the user's stack. */ + if (copyout(&frame, fp, sizeof(*fp)) != 0) { + /* Process has trashed its stack. Kill it. */ + CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); + PROC_LOCK(p); + sigexit(td, SIGILL); + } + + /* + * Build context to run handler in. We invoke the handler + * directly, only returning via the trampoline. Note the + * trampoline version numbers are coordinated with machine- + * dependent code in libc. 
+     */
+    tf->tf_x[0] = sig;
+    tf->tf_x[1] = (register_t)&fp->sf_si;
+    tf->tf_x[2] = (register_t)&fp->sf_uc;
+
+    /* The trampoline expects the ucontext address in r5. */
+    tf->tf_x[5] = (register_t)&fp->sf_uc;
+    tf->tf_elr = (register_t)catcher;
+    tf->tf_x[13] = (register_t)fp;
+    sysent = p->p_sysent;
+    if (sysent->sv_sigcode_base != 0)
+        tf->tf_x[14] = (register_t)sysent->sv_sigcode_base;
+    else
+        tf->tf_x[14] = (register_t)(sysent->sv_psstrings -
+            *(sysent->sv_szsigcode));
+    /* Select ARM or Thumb state for the handler from bit 0 of its address */
+    if ((register_t)catcher & 1)
+        tf->tf_spsr |= PSR_T;
+    else
+        tf->tf_spsr &= ~PSR_T;
+
+    CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_x[14],
+        tf->tf_x[13]);
+
+    PROC_LOCK(p);
+    mtx_lock(&psp->ps_mtx);
+}
+
+#ifdef COMPAT_43
+/*
+ * COMPAT_FREEBSD32 assumes we have this system call when COMPAT_43 is defined.
+ * FreeBSD/arm provides a similar getpagesize() syscall.
+ */
+#define ARM32_PAGE_SIZE 4096
+int
+ofreebsd32_getpagesize(struct thread *td,
+    struct ofreebsd32_getpagesize_args *uap)
+{
+
+    td->td_retval[0] = ARM32_PAGE_SIZE;
+    return (0);
+}
+
+/*
+ * Mirror the osigreturn definition in kern_sig.c for !i386 platforms. This
+ * mirrors what's connected to the FreeBSD/arm syscall.
+ */
+int
+ofreebsd32_sigreturn(struct thread *td, struct ofreebsd32_sigreturn_args *uap)
+{
+
+    return (nosys(td, (struct nosys_args *)uap));
+}
+#endif
diff --git a/sys/arm64/arm64/genassym.c b/sys/arm64/arm64/genassym.c
new file mode 100644
index 000000000000..3f664d898916
--- /dev/null
+++ b/sys/arm64/arm64/genassym.c
@@ -0,0 +1,79 @@
+/*-
+ * Copyright (c) 2004 Olivier Houchard
+ * Copyright (c) 2014 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); +#include <sys/param.h> +#include <sys/assym.h> +#include <sys/pcpu.h> +#include <sys/proc.h> + +#include <machine/frame.h> +#include <machine/machdep.h> +#include <machine/pcb.h> + +/* Sizeof arm64_bootparams, rounded to keep stack alignment */ +ASSYM(BOOTPARAMS_SIZE, roundup2(sizeof(struct arm64_bootparams), + STACKALIGNBYTES + 1)); +ASSYM(BP_MODULEP, offsetof(struct arm64_bootparams, modulep)); +ASSYM(BP_KERN_L1PT, offsetof(struct arm64_bootparams, kern_l1pt)); +ASSYM(BP_KERN_DELTA, offsetof(struct arm64_bootparams, kern_delta)); +ASSYM(BP_KERN_STACK, offsetof(struct arm64_bootparams, kern_stack)); +ASSYM(BP_KERN_L0PT, offsetof(struct arm64_bootparams, kern_l0pt)); +ASSYM(BP_BOOT_EL, offsetof(struct arm64_bootparams, boot_el)); + +ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); +ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); + +ASSYM(PCPU_SIZE, sizeof(struct pcpu)); +ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); +ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); +ASSYM(PC_SSBD, offsetof(struct pcpu, pc_ssbd)); + +/* Size of pcb, rounded to keep stack alignment */ +ASSYM(PCB_SIZE, roundup2(sizeof(struct pcb), STACKALIGNBYTES + 1)); +ASSYM(PCB_SINGLE_STEP_SHIFT, PCB_SINGLE_STEP_SHIFT); +ASSYM(PCB_REGS, offsetof(struct pcb, pcb_x)); +ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp)); +ASSYM(PCB_TPIDRRO, offsetof(struct pcb, pcb_tpidrro_el0)); +ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); +ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); + +ASSYM(SF_UC, offsetof(struct sigframe, sf_uc)); + +ASSYM(TD_PROC, offsetof(struct thread, td_proc)); +ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); +ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); +ASSYM(TD_FRAME, offsetof(struct thread, td_frame)); +ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); + +ASSYM(TF_SIZE, sizeof(struct trapframe)); +ASSYM(TF_SP, offsetof(struct trapframe, tf_sp)); +ASSYM(TF_ELR, offsetof(struct trapframe, tf_elr)); +ASSYM(TF_SPSR, offsetof(struct trapframe, tf_spsr)); +ASSYM(TF_X, offsetof(struct trapframe, tf_x)); diff --git a/sys/arm64/arm64/gic_v3.c b/sys/arm64/arm64/gic_v3.c new file mode 100644 index 000000000000..a83ef576e30e --- /dev/null +++ b/sys/arm64/arm64/gic_v3.c @@ -0,0 +1,1271 @@ +/*- + * Copyright (c) 2015-2016 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under + * the sponsorship of the FreeBSD Foundation. + * + * This software was developed by Semihalf under + * the sponsorship of the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "opt_acpi.h" +#include "opt_platform.h" + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bitstring.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/rman.h> +#include <sys/pcpu.h> +#include <sys/proc.h> +#include <sys/cpuset.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/smp.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/bus.h> +#include <machine/cpu.h> +#include <machine/intr.h> + +#ifdef FDT +#include <dev/fdt/fdt_intr.h> +#include <dev/ofw/ofw_bus_subr.h> +#endif + +#ifdef DEV_ACPI +#include <contrib/dev/acpica/include/acpi.h> +#include <dev/acpica/acpivar.h> +#endif + +#include "pic_if.h" + +#include <arm/arm/gic_common.h> +#include "gic_v3_reg.h" +#include "gic_v3_var.h" + +static bus_get_domain_t gic_v3_get_domain; +static bus_read_ivar_t gic_v3_read_ivar; + +static pic_disable_intr_t gic_v3_disable_intr; +static pic_enable_intr_t gic_v3_enable_intr; +static pic_map_intr_t gic_v3_map_intr; +static pic_setup_intr_t gic_v3_setup_intr; +static pic_teardown_intr_t gic_v3_teardown_intr; +static pic_post_filter_t gic_v3_post_filter; +static pic_post_ithread_t gic_v3_post_ithread; +static pic_pre_ithread_t gic_v3_pre_ithread; +static pic_bind_intr_t gic_v3_bind_intr; +#ifdef SMP +static pic_init_secondary_t gic_v3_init_secondary; +static pic_ipi_send_t gic_v3_ipi_send; +static pic_ipi_setup_t gic_v3_ipi_setup; +#endif + +static u_int gic_irq_cpu; +#ifdef SMP +static u_int sgi_to_ipi[GIC_LAST_SGI - GIC_FIRST_SGI + 1]; +static u_int sgi_first_unused = GIC_FIRST_SGI; +#endif + +static device_method_t gic_v3_methods[] = { + /* Device interface */ + DEVMETHOD(device_detach, gic_v3_detach), + + /* Bus interface */ + DEVMETHOD(bus_get_domain, gic_v3_get_domain), + DEVMETHOD(bus_read_ivar, gic_v3_read_ivar), + + /* Interrupt controller interface */ + DEVMETHOD(pic_disable_intr, gic_v3_disable_intr), + DEVMETHOD(pic_enable_intr, gic_v3_enable_intr), + DEVMETHOD(pic_map_intr, gic_v3_map_intr), + DEVMETHOD(pic_setup_intr, gic_v3_setup_intr), + DEVMETHOD(pic_teardown_intr, gic_v3_teardown_intr), + DEVMETHOD(pic_post_filter, gic_v3_post_filter), + DEVMETHOD(pic_post_ithread, gic_v3_post_ithread), + DEVMETHOD(pic_pre_ithread, gic_v3_pre_ithread), +#ifdef SMP + DEVMETHOD(pic_bind_intr, gic_v3_bind_intr), + DEVMETHOD(pic_init_secondary, gic_v3_init_secondary), + DEVMETHOD(pic_ipi_send, gic_v3_ipi_send), + DEVMETHOD(pic_ipi_setup, gic_v3_ipi_setup), +#endif + + /* End */ + DEVMETHOD_END +}; + +DEFINE_CLASS_0(gic, gic_v3_driver, gic_v3_methods, + sizeof(struct gic_v3_softc)); + +/* + * Driver-specific definitions. + */ +MALLOC_DEFINE(M_GIC_V3, "GICv3", GIC_V3_DEVSTR); + +/* + * Helper functions and definitions. 
+ */
+/* Destination registers, either Distributor or Re-Distributor */
+enum gic_v3_xdist {
+    DIST = 0,
+    REDIST,
+};
+
+struct gic_v3_irqsrc {
+    struct intr_irqsrc gi_isrc;
+    uint32_t gi_irq;
+    enum intr_polarity gi_pol;
+    enum intr_trigger gi_trig;
+};
+
+/* Helper routines starting with gic_v3_ */
+static int gic_v3_dist_init(struct gic_v3_softc *);
+static int gic_v3_redist_alloc(struct gic_v3_softc *);
+static int gic_v3_redist_find(struct gic_v3_softc *);
+static int gic_v3_redist_init(struct gic_v3_softc *);
+static int gic_v3_cpu_init(struct gic_v3_softc *);
+static void gic_v3_wait_for_rwp(struct gic_v3_softc *, enum gic_v3_xdist);
+
+/* A sequence of init functions for the primary (boot) CPU */
+typedef int (*gic_v3_initseq_t) (struct gic_v3_softc *);
+/* Primary CPU initialization sequence */
+static gic_v3_initseq_t gic_v3_primary_init[] = {
+    gic_v3_dist_init,
+    gic_v3_redist_alloc,
+    gic_v3_redist_init,
+    gic_v3_cpu_init,
+    NULL
+};
+
+#ifdef SMP
+/* Secondary CPU initialization sequence */
+static gic_v3_initseq_t gic_v3_secondary_init[] = {
+    gic_v3_redist_init,
+    gic_v3_cpu_init,
+    NULL
+};
+#endif
+
+uint32_t
+gic_r_read_4(device_t dev, bus_size_t offset)
+{
+    struct gic_v3_softc *sc;
+    struct resource *rdist;
+
+    sc = device_get_softc(dev);
+    rdist = &sc->gic_redists.pcpu[PCPU_GET(cpuid)]->res;
+    return (bus_read_4(rdist, offset));
+}
+
+uint64_t
+gic_r_read_8(device_t dev, bus_size_t offset)
+{
+    struct gic_v3_softc *sc;
+    struct resource *rdist;
+
+    sc = device_get_softc(dev);
+    rdist = &sc->gic_redists.pcpu[PCPU_GET(cpuid)]->res;
+    return (bus_read_8(rdist, offset));
+}
+
+void
+gic_r_write_4(device_t dev, bus_size_t offset, uint32_t val)
+{
+    struct gic_v3_softc *sc;
+    struct resource *rdist;
+
+    sc = device_get_softc(dev);
+    rdist = &sc->gic_redists.pcpu[PCPU_GET(cpuid)]->res;
+    bus_write_4(rdist, offset, val);
+}
+
+void
+gic_r_write_8(device_t dev, bus_size_t offset, uint64_t val)
+{
+    struct gic_v3_softc *sc;
+    struct resource *rdist;
+
+    sc = device_get_softc(dev);
+    rdist = &sc->gic_redists.pcpu[PCPU_GET(cpuid)]->res;
+    bus_write_8(rdist, offset, val);
+}
+
+/*
+ * Device interface.
+ */
+int
+gic_v3_attach(device_t dev)
+{
+    struct gic_v3_softc *sc;
+    gic_v3_initseq_t *init_func;
+    uint32_t typer;
+    int rid;
+    int err;
+    size_t i;
+    u_int irq;
+    const char *name;
+
+    sc = device_get_softc(dev);
+    sc->gic_registered = FALSE;
+    sc->dev = dev;
+    err = 0;
+
+    /* Initialize mutex */
+    mtx_init(&sc->gic_mtx, "GICv3 lock", NULL, MTX_SPIN);
+
+    /*
+     * Allocate array of struct resource.
+     * One entry for the Distributor and all remaining for the
+     * Re-Distributors.
+     */
+    sc->gic_res = malloc(
+        sizeof(*sc->gic_res) * (sc->gic_redists.nregions + 1),
+        M_GIC_V3, M_WAITOK);
+
+    /* Now allocate the corresponding resources */
+    for (i = 0, rid = 0; i < (sc->gic_redists.nregions + 1); i++, rid++) {
+        sc->gic_res[rid] = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
+            &rid, RF_ACTIVE);
+        if (sc->gic_res[rid] == NULL)
+            return (ENXIO);
+    }
+
+    /*
+     * Distributor interface
+     */
+    sc->gic_dist = sc->gic_res[0];
+
+    /*
+     * Re-Distributor interface
+     */
+    /* Allocate space for the region descriptions */
+    sc->gic_redists.regions = malloc(
+        sizeof(*sc->gic_redists.regions) * sc->gic_redists.nregions,
+        M_GIC_V3, M_WAITOK);
+
+    /* Fill in bus_space information for each region.
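+     * Resource rid 0 is the Distributor, so region i corresponds to
+     * rid i + 1 in the loop below.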
+     */
+    for (i = 0, rid = 1; i < sc->gic_redists.nregions; i++, rid++)
+        sc->gic_redists.regions[i] = sc->gic_res[rid];
+
+    /* Get the number of supported SPI interrupts */
+    typer = gic_d_read(sc, 4, GICD_TYPER);
+    sc->gic_nirqs = GICD_TYPER_I_NUM(typer);
+    if (sc->gic_nirqs > GIC_I_NUM_MAX)
+        sc->gic_nirqs = GIC_I_NUM_MAX;
+
+    sc->gic_irqs = malloc(sizeof(*sc->gic_irqs) * sc->gic_nirqs,
+        M_GIC_V3, M_WAITOK | M_ZERO);
+    name = device_get_nameunit(dev);
+    for (irq = 0; irq < sc->gic_nirqs; irq++) {
+        struct intr_irqsrc *isrc;
+
+        sc->gic_irqs[irq].gi_irq = irq;
+        sc->gic_irqs[irq].gi_pol = INTR_POLARITY_CONFORM;
+        sc->gic_irqs[irq].gi_trig = INTR_TRIGGER_CONFORM;
+
+        isrc = &sc->gic_irqs[irq].gi_isrc;
+        if (irq <= GIC_LAST_SGI) {
+            err = intr_isrc_register(isrc, sc->dev,
+                INTR_ISRCF_IPI, "%s,i%u", name, irq - GIC_FIRST_SGI);
+        } else if (irq <= GIC_LAST_PPI) {
+            err = intr_isrc_register(isrc, sc->dev,
+                INTR_ISRCF_PPI, "%s,p%u", name, irq - GIC_FIRST_PPI);
+        } else {
+            err = intr_isrc_register(isrc, sc->dev, 0,
+                "%s,s%u", name, irq - GIC_FIRST_SPI);
+        }
+        if (err != 0) {
+            /* XXX call intr_isrc_deregister() */
+            free(sc->gic_irqs, M_GIC_V3);
+            return (err);
+        }
+    }
+
+    /*
+     * Read the Peripheral ID2 register. This is an implementation
+     * defined register, but seems to be implemented in all GICv3
+     * parts and Linux expects it to be there.
+     */
+    sc->gic_pidr2 = gic_d_read(sc, 4, GICD_PIDR2);
+
+    /* Get the number of supported interrupt identifier bits */
+    sc->gic_idbits = GICD_TYPER_IDBITS(typer);
+
+    if (bootverbose) {
+        device_printf(dev, "SPIs: %u, IDs: %u\n",
+            sc->gic_nirqs, (1 << sc->gic_idbits) - 1);
+    }
+
+    /* Run the init sequence for the boot CPU */
+    for (init_func = gic_v3_primary_init; *init_func != NULL; init_func++) {
+        err = (*init_func)(sc);
+        if (err != 0)
+            return (err);
+    }
+
+    return (0);
+}
+
+int
+gic_v3_detach(device_t dev)
+{
+    struct gic_v3_softc *sc;
+    size_t i;
+    int rid;
+
+    sc = device_get_softc(dev);
+
+    if (device_is_attached(dev)) {
+        /*
+         * XXX: We should probably deregister the PIC
+         */
+        if (sc->gic_registered)
+            panic("Trying to detach registered PIC");
+    }
+    for (rid = 0; rid < (sc->gic_redists.nregions + 1); rid++)
+        bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->gic_res[rid]);
+
+    for (i = 0; i <= mp_maxid; i++)
+        free(sc->gic_redists.pcpu[i], M_GIC_V3);
+
+    free(sc->gic_res, M_GIC_V3);
+    free(sc->gic_redists.regions, M_GIC_V3);
+
+    return (0);
+}
+
+static int
+gic_v3_get_domain(device_t dev, device_t child, int *domain)
+{
+    struct gic_v3_devinfo *di;
+
+    di = device_get_ivars(child);
+    if (di->gic_domain < 0)
+        return (ENOENT);
+
+    *domain = di->gic_domain;
+    return (0);
+}
+
+static int
+gic_v3_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
+{
+    struct gic_v3_softc *sc;
+
+    sc = device_get_softc(dev);
+
+    switch (which) {
+    case GICV3_IVAR_NIRQS:
+        *result = (NIRQ - sc->gic_nirqs) / sc->gic_nchildren;
+        return (0);
+    case GICV3_IVAR_REDIST:
+        *result = (uintptr_t)sc->gic_redists.pcpu[PCPU_GET(cpuid)];
+        return (0);
+    case GIC_IVAR_HW_REV:
+        KASSERT(
+            GICR_PIDR2_ARCH(sc->gic_pidr2) == GICR_PIDR2_ARCH_GICv3 ||
+            GICR_PIDR2_ARCH(sc->gic_pidr2) == GICR_PIDR2_ARCH_GICv4,
+            ("gic_v3_read_ivar: Invalid GIC architecture: %d (%.08X)",
+             GICR_PIDR2_ARCH(sc->gic_pidr2), sc->gic_pidr2));
+        *result = GICR_PIDR2_ARCH(sc->gic_pidr2);
+        return (0);
+    case GIC_IVAR_BUS:
+        KASSERT(sc->gic_bus != GIC_BUS_UNKNOWN,
+            ("gic_v3_read_ivar: Unknown bus type"));
+        KASSERT(sc->gic_bus <= GIC_BUS_MAX,
+            ("gic_v3_read_ivar: Invalid bus type %u",
sc->gic_bus)); + *result = sc->gic_bus; + return (0); + } + + return (ENOENT); +} + +int +arm_gic_v3_intr(void *arg) +{ + struct gic_v3_softc *sc = arg; + struct gic_v3_irqsrc *gi; + struct intr_pic *pic; + uint64_t active_irq; + struct trapframe *tf; + + pic = sc->gic_pic; + + while (1) { + if (CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1) { + /* + * Hardware: Cavium ThunderX + * Chip revision: Pass 1.0 (early version) + * Pass 1.1 (production) + * ERRATUM: 22978, 23154 + */ + __asm __volatile( + "nop;nop;nop;nop;nop;nop;nop;nop; \n" + "mrs %0, ICC_IAR1_EL1 \n" + "nop;nop;nop;nop; \n" + "dsb sy \n" + : "=&r" (active_irq)); + } else { + active_irq = gic_icc_read(IAR1); + } + + if (active_irq >= GIC_FIRST_LPI) { + intr_child_irq_handler(pic, active_irq); + continue; + } + + if (__predict_false(active_irq >= sc->gic_nirqs)) + return (FILTER_HANDLED); + + tf = curthread->td_intr_frame; + gi = &sc->gic_irqs[active_irq]; + if (active_irq <= GIC_LAST_SGI) { + /* Call EOI for all IPI before dispatch. */ + gic_icc_write(EOIR1, (uint64_t)active_irq); +#ifdef SMP + intr_ipi_dispatch(sgi_to_ipi[gi->gi_irq], tf); +#else + device_printf(sc->dev, "SGI %ju on UP system detected\n", + (uintmax_t)(active_irq - GIC_FIRST_SGI)); +#endif + } else if (active_irq >= GIC_FIRST_PPI && + active_irq <= GIC_LAST_SPI) { + if (gi->gi_trig == INTR_TRIGGER_EDGE) + gic_icc_write(EOIR1, gi->gi_irq); + + if (intr_isrc_dispatch(&gi->gi_isrc, tf) != 0) { + if (gi->gi_trig != INTR_TRIGGER_EDGE) + gic_icc_write(EOIR1, gi->gi_irq); + gic_v3_disable_intr(sc->dev, &gi->gi_isrc); + device_printf(sc->dev, + "Stray irq %lu disabled\n", active_irq); + } + } + } +} + +#ifdef FDT +static int +gic_map_fdt(device_t dev, u_int ncells, pcell_t *cells, u_int *irqp, + enum intr_polarity *polp, enum intr_trigger *trigp) +{ + u_int irq; + + if (ncells < 3) + return (EINVAL); + + /* + * The 1st cell is the interrupt type: + * 0 = SPI + * 1 = PPI + * The 2nd cell contains the interrupt number: + * [0 - 987] for SPI + * [0 - 15] for PPI + * The 3rd cell is the flags, encoded as follows: + * bits[3:0] trigger type and level flags + * 1 = edge triggered + * 2 = edge triggered (PPI only) + * 4 = level-sensitive + * 8 = level-sensitive (PPI only) + */ + switch (cells[0]) { + case 0: + irq = GIC_FIRST_SPI + cells[1]; + /* SPI irq is checked later. 
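+         * e.g. interrupts = <0 10 4> encodes SPI 10, level-sensitive,
+         * active-high, which maps here to irq = GIC_FIRST_SPI + 10
+         * (42 with the usual GIC_FIRST_SPI of 32), INTR_TRIGGER_LEVEL
+         * and INTR_POLARITY_HIGH.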
*/ + break; + case 1: + irq = GIC_FIRST_PPI + cells[1]; + if (irq > GIC_LAST_PPI) { + device_printf(dev, "unsupported PPI interrupt " + "number %u\n", cells[1]); + return (EINVAL); + } + break; + default: + device_printf(dev, "unsupported interrupt type " + "configuration %u\n", cells[0]); + return (EINVAL); + } + + switch (cells[2] & FDT_INTR_MASK) { + case FDT_INTR_EDGE_RISING: + *trigp = INTR_TRIGGER_EDGE; + *polp = INTR_POLARITY_HIGH; + break; + case FDT_INTR_EDGE_FALLING: + *trigp = INTR_TRIGGER_EDGE; + *polp = INTR_POLARITY_LOW; + break; + case FDT_INTR_LEVEL_HIGH: + *trigp = INTR_TRIGGER_LEVEL; + *polp = INTR_POLARITY_HIGH; + break; + case FDT_INTR_LEVEL_LOW: + *trigp = INTR_TRIGGER_LEVEL; + *polp = INTR_POLARITY_LOW; + break; + default: + device_printf(dev, "unsupported trigger/polarity " + "configuration 0x%02x\n", cells[2]); + return (EINVAL); + } + + /* Check the interrupt is valid */ + if (irq >= GIC_FIRST_SPI && *polp != INTR_POLARITY_HIGH) + return (EINVAL); + + *irqp = irq; + return (0); +} +#endif + +static int +gic_map_msi(device_t dev, struct intr_map_data_msi *msi_data, u_int *irqp, + enum intr_polarity *polp, enum intr_trigger *trigp) +{ + struct gic_v3_irqsrc *gi; + + /* SPI-mapped MSI */ + gi = (struct gic_v3_irqsrc *)msi_data->isrc; + if (gi == NULL) + return (ENXIO); + + *irqp = gi->gi_irq; + + /* MSI/MSI-X interrupts are always edge triggered with high polarity */ + *polp = INTR_POLARITY_HIGH; + *trigp = INTR_TRIGGER_EDGE; + + return (0); +} + +static int +do_gic_v3_map_intr(device_t dev, struct intr_map_data *data, u_int *irqp, + enum intr_polarity *polp, enum intr_trigger *trigp) +{ + struct gic_v3_softc *sc; + enum intr_polarity pol; + enum intr_trigger trig; + struct intr_map_data_msi *dam; +#ifdef FDT + struct intr_map_data_fdt *daf; +#endif +#ifdef DEV_ACPI + struct intr_map_data_acpi *daa; +#endif + u_int irq; + + sc = device_get_softc(dev); + + switch (data->type) { +#ifdef FDT + case INTR_MAP_DATA_FDT: + daf = (struct intr_map_data_fdt *)data; + if (gic_map_fdt(dev, daf->ncells, daf->cells, &irq, &pol, + &trig) != 0) + return (EINVAL); + break; +#endif +#ifdef DEV_ACPI + case INTR_MAP_DATA_ACPI: + daa = (struct intr_map_data_acpi *)data; + irq = daa->irq; + pol = daa->pol; + trig = daa->trig; + break; +#endif + case INTR_MAP_DATA_MSI: + /* SPI-mapped MSI */ + dam = (struct intr_map_data_msi *)data; + if (gic_map_msi(dev, dam, &irq, &pol, &trig) != 0) + return (EINVAL); + break; + default: + return (EINVAL); + } + + if (irq >= sc->gic_nirqs) + return (EINVAL); + switch (pol) { + case INTR_POLARITY_CONFORM: + case INTR_POLARITY_LOW: + case INTR_POLARITY_HIGH: + break; + default: + return (EINVAL); + } + switch (trig) { + case INTR_TRIGGER_CONFORM: + case INTR_TRIGGER_EDGE: + case INTR_TRIGGER_LEVEL: + break; + default: + return (EINVAL); + } + + *irqp = irq; + if (polp != NULL) + *polp = pol; + if (trigp != NULL) + *trigp = trig; + return (0); +} + +static int +gic_v3_map_intr(device_t dev, struct intr_map_data *data, + struct intr_irqsrc **isrcp) +{ + struct gic_v3_softc *sc; + int error; + u_int irq; + + error = do_gic_v3_map_intr(dev, data, &irq, NULL, NULL); + if (error == 0) { + sc = device_get_softc(dev); + *isrcp = GIC_INTR_ISRC(sc, irq); + } + return (error); +} + +static int +gic_v3_setup_intr(device_t dev, struct intr_irqsrc *isrc, + struct resource *res, struct intr_map_data *data) +{ + struct gic_v3_softc *sc = device_get_softc(dev); + struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc; + enum intr_trigger trig; + enum intr_polarity pol; + 
+    uint32_t reg;
+    u_int irq;
+    int error;
+
+    if (data == NULL)
+        return (ENOTSUP);
+
+    error = do_gic_v3_map_intr(dev, data, &irq, &pol, &trig);
+    if (error != 0)
+        return (error);
+
+    if (gi->gi_irq != irq || pol == INTR_POLARITY_CONFORM ||
+        trig == INTR_TRIGGER_CONFORM)
+        return (EINVAL);
+
+    /* Compare config if this is not the first setup. */
+    if (isrc->isrc_handlers != 0) {
+        if (pol != gi->gi_pol || trig != gi->gi_trig)
+            return (EINVAL);
+        else
+            return (0);
+    }
+
+    gi->gi_pol = pol;
+    gi->gi_trig = trig;
+
+    /*
+     * XXX - In case a per-CPU interrupt is going to be enabled at a time
+     *       when SMP is already started, we need some IPI call which
+     *       enables it on the other CPUs. Further, it's more complicated
+     *       as pic_enable_source() and pic_disable_source() should act
+     *       on a per-CPU basis only. Thus, it should be solved here
+     *       somehow.
+     */
+    if (isrc->isrc_flags & INTR_ISRCF_PPI)
+        CPU_SET(PCPU_GET(cpuid), &isrc->isrc_cpu);
+
+    if (irq >= GIC_FIRST_PPI && irq <= GIC_LAST_SPI) {
+        mtx_lock_spin(&sc->gic_mtx);
+
+        /* Set the trigger and polarity */
+        if (irq <= GIC_LAST_PPI)
+            reg = gic_r_read(sc, 4,
+                GICR_SGI_BASE_SIZE + GICD_ICFGR(irq));
+        else
+            reg = gic_d_read(sc, 4, GICD_ICFGR(irq));
+        if (trig == INTR_TRIGGER_LEVEL)
+            reg &= ~(2 << ((irq % 16) * 2));
+        else
+            reg |= 2 << ((irq % 16) * 2);
+
+        if (irq <= GIC_LAST_PPI) {
+            gic_r_write(sc, 4,
+                GICR_SGI_BASE_SIZE + GICD_ICFGR(irq), reg);
+            gic_v3_wait_for_rwp(sc, REDIST);
+        } else {
+            gic_d_write(sc, 4, GICD_ICFGR(irq), reg);
+            gic_v3_wait_for_rwp(sc, DIST);
+        }
+
+        mtx_unlock_spin(&sc->gic_mtx);
+
+        gic_v3_bind_intr(dev, isrc);
+    }
+
+    return (0);
+}
+
+static int
+gic_v3_teardown_intr(device_t dev, struct intr_irqsrc *isrc,
+    struct resource *res, struct intr_map_data *data)
+{
+    struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc;
+
+    if (isrc->isrc_handlers == 0) {
+        gi->gi_pol = INTR_POLARITY_CONFORM;
+        gi->gi_trig = INTR_TRIGGER_CONFORM;
+    }
+
+    return (0);
+}
+
+static void
+gic_v3_disable_intr(device_t dev, struct intr_irqsrc *isrc)
+{
+    struct gic_v3_softc *sc;
+    struct gic_v3_irqsrc *gi;
+    u_int irq;
+
+    sc = device_get_softc(dev);
+    gi = (struct gic_v3_irqsrc *)isrc;
+    irq = gi->gi_irq;
+
+    if (irq <= GIC_LAST_PPI) {
+        /* SGIs and PPIs in the corresponding Re-Distributor */
+        gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICD_ICENABLER(irq),
+            GICD_I_MASK(irq));
+        gic_v3_wait_for_rwp(sc, REDIST);
+    } else if (irq >= GIC_FIRST_SPI && irq <= GIC_LAST_SPI) {
+        /* SPIs in the Distributor */
+        gic_d_write(sc, 4, GICD_ICENABLER(irq), GICD_I_MASK(irq));
+        gic_v3_wait_for_rwp(sc, DIST);
+    } else
+        panic("%s: Unsupported IRQ %u", __func__, irq);
+}
+
+static void
+gic_v3_enable_intr(device_t dev, struct intr_irqsrc *isrc)
+{
+    struct gic_v3_softc *sc;
+    struct gic_v3_irqsrc *gi;
+    u_int irq;
+
+    sc = device_get_softc(dev);
+    gi = (struct gic_v3_irqsrc *)isrc;
+    irq = gi->gi_irq;
+
+    if (irq <= GIC_LAST_PPI) {
+        /* SGIs and PPIs in the corresponding Re-Distributor */
+        gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICD_ISENABLER(irq),
+            GICD_I_MASK(irq));
+        gic_v3_wait_for_rwp(sc, REDIST);
+    } else if (irq >= GIC_FIRST_SPI && irq <= GIC_LAST_SPI) {
+        /* SPIs in the Distributor */
+        gic_d_write(sc, 4, GICD_ISENABLER(irq), GICD_I_MASK(irq));
+        gic_v3_wait_for_rwp(sc, DIST);
+    } else
+        panic("%s: Unsupported IRQ %u", __func__, irq);
+}
+
+static void
+gic_v3_pre_ithread(device_t dev, struct intr_irqsrc *isrc)
+{
+    struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc;
+
+    gic_v3_disable_intr(dev, isrc);
+    gic_icc_write(EOIR1, gi->gi_irq);
+}
+
+static void
+gic_v3_post_ithread(device_t dev, struct intr_irqsrc *isrc)
+{
+
+    gic_v3_enable_intr(dev, isrc);
+}
+
+static void
+gic_v3_post_filter(device_t dev, struct intr_irqsrc *isrc)
+{
+    struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc;
+
+    if (gi->gi_trig == INTR_TRIGGER_EDGE)
+        return;
+
+    gic_icc_write(EOIR1, gi->gi_irq);
+}
+
+static int
+gic_v3_bind_intr(device_t dev, struct intr_irqsrc *isrc)
+{
+    struct gic_v3_softc *sc;
+    struct gic_v3_irqsrc *gi;
+    int cpu;
+
+    gi = (struct gic_v3_irqsrc *)isrc;
+    if (gi->gi_irq <= GIC_LAST_PPI)
+        return (EINVAL);
+
+    KASSERT(gi->gi_irq >= GIC_FIRST_SPI && gi->gi_irq <= GIC_LAST_SPI,
+        ("%s: Attempting to bind an invalid IRQ", __func__));
+
+    sc = device_get_softc(dev);
+
+    if (CPU_EMPTY(&isrc->isrc_cpu)) {
+        gic_irq_cpu = intr_irq_next_cpu(gic_irq_cpu, &all_cpus);
+        CPU_SETOF(gic_irq_cpu, &isrc->isrc_cpu);
+        gic_d_write(sc, 4, GICD_IROUTER(gi->gi_irq),
+            CPU_AFFINITY(gic_irq_cpu));
+    } else {
+        /*
+         * We can only bind to a single CPU, so select
+         * the first CPU found.
+         */
+        cpu = CPU_FFS(&isrc->isrc_cpu) - 1;
+        gic_d_write(sc, 4, GICD_IROUTER(gi->gi_irq), CPU_AFFINITY(cpu));
+    }
+
+    return (0);
+}
+
+#ifdef SMP
+static void
+gic_v3_init_secondary(device_t dev)
+{
+    device_t child;
+    struct gic_v3_softc *sc;
+    gic_v3_initseq_t *init_func;
+    struct intr_irqsrc *isrc;
+    u_int cpu, irq;
+    int err, i;
+
+    sc = device_get_softc(dev);
+    cpu = PCPU_GET(cpuid);
+
+    /* Run the init sequence on this secondary CPU */
+    for (init_func = gic_v3_secondary_init; *init_func != NULL;
+        init_func++) {
+        err = (*init_func)(sc);
+        if (err != 0) {
+            device_printf(dev,
+                "Could not initialize GIC for CPU%u\n", cpu);
+            return;
+        }
+    }
+
+    /* Unmask attached SGI interrupts. */
+    for (irq = GIC_FIRST_SGI; irq <= GIC_LAST_SGI; irq++) {
+        isrc = GIC_INTR_ISRC(sc, irq);
+        if (intr_isrc_init_on_cpu(isrc, cpu))
+            gic_v3_enable_intr(dev, isrc);
+    }
+
+    /* Unmask attached PPI interrupts.
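+     * SGI/PPI enable bits live in each CPU's own Re-Distributor
+     * (GICR_ISENABLER0), so this must be repeated on every CPU.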
+     */
+    for (irq = GIC_FIRST_PPI; irq <= GIC_LAST_PPI; irq++) {
+        isrc = GIC_INTR_ISRC(sc, irq);
+        if (intr_isrc_init_on_cpu(isrc, cpu))
+            gic_v3_enable_intr(dev, isrc);
+    }
+
+    for (i = 0; i < sc->gic_nchildren; i++) {
+        child = sc->gic_children[i];
+        PIC_INIT_SECONDARY(child);
+    }
+}
+
+static void
+gic_v3_ipi_send(device_t dev, struct intr_irqsrc *isrc, cpuset_t cpus,
+    u_int ipi)
+{
+    struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc;
+    uint64_t aff, val, irq;
+    int i;
+
+#define GIC_AFF_MASK (CPU_AFF3_MASK | CPU_AFF2_MASK | CPU_AFF1_MASK)
+#define GIC_AFFINITY(i) (CPU_AFFINITY(i) & GIC_AFF_MASK)
+    aff = GIC_AFFINITY(0);
+    irq = gi->gi_irq;
+    val = 0;
+
+    /* Iterate through all CPUs in the set */
+    for (i = 0; i <= mp_maxid; i++) {
+        /* Move to the next affinity group */
+        if (aff != GIC_AFFINITY(i)) {
+            /* Send the IPI */
+            if (val != 0) {
+                gic_icc_write(SGI1R, val);
+                val = 0;
+            }
+            aff = GIC_AFFINITY(i);
+        }
+
+        /* Send the IPI to this cpu */
+        if (CPU_ISSET(i, &cpus)) {
+#define ICC_SGI1R_AFFINITY(aff) \
+    (((uint64_t)CPU_AFF3(aff) << ICC_SGI1R_EL1_AFF3_SHIFT) | \
+     ((uint64_t)CPU_AFF2(aff) << ICC_SGI1R_EL1_AFF2_SHIFT) | \
+     ((uint64_t)CPU_AFF1(aff) << ICC_SGI1R_EL1_AFF1_SHIFT))
+            /* Set the affinity when this is the first CPU at this level */
+            if (val == 0)
+                val = ICC_SGI1R_AFFINITY(aff) |
+                    irq << ICC_SGI1R_EL1_SGIID_SHIFT;
+            /* Set the bit to send the IPI to the CPU */
+            val |= 1 << CPU_AFF0(CPU_AFFINITY(i));
+        }
+    }
+
+    /* Send the IPI to the last cpu affinity group */
+    if (val != 0)
+        gic_icc_write(SGI1R, val);
+#undef GIC_AFF_MASK
+#undef GIC_AFFINITY
+}
+
+static int
+gic_v3_ipi_setup(device_t dev, u_int ipi, struct intr_irqsrc **isrcp)
+{
+    struct intr_irqsrc *isrc;
+    struct gic_v3_softc *sc = device_get_softc(dev);
+
+    if (sgi_first_unused > GIC_LAST_SGI)
+        return (ENOSPC);
+
+    isrc = GIC_INTR_ISRC(sc, sgi_first_unused);
+    sgi_to_ipi[sgi_first_unused++] = ipi;
+
+    CPU_SET(PCPU_GET(cpuid), &isrc->isrc_cpu);
+
+    *isrcp = isrc;
+    return (0);
+}
+#endif /* SMP */
+
+/*
+ * Helper routines
+ */
+static void
+gic_v3_wait_for_rwp(struct gic_v3_softc *sc, enum gic_v3_xdist xdist)
+{
+    struct resource *res;
+    u_int cpuid;
+    size_t us_left = 1000000;
+
+    cpuid = PCPU_GET(cpuid);
+
+    switch (xdist) {
+    case DIST:
+        res = sc->gic_dist;
+        break;
+    case REDIST:
+        res = &sc->gic_redists.pcpu[cpuid]->res;
+        break;
+    default:
+        KASSERT(0, ("%s: Attempt to wait for unknown RWP", __func__));
+        return;
+    }
+
+    while ((bus_read_4(res, GICD_CTLR) & GICD_CTLR_RWP) != 0) {
+        DELAY(1);
+        if (us_left-- == 0)
+            panic("GICD Register write pending for too long");
+    }
+}
+
+/* CPU interface. */
+static __inline void
+gic_v3_cpu_priority(uint64_t mask)
+{
+
+    /* Set the priority mask */
+    gic_icc_write(PMR, mask & ICC_PMR_EL1_PRIO_MASK);
+}
+
+static int
+gic_v3_cpu_enable_sre(struct gic_v3_softc *sc)
+{
+    uint64_t sre;
+    u_int cpuid;
+
+    cpuid = PCPU_GET(cpuid);
+    /*
+     * Set the SRE bit to enable access to the GIC CPU interface
+     * via system registers.
+     */
+    sre = READ_SPECIALREG(icc_sre_el1);
+    sre |= ICC_SRE_EL1_SRE;
+    WRITE_SPECIALREG(icc_sre_el1, sre);
+    isb();
+    /*
+     * Now ensure that the bit is set.
+     */
+    sre = READ_SPECIALREG(icc_sre_el1);
+    if ((sre & ICC_SRE_EL1_SRE) == 0) {
+        /* We are done.
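+         * Per the GICv3 architecture, writes to ICC_SRE_EL1.SRE are
+         * ignored while the EL2/EL3 SRE controls keep the
+         * system-register interface disabled, so there is nothing
+         * more this driver can do here.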
+         * This was disabled at EL2. */
+        device_printf(sc->dev, "ERROR: CPU%u cannot enable CPU interface "
+            "via system registers\n", cpuid);
+        return (ENXIO);
+    } else if (bootverbose) {
+        device_printf(sc->dev,
+            "CPU%u enabled CPU interface via system registers\n",
+            cpuid);
+    }
+
+    return (0);
+}
+
+static int
+gic_v3_cpu_init(struct gic_v3_softc *sc)
+{
+    int err;
+
+    /* Enable access to the CPU interface via system registers */
+    err = gic_v3_cpu_enable_sre(sc);
+    if (err != 0)
+        return (err);
+    /* Priority mask to minimum - accept all interrupts */
+    gic_v3_cpu_priority(GIC_PRIORITY_MIN);
+    /* Disable EOI mode */
+    gic_icc_clear(CTLR, ICC_CTLR_EL1_EOIMODE);
+    /* Enable Group 1 (Non-secure) interrupts */
+    gic_icc_set(IGRPEN1, ICC_IGRPEN0_EL1_EN);
+
+    return (0);
+}
+
+/* Distributor */
+static int
+gic_v3_dist_init(struct gic_v3_softc *sc)
+{
+    uint64_t aff;
+    u_int i;
+
+    /*
+     * 1. Disable the Distributor
+     */
+    gic_d_write(sc, 4, GICD_CTLR, 0);
+    gic_v3_wait_for_rwp(sc, DIST);
+
+    /*
+     * 2. Configure the Distributor
+     */
+    /* Set all SPIs to be Group 1 Non-secure */
+    for (i = GIC_FIRST_SPI; i < sc->gic_nirqs; i += GICD_I_PER_IGROUPRn)
+        gic_d_write(sc, 4, GICD_IGROUPR(i), 0xFFFFFFFF);
+
+    /* Set all global interrupts to be level-sensitive */
+    for (i = GIC_FIRST_SPI; i < sc->gic_nirqs; i += GICD_I_PER_ICFGRn)
+        gic_d_write(sc, 4, GICD_ICFGR(i), 0x00000000);
+
+    /* Set the priority of all shared interrupts */
+    for (i = GIC_FIRST_SPI;
+        i < sc->gic_nirqs; i += GICD_I_PER_IPRIORITYn) {
+        /* Set highest priority */
+        gic_d_write(sc, 4, GICD_IPRIORITYR(i), GIC_PRIORITY_MAX);
+    }
+
+    /*
+     * Disable all interrupts. Leave PPIs and SGIs as they are enabled in
+     * the Re-Distributor registers.
+     */
+    for (i = GIC_FIRST_SPI; i < sc->gic_nirqs; i += GICD_I_PER_ISENABLERn)
+        gic_d_write(sc, 4, GICD_ICENABLER(i), 0xFFFFFFFF);
+
+    gic_v3_wait_for_rwp(sc, DIST);
+
+    /*
+     * 3. Enable the Distributor
+     */
+    /* Enable the Distributor with ARE, Group 1 */
+    gic_d_write(sc, 4, GICD_CTLR, GICD_CTLR_ARE_NS | GICD_CTLR_G1A |
+        GICD_CTLR_G1);
+
+    /*
+     * 4. Route all interrupts to the boot CPU.
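+     * GICD_IROUTER takes an Aff3.Aff2.Aff1.Aff0 target; with the
+     * routing-mode bit clear each SPI is delivered to exactly this
+     * PE until gic_v3_bind_intr() retargets it.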
+ */ + aff = CPU_AFFINITY(0); + for (i = GIC_FIRST_SPI; i < sc->gic_nirqs; i++) + gic_d_write(sc, 4, GICD_IROUTER(i), aff); + + return (0); +} + +/* Re-Distributor */ +static int +gic_v3_redist_alloc(struct gic_v3_softc *sc) +{ + u_int cpuid; + + /* Allocate struct resource for all CPU's Re-Distributor registers */ + for (cpuid = 0; cpuid <= mp_maxid; cpuid++) + if (CPU_ISSET(cpuid, &all_cpus) != 0) + sc->gic_redists.pcpu[cpuid] = + malloc(sizeof(*sc->gic_redists.pcpu[0]), + M_GIC_V3, M_WAITOK); + else + sc->gic_redists.pcpu[cpuid] = NULL; + return (0); +} + +static int +gic_v3_redist_find(struct gic_v3_softc *sc) +{ + struct resource r_res; + bus_space_handle_t r_bsh; + uint64_t aff; + uint64_t typer; + uint32_t pidr2; + u_int cpuid; + size_t i; + + cpuid = PCPU_GET(cpuid); + + aff = CPU_AFFINITY(cpuid); + /* Affinity in format for comparison with typer */ + aff = (CPU_AFF3(aff) << 24) | (CPU_AFF2(aff) << 16) | + (CPU_AFF1(aff) << 8) | CPU_AFF0(aff); + + if (bootverbose) { + device_printf(sc->dev, + "Start searching for Re-Distributor\n"); + } + /* Iterate through Re-Distributor regions */ + for (i = 0; i < sc->gic_redists.nregions; i++) { + /* Take a copy of the region's resource */ + r_res = *sc->gic_redists.regions[i]; + r_bsh = rman_get_bushandle(&r_res); + + pidr2 = bus_read_4(&r_res, GICR_PIDR2); + switch (GICR_PIDR2_ARCH(pidr2)) { + case GICR_PIDR2_ARCH_GICv3: /* fall through */ + case GICR_PIDR2_ARCH_GICv4: + break; + default: + device_printf(sc->dev, + "No Re-Distributor found for CPU%u\n", cpuid); + return (ENODEV); + } + + do { + typer = bus_read_8(&r_res, GICR_TYPER); + if ((typer >> GICR_TYPER_AFF_SHIFT) == aff) { + KASSERT(sc->gic_redists.pcpu[cpuid] != NULL, + ("Invalid pointer to per-CPU redistributor")); + /* Copy res contents to its final destination */ + sc->gic_redists.pcpu[cpuid]->res = r_res; + sc->gic_redists.pcpu[cpuid]->lpi_enabled = false; + if (bootverbose) { + device_printf(sc->dev, + "CPU%u Re-Distributor has been found\n", + cpuid); + } + return (0); + } + + r_bsh += (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE); + if ((typer & GICR_TYPER_VLPIS) != 0) { + r_bsh += + (GICR_VLPI_BASE_SIZE + GICR_RESERVED_SIZE); + } + + rman_set_bushandle(&r_res, r_bsh); + } while ((typer & GICR_TYPER_LAST) == 0); + } + + device_printf(sc->dev, "No Re-Distributor found for CPU%u\n", cpuid); + return (ENXIO); +} + +static int +gic_v3_redist_wake(struct gic_v3_softc *sc) +{ + uint32_t waker; + size_t us_left = 1000000; + + waker = gic_r_read(sc, 4, GICR_WAKER); + /* Wake up Re-Distributor for this CPU */ + waker &= ~GICR_WAKER_PS; + gic_r_write(sc, 4, GICR_WAKER, waker); + /* + * When clearing ProcessorSleep bit it is required to wait for + * ChildrenAsleep to become zero following the processor power-on. 
+     */
+    while ((gic_r_read(sc, 4, GICR_WAKER) & GICR_WAKER_CA) != 0) {
+        DELAY(1);
+        if (us_left-- == 0) {
+            panic("Could not wake Re-Distributor for CPU%u",
+                PCPU_GET(cpuid));
+        }
+    }
+
+    if (bootverbose) {
+        device_printf(sc->dev, "CPU%u Re-Distributor woke up\n",
+            PCPU_GET(cpuid));
+    }
+
+    return (0);
+}
+
+static int
+gic_v3_redist_init(struct gic_v3_softc *sc)
+{
+    int err;
+    size_t i;
+
+    err = gic_v3_redist_find(sc);
+    if (err != 0)
+        return (err);
+
+    err = gic_v3_redist_wake(sc);
+    if (err != 0)
+        return (err);
+
+    /* Configure SGIs and PPIs to be Group 1 Non-secure */
+    gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICR_IGROUPR0,
+        0xFFFFFFFF);
+
+    /* Disable PPIs */
+    gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICR_ICENABLER0,
+        GICR_I_ENABLER_PPI_MASK);
+    /* Enable SGIs */
+    gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICR_ISENABLER0,
+        GICR_I_ENABLER_SGI_MASK);
+
+    /* Set the priority for SGIs and PPIs */
+    for (i = 0; i <= GIC_LAST_PPI; i += GICR_I_PER_IPRIORITYn) {
+        gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICD_IPRIORITYR(i),
+            GIC_PRIORITY_MAX);
+    }
+
+    gic_v3_wait_for_rwp(sc, REDIST);
+
+    return (0);
+}
diff --git a/sys/arm64/arm64/gic_v3_acpi.c b/sys/arm64/arm64/gic_v3_acpi.c
new file mode 100644
index 000000000000..b54ecfb014e5
--- /dev/null
+++ b/sys/arm64/arm64/gic_v3_acpi.c
@@ -0,0 +1,389 @@
+/*-
+ * Copyright (c) 2016 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */ + +#include "opt_acpi.h" + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/rman.h> + +#include <machine/intr.h> +#include <machine/resource.h> + +#include <contrib/dev/acpica/include/acpi.h> +#include <dev/acpica/acpivar.h> + +#include "gic_v3_reg.h" +#include "gic_v3_var.h" + +struct gic_v3_acpi_devinfo { + struct gic_v3_devinfo di_gic_dinfo; + struct resource_list di_rl; +}; + +static device_identify_t gic_v3_acpi_identify; +static device_probe_t gic_v3_acpi_probe; +static device_attach_t gic_v3_acpi_attach; +static bus_alloc_resource_t gic_v3_acpi_bus_alloc_res; + +static void gic_v3_acpi_bus_attach(device_t); + +static device_method_t gic_v3_acpi_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, gic_v3_acpi_identify), + DEVMETHOD(device_probe, gic_v3_acpi_probe), + DEVMETHOD(device_attach, gic_v3_acpi_attach), + + /* Bus interface */ + DEVMETHOD(bus_alloc_resource, gic_v3_acpi_bus_alloc_res), + DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), + + /* End */ + DEVMETHOD_END +}; + +DEFINE_CLASS_1(gic, gic_v3_acpi_driver, gic_v3_acpi_methods, + sizeof(struct gic_v3_softc), gic_v3_driver); + +static devclass_t gic_v3_acpi_devclass; + +EARLY_DRIVER_MODULE(gic_v3, acpi, gic_v3_acpi_driver, gic_v3_acpi_devclass, + 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); + +struct madt_table_data { + device_t parent; + device_t dev; + ACPI_MADT_GENERIC_DISTRIBUTOR *dist; + int count; +}; + +static void +madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) +{ + struct madt_table_data *madt_data; + + madt_data = (struct madt_table_data *)arg; + + switch(entry->Type) { + case ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR: + if (madt_data->dist != NULL) { + if (bootverbose) + device_printf(madt_data->parent, + "gic: Already have a distributor table"); + break; + } + madt_data->dist = (ACPI_MADT_GENERIC_DISTRIBUTOR *)entry; + break; + + case ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR: + break; + + default: + break; + } +} + +static void +rdist_map(ACPI_SUBTABLE_HEADER *entry, void *arg) +{ + ACPI_MADT_GENERIC_REDISTRIBUTOR *redist; + struct madt_table_data *madt_data; + + madt_data = (struct madt_table_data *)arg; + + switch(entry->Type) { + case ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR: + redist = (ACPI_MADT_GENERIC_REDISTRIBUTOR *)entry; + + madt_data->count++; + BUS_SET_RESOURCE(madt_data->parent, madt_data->dev, + SYS_RES_MEMORY, madt_data->count, redist->BaseAddress, + redist->Length); + break; + + default: + break; + } +} + +static void +gic_v3_acpi_identify(driver_t *driver, device_t parent) +{ + struct madt_table_data madt_data; + ACPI_TABLE_MADT *madt; + vm_paddr_t physaddr; + device_t dev; + + physaddr = acpi_find_table(ACPI_SIG_MADT); + if (physaddr == 0) + return; + + madt = acpi_map_table(physaddr, ACPI_SIG_MADT); + if (madt == NULL) { + device_printf(parent, "gic: Unable to map the MADT\n"); + return; + } + + madt_data.parent = parent; + madt_data.dist = NULL; + madt_data.count = 0; + + acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, + madt_handler, &madt_data); + if (madt_data.dist == NULL) { + device_printf(parent, + "No gic interrupt or distributor table\n"); + goto out; + } + /* This is for the wrong GIC version */ + if (madt_data.dist->Version != ACPI_MADT_GIC_VERSION_V3) + goto out; + + dev = BUS_ADD_CHILD(parent, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE, + "gic", -1); + if (dev == NULL) { 
+ device_printf(parent, "add gic child failed\n"); + goto out; + } + + /* Add the MADT data */ + BUS_SET_RESOURCE(parent, dev, SYS_RES_MEMORY, 0, + madt_data.dist->BaseAddress, 128 * 1024); + + madt_data.dev = dev; + acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, + rdist_map, &madt_data); + + acpi_set_private(dev, (void *)(uintptr_t)madt_data.dist->Version); + +out: + acpi_unmap_table(madt); +} + +static int +gic_v3_acpi_probe(device_t dev) +{ + + switch((uintptr_t)acpi_get_private(dev)) { + case ACPI_MADT_GIC_VERSION_V3: + break; + default: + return (ENXIO); + } + + device_set_desc(dev, GIC_V3_DEVSTR); + return (BUS_PROBE_NOWILDCARD); +} + +static void +madt_count_redistrib(ACPI_SUBTABLE_HEADER *entry, void *arg) +{ + struct gic_v3_softc *sc = arg; + + if (entry->Type == ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR) + sc->gic_redists.nregions++; +} + +static int +gic_v3_acpi_count_regions(device_t dev) +{ + struct gic_v3_softc *sc; + ACPI_TABLE_MADT *madt; + vm_paddr_t physaddr; + + sc = device_get_softc(dev); + + physaddr = acpi_find_table(ACPI_SIG_MADT); + if (physaddr == 0) + return (ENXIO); + + madt = acpi_map_table(physaddr, ACPI_SIG_MADT); + if (madt == NULL) { + device_printf(dev, "Unable to map the MADT\n"); + return (ENXIO); + } + + acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, + madt_count_redistrib, sc); + acpi_unmap_table(madt); + + return (sc->gic_redists.nregions > 0 ? 0 : ENXIO); +} + +static int +gic_v3_acpi_attach(device_t dev) +{ + struct gic_v3_softc *sc; + int err; + + sc = device_get_softc(dev); + sc->dev = dev; + sc->gic_bus = GIC_BUS_ACPI; + + err = gic_v3_acpi_count_regions(dev); + if (err != 0) + goto error; + + err = gic_v3_attach(dev); + if (err != 0) + goto error; + + sc->gic_pic = intr_pic_register(dev, ACPI_INTR_XREF); + if (sc->gic_pic == NULL) { + device_printf(dev, "could not register PIC\n"); + err = ENXIO; + goto error; + } + + if (intr_pic_claim_root(dev, ACPI_INTR_XREF, arm_gic_v3_intr, sc, + GIC_LAST_SGI - GIC_FIRST_SGI + 1) != 0) { + err = ENXIO; + goto error; + } + + /* + * Try to register the ITS driver to this GIC. The GIC will act as + * a bus in that case. Failure here will not affect the main GIC + * functionality. + */ + gic_v3_acpi_bus_attach(dev); + + if (device_get_children(dev, &sc->gic_children, &sc->gic_nchildren) !=0) + sc->gic_nchildren = 0; + + return (0); + +error: + if (bootverbose) { + device_printf(dev, + "Failed to attach. 
Error %d\n", err); + } + /* Failure so free resources */ + gic_v3_detach(dev); + + return (err); +} + +static void +gic_v3_add_children(ACPI_SUBTABLE_HEADER *entry, void *arg) +{ + ACPI_MADT_GENERIC_TRANSLATOR *gict; + struct gic_v3_acpi_devinfo *di; + struct gic_v3_softc *sc; + device_t child, dev; + u_int xref; + int err, pxm; + + if (entry->Type == ACPI_MADT_TYPE_GENERIC_TRANSLATOR) { + /* We have an ITS, add it as a child */ + gict = (ACPI_MADT_GENERIC_TRANSLATOR *)entry; + dev = arg; + sc = device_get_softc(dev); + + child = device_add_child(dev, "its", -1); + if (child == NULL) + return; + + di = malloc(sizeof(*di), M_GIC_V3, M_WAITOK | M_ZERO); + resource_list_init(&di->di_rl); + resource_list_add(&di->di_rl, SYS_RES_MEMORY, 0, + gict->BaseAddress, gict->BaseAddress + 128 * 1024 - 1, + 128 * 1024); + err = acpi_iort_its_lookup(gict->TranslationId, &xref, &pxm); + if (err == 0) { + di->di_gic_dinfo.gic_domain = pxm; + di->di_gic_dinfo.msi_xref = xref; + } else { + di->di_gic_dinfo.gic_domain = -1; + di->di_gic_dinfo.msi_xref = ACPI_MSI_XREF; + } + sc->gic_nchildren++; + device_set_ivars(child, di); + } +} + +static void +gic_v3_acpi_bus_attach(device_t dev) +{ + ACPI_TABLE_MADT *madt; + vm_paddr_t physaddr; + + physaddr = acpi_find_table(ACPI_SIG_MADT); + if (physaddr == 0) + return; + + madt = acpi_map_table(physaddr, ACPI_SIG_MADT); + if (madt == NULL) { + device_printf(dev, "Unable to map the MADT to add children\n"); + return; + } + + acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, + gic_v3_add_children, dev); + + acpi_unmap_table(madt); + + bus_generic_attach(dev); +} + +static struct resource * +gic_v3_acpi_bus_alloc_res(device_t bus, device_t child, int type, int *rid, + rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) +{ + struct gic_v3_acpi_devinfo *di; + struct resource_list_entry *rle; + + /* We only allocate memory */ + if (type != SYS_RES_MEMORY) + return (NULL); + + if (RMAN_IS_DEFAULT_RANGE(start, end)) { + if ((di = device_get_ivars(child)) == NULL) + return (NULL); + + /* Find defaults for this rid */ + rle = resource_list_find(&di->di_rl, type, *rid); + if (rle == NULL) + return (NULL); + + start = rle->start; + end = rle->end; + count = rle->count; + } + + return (bus_generic_alloc_resource(bus, child, type, rid, start, end, + count, flags)); +} diff --git a/sys/arm64/arm64/gic_v3_fdt.c b/sys/arm64/arm64/gic_v3_fdt.c new file mode 100644 index 000000000000..c8a9615a8a5f --- /dev/null +++ b/sys/arm64/arm64/gic_v3_fdt.c @@ -0,0 +1,331 @@ +/*- + * Copyright (c) 2015 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Semihalf under + * the sponsorship of the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bitstring.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/rman.h> + +#include <machine/intr.h> +#include <machine/resource.h> + +#include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +#include <arm/arm/gic_common.h> +#include "gic_v3_reg.h" +#include "gic_v3_var.h" + +/* + * FDT glue. + */ +static int gic_v3_fdt_probe(device_t); +static int gic_v3_fdt_attach(device_t); +static int gic_v3_fdt_print_child(device_t, device_t); + +static struct resource *gic_v3_ofw_bus_alloc_res(device_t, device_t, int, int *, + rman_res_t, rman_res_t, rman_res_t, u_int); +static const struct ofw_bus_devinfo *gic_v3_ofw_get_devinfo(device_t, device_t); + +static device_method_t gic_v3_fdt_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, gic_v3_fdt_probe), + DEVMETHOD(device_attach, gic_v3_fdt_attach), + + /* Bus interface */ + DEVMETHOD(bus_print_child, gic_v3_fdt_print_child), + DEVMETHOD(bus_alloc_resource, gic_v3_ofw_bus_alloc_res), + DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), + + /* ofw_bus interface */ + DEVMETHOD(ofw_bus_get_devinfo, gic_v3_ofw_get_devinfo), + DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), + DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), + DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), + DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), + DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), + + /* End */ + DEVMETHOD_END +}; + +DEFINE_CLASS_1(gic, gic_v3_fdt_driver, gic_v3_fdt_methods, + sizeof(struct gic_v3_softc), gic_v3_driver); + +static devclass_t gic_v3_fdt_devclass; + +EARLY_DRIVER_MODULE(gic_v3, simplebus, gic_v3_fdt_driver, gic_v3_fdt_devclass, + 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); +EARLY_DRIVER_MODULE(gic_v3, ofwbus, gic_v3_fdt_driver, gic_v3_fdt_devclass, + 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); + +/* + * Helper functions declarations. + */ +static int gic_v3_ofw_bus_attach(device_t); + +/* + * Device interface. + */ +static int +gic_v3_fdt_probe(device_t dev) +{ + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_is_compatible(dev, "arm,gic-v3")) + return (ENXIO); + + device_set_desc(dev, GIC_V3_DEVSTR); + return (BUS_PROBE_DEFAULT); +} + +static int +gic_v3_fdt_attach(device_t dev) +{ + struct gic_v3_softc *sc; + pcell_t redist_regions; + intptr_t xref; + int err; + + sc = device_get_softc(dev); + sc->dev = dev; + sc->gic_bus = GIC_BUS_FDT; + + /* + * Recover number of the Re-Distributor regions. 
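+     * ("#redistributor-regions" in the devicetree binding, e.g.
+     * #redistributor-regions = <2>; a single region is assumed when
+     * the property is absent).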
+     */
+    if (OF_getencprop(ofw_bus_get_node(dev), "#redistributor-regions",
+        &redist_regions, sizeof(redist_regions)) <= 0)
+        sc->gic_redists.nregions = 1;
+    else
+        sc->gic_redists.nregions = redist_regions;
+
+    err = gic_v3_attach(dev);
+    if (err != 0)
+        goto error;
+
+    xref = OF_xref_from_node(ofw_bus_get_node(dev));
+    sc->gic_pic = intr_pic_register(dev, xref);
+    if (sc->gic_pic == NULL) {
+        device_printf(dev, "could not register PIC\n");
+        err = ENXIO;
+        goto error;
+    }
+
+    /* Register the xref */
+    OF_device_register_xref(xref, dev);
+
+    if (intr_pic_claim_root(dev, xref, arm_gic_v3_intr, sc,
+        GIC_LAST_SGI - GIC_FIRST_SGI + 1) != 0) {
+        err = ENXIO;
+        goto error;
+    }
+
+    /*
+     * Try to register the ITS with this GIC.
+     * The GIC will act as a bus in that case.
+     * Failure here will not affect the main GIC functionality.
+     */
+    if (gic_v3_ofw_bus_attach(dev) != 0) {
+        if (bootverbose) {
+            device_printf(dev,
+                "Failed to attach ITS to this GIC\n");
+        }
+    }
+
+    if (device_get_children(dev, &sc->gic_children, &sc->gic_nchildren) != 0)
+        sc->gic_nchildren = 0;
+
+    return (err);
+
+error:
+    if (bootverbose) {
+        device_printf(dev,
+            "Failed to attach. Error %d\n", err);
+    }
+    /* Failure, so free resources */
+    gic_v3_detach(dev);
+
+    return (err);
+}
+
+/* OFW bus interface */
+struct gic_v3_ofw_devinfo {
+    struct gic_v3_devinfo di_gic_dinfo;
+    struct ofw_bus_devinfo di_dinfo;
+    struct resource_list di_rl;
+};
+
+static int
+gic_v3_fdt_print_child(device_t bus, device_t child)
+{
+    struct gic_v3_ofw_devinfo *di = device_get_ivars(child);
+    struct resource_list *rl = &di->di_rl;
+    int retval = 0;
+
+    retval += bus_print_child_header(bus, child);
+    retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
+    retval += bus_print_child_footer(bus, child);
+
+    return (retval);
+}
+
+static const struct ofw_bus_devinfo *
+gic_v3_ofw_get_devinfo(device_t bus __unused, device_t child)
+{
+    struct gic_v3_ofw_devinfo *di;
+
+    di = device_get_ivars(child);
+    return (&di->di_dinfo);
+}
+
+static struct resource *
+gic_v3_ofw_bus_alloc_res(device_t bus, device_t child, int type, int *rid,
+    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
+{
+    struct gic_v3_ofw_devinfo *di;
+    struct resource_list_entry *rle;
+    int ranges_len;
+
+    if (RMAN_IS_DEFAULT_RANGE(start, end)) {
+        if ((di = device_get_ivars(child)) == NULL)
+            return (NULL);
+        if (type != SYS_RES_MEMORY)
+            return (NULL);
+
+        /* Find the defaults for this rid */
+        rle = resource_list_find(&di->di_rl, type, *rid);
+        if (rle == NULL)
+            return (NULL);
+
+        start = rle->start;
+        end = rle->end;
+        count = rle->count;
+    }
+    /*
+     * XXX: No ranges remap!
+     *      An absolute address is expected.
+     */
+    if (ofw_bus_has_prop(bus, "ranges")) {
+        ranges_len = OF_getproplen(ofw_bus_get_node(bus), "ranges");
+        if (ranges_len != 0) {
+            if (bootverbose) {
+                device_printf(child,
+                    "Ranges remap not supported\n");
+            }
+            return (NULL);
+        }
+    }
+    return (bus_generic_alloc_resource(bus, child, type, rid, start, end,
+        count, flags));
+}
+
+/* Helper functions */
+
+/*
+ * Bus capability support for GICv3.
+ * Collects and configures device information and finally
+ * adds the ITS device as a child of GICv3 in the Newbus hierarchy.
+ */ +static int +gic_v3_ofw_bus_attach(device_t dev) +{ + struct gic_v3_ofw_devinfo *di; + struct gic_v3_softc *sc; + device_t child; + phandle_t parent, node; + pcell_t addr_cells, size_cells; + + sc = device_get_softc(dev); + parent = ofw_bus_get_node(dev); + if (parent > 0) { + addr_cells = 2; + OF_getencprop(parent, "#address-cells", &addr_cells, + sizeof(addr_cells)); + size_cells = 2; + OF_getencprop(parent, "#size-cells", &size_cells, + sizeof(size_cells)); + /* Iterate through all GIC subordinates */ + for (node = OF_child(parent); node > 0; node = OF_peer(node)) { + /* Allocate and populate devinfo. */ + di = malloc(sizeof(*di), M_GIC_V3, M_WAITOK | M_ZERO); + + /* Read the numa node, or -1 if there is none */ + if (OF_getencprop(node, "numa-node-id", + &di->di_gic_dinfo.gic_domain, + sizeof(di->di_gic_dinfo.gic_domain)) <= 0) { + di->di_gic_dinfo.gic_domain = -1; + } + + if (ofw_bus_gen_setup_devinfo(&di->di_dinfo, node)) { + if (bootverbose) { + device_printf(dev, + "Could not set up devinfo for ITS\n"); + } + free(di, M_GIC_V3); + continue; + } + + /* Initialize and populate resource list. */ + resource_list_init(&di->di_rl); + ofw_bus_reg_to_rl(dev, node, addr_cells, size_cells, + &di->di_rl); + + /* Should not have any interrupts, so don't add any */ + + /* Add newbus device for this FDT node */ + child = device_add_child(dev, NULL, -1); + if (!child) { + if (bootverbose) { + device_printf(dev, + "Could not add child: %s\n", + di->di_dinfo.obd_name); + } + resource_list_free(&di->di_rl); + ofw_bus_gen_destroy_devinfo(&di->di_dinfo); + free(di, M_GIC_V3); + continue; + } + + sc->gic_nchildren++; + device_set_ivars(child, di); + } + } + + return (bus_generic_attach(dev)); +} diff --git a/sys/arm64/arm64/gic_v3_reg.h b/sys/arm64/arm64/gic_v3_reg.h new file mode 100644 index 000000000000..34082b1bde0a --- /dev/null +++ b/sys/arm64/arm64/gic_v3_reg.h @@ -0,0 +1,434 @@ +/*- + * Copyright (c) 2015 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Semihalf under + * the sponsorship of the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _GIC_V3_REG_H_ +#define _GIC_V3_REG_H_ + +/* + * Maximum number of interrupts + * supported by GIC (including SGIs, PPIs and SPIs) + */ +#define GIC_I_NUM_MAX (1020) +/* + * Priority MAX/MIN values + */ +#define GIC_PRIORITY_MAX (0x00UL) +/* Upper value is determined by LPI max priority */ +#define GIC_PRIORITY_MIN (0xFCUL) + +/* Numbers for shared peripheral interrupts */ +#define GIC_LAST_SPI (1019) +/* Numbers for local peripheral interrupts */ +#define GIC_FIRST_LPI (8192) + +/* + * Registers (v2/v3) + */ +/* GICD_CTLR */ +#define GICD_CTLR_G1 (1 << 0) +#define GICD_CTLR_G1A (1 << 1) +#define GICD_CTLR_ARE_NS (1 << 4) +#define GICD_CTLR_RWP (1 << 31) +/* GICD_TYPER */ +#define GICD_TYPER_IDBITS(n) ((((n) >> 19) & 0x1F) + 1) + +/* + * Registers (v3) + */ +#define GICD_IROUTER(n) (0x6000 + ((n) * 8)) + +#define GICD_PIDR4 0xFFD0 +#define GICD_PIDR5 0xFFD4 +#define GICD_PIDR6 0xFFD8 +#define GICD_PIDR7 0xFFDC +#define GICD_PIDR0 0xFFE0 +#define GICD_PIDR1 0xFFE4 +#define GICD_PIDR2 0xFFE8 + +#define GICR_PIDR2_ARCH_SHIFT 4 +#define GICR_PIDR2_ARCH_MASK 0xF0 +#define GICR_PIDR2_ARCH(x) \ + (((x) & GICR_PIDR2_ARCH_MASK) >> GICR_PIDR2_ARCH_SHIFT) +#define GICR_PIDR2_ARCH_GICv3 0x3 +#define GICR_PIDR2_ARCH_GICv4 0x4 + +#define GICD_PIDR3 0xFFEC + +/* Redistributor registers */ +#define GICR_CTLR GICD_CTLR +#define GICR_CTLR_LPI_ENABLE (1 << 0) + +#define GICR_PIDR2 GICD_PIDR2 + +#define GICR_TYPER (0x0008) +#define GICR_TYPER_PLPIS (1 << 0) +#define GICR_TYPER_VLPIS (1 << 1) +#define GICR_TYPER_LAST (1 << 4) +#define GICR_TYPER_CPUNUM_SHIFT (8) +#define GICR_TYPER_CPUNUM_MASK (0xFFFUL << GICR_TYPER_CPUNUM_SHIFT) +#define GICR_TYPER_CPUNUM(x) \ + (((x) & GICR_TYPER_CPUNUM_MASK) >> GICR_TYPER_CPUNUM_SHIFT) +#define GICR_TYPER_AFF_SHIFT (32) + +#define GICR_WAKER (0x0014) +#define GICR_WAKER_PS (1 << 1) /* Processor sleep */ +#define GICR_WAKER_CA (1 << 2) /* Children asleep */ + +#define GICR_PROPBASER (0x0070) +#define GICR_PROPBASER_IDBITS_MASK 0x1FUL +/* + * Cacheability + * 0x0 - Device-nGnRnE + * 0x1 - Normal Inner Non-cacheable + * 0x2 - Normal Inner Read-allocate, Write-through + * 0x3 - Normal Inner Read-allocate, Write-back + * 0x4 - Normal Inner Write-allocate, Write-through + * 0x5 - Normal Inner Write-allocate, Write-back + * 0x6 - Normal Inner Read-allocate, Write-allocate, Write-through + * 0x7 - Normal Inner Read-allocate, Write-allocate, Write-back + */ +#define GICR_PROPBASER_CACHE_SHIFT 7 +#define GICR_PROPBASER_CACHE_DnGnRnE 0x0UL +#define GICR_PROPBASER_CACHE_NIN 0x1UL +#define GICR_PROPBASER_CACHE_NIRAWT 0x2UL +#define GICR_PROPBASER_CACHE_NIRAWB 0x3UL +#define GICR_PROPBASER_CACHE_NIWAWT 0x4UL +#define GICR_PROPBASER_CACHE_NIWAWB 0x5UL +#define GICR_PROPBASER_CACHE_NIRAWAWT 0x6UL +#define GICR_PROPBASER_CACHE_NIRAWAWB 0x7UL +#define GICR_PROPBASER_CACHE_MASK \ + (0x7UL << GICR_PROPBASER_CACHE_SHIFT) + +/* + * Shareability + * 0x0 - Non-shareable + * 0x1 - Inner-shareable + * 0x2 - Outer-shareable + * 0x3 - Reserved. 
Treated as 0x0
+ */
+#define GICR_PROPBASER_SHARE_SHIFT	10
+#define GICR_PROPBASER_SHARE_NS	0x0UL
+#define GICR_PROPBASER_SHARE_IS	0x1UL
+#define GICR_PROPBASER_SHARE_OS	0x2UL
+#define GICR_PROPBASER_SHARE_RES	0x3UL
+#define GICR_PROPBASER_SHARE_MASK	\
+    (0x3UL << GICR_PROPBASER_SHARE_SHIFT)
+
+#define GICR_PENDBASER		(0x0078)
+/*
+ * Cacheability
+ * 0x0 - Device-nGnRnE
+ * 0x1 - Normal Inner Non-cacheable
+ * 0x2 - Normal Inner Read-allocate, Write-through
+ * 0x3 - Normal Inner Read-allocate, Write-back
+ * 0x4 - Normal Inner Write-allocate, Write-through
+ * 0x5 - Normal Inner Write-allocate, Write-back
+ * 0x6 - Normal Inner Read-allocate, Write-allocate, Write-through
+ * 0x7 - Normal Inner Read-allocate, Write-allocate, Write-back
+ */
+#define GICR_PENDBASER_CACHE_SHIFT	7
+#define GICR_PENDBASER_CACHE_DnGnRnE	0x0UL
+#define GICR_PENDBASER_CACHE_NIN	0x1UL
+#define GICR_PENDBASER_CACHE_NIRAWT	0x2UL
+#define GICR_PENDBASER_CACHE_NIRAWB	0x3UL
+#define GICR_PENDBASER_CACHE_NIWAWT	0x4UL
+#define GICR_PENDBASER_CACHE_NIWAWB	0x5UL
+#define GICR_PENDBASER_CACHE_NIRAWAWT	0x6UL
+#define GICR_PENDBASER_CACHE_NIRAWAWB	0x7UL
+#define GICR_PENDBASER_CACHE_MASK	\
+    (0x7UL << GICR_PENDBASER_CACHE_SHIFT)
+
+/*
+ * Shareability
+ * 0x0 - Non-shareable
+ * 0x1 - Inner-shareable
+ * 0x2 - Outer-shareable
+ * 0x3 - Reserved. Treated as 0x0
+ */
+#define GICR_PENDBASER_SHARE_SHIFT	10
+#define GICR_PENDBASER_SHARE_NS	0x0UL
+#define GICR_PENDBASER_SHARE_IS	0x1UL
+#define GICR_PENDBASER_SHARE_OS	0x2UL
+#define GICR_PENDBASER_SHARE_RES	0x3UL
+#define GICR_PENDBASER_SHARE_MASK	\
+    (0x3UL << GICR_PENDBASER_SHARE_SHIFT)
+
+/* Re-distributor registers for SGIs and PPIs */
+#define GICR_RD_BASE_SIZE	PAGE_SIZE_64K
+#define GICR_SGI_BASE_SIZE	PAGE_SIZE_64K
+#define GICR_VLPI_BASE_SIZE	PAGE_SIZE_64K
+#define GICR_RESERVED_SIZE	PAGE_SIZE_64K
+
+#define GICR_IGROUPR0		(0x0080)
+#define GICR_ISENABLER0		(0x0100)
+#define GICR_ICENABLER0		(0x0180)
+#define GICR_I_ENABLER_SGI_MASK	(0x0000FFFF)
+#define GICR_I_ENABLER_PPI_MASK	(0xFFFF0000)
+
+#define GICR_I_PER_IPRIORITYn	(GICD_I_PER_IPRIORITYn)
+
+/* ITS registers */
+#define GITS_PIDR2		GICR_PIDR2
+#define GITS_PIDR2_ARCH_MASK	GICR_PIDR2_ARCH_MASK
+#define GITS_PIDR2_ARCH_GICv3	GICR_PIDR2_ARCH_GICv3
+#define GITS_PIDR2_ARCH_GICv4	GICR_PIDR2_ARCH_GICv4
+
+#define GITS_CTLR		(0x0000)
+#define GITS_CTLR_EN		(1 << 0)
+
+#define GITS_IIDR		(0x0004)
+#define GITS_IIDR_PRODUCT_SHIFT	24
+#define GITS_IIDR_PRODUCT_MASK	(0xff << GITS_IIDR_PRODUCT_SHIFT)
+#define GITS_IIDR_VARIANT_SHIFT	16
+#define GITS_IIDR_VARIANT_MASK	(0xf << GITS_IIDR_VARIANT_SHIFT)
+#define GITS_IIDR_REVISION_SHIFT	12
+#define GITS_IIDR_REVISION_MASK	(0xf << GITS_IIDR_REVISION_SHIFT)
+#define GITS_IIDR_IMPLEMENTOR_SHIFT	0
+#define GITS_IIDR_IMPLEMENTOR_MASK	(0xfff << GITS_IIDR_IMPLEMENTOR_SHIFT)
+
+#define GITS_IIDR_RAW(impl, prod, var, rev)	\
+    ((prod) << GITS_IIDR_PRODUCT_SHIFT |	\
+     (var) << GITS_IIDR_VARIANT_SHIFT |	\
+     (rev) << GITS_IIDR_REVISION_SHIFT |	\
+     (impl) << GITS_IIDR_IMPLEMENTOR_SHIFT)
+
+#define GITS_IIDR_IMPL_ARM	(0x43B)
+#define GITS_IIDR_PROD_GIC500	(0x0)
+#define GITS_IIDR_IMPL_CAVIUM	(0x34c)
+#define GITS_IIDR_PROD_THUNDER	(0xa1)
+#define GITS_IIDR_VAR_THUNDER_1	(0x0)
+
+#define GITS_CBASER		(0x0080)
+#define GITS_CBASER_VALID	(1UL << 63)
+/*
+ * Cacheability
+ * 0x0 - Device-nGnRnE
+ * 0x1 - Normal Inner Non-cacheable
+ * 0x2 - Normal Inner Read-allocate, Write-through
+ * 0x3 - Normal Inner Read-allocate, Write-back
+ * 0x4 - Normal Inner Write-allocate, Write-through
+ * 0x5 - Normal Inner
Write-allocate, Write-back
+ * 0x6 - Normal Inner Read-allocate, Write-allocate, Write-through
+ * 0x7 - Normal Inner Read-allocate, Write-allocate, Write-back
+ */
+#define GITS_CBASER_CACHE_SHIFT	59
+#define GITS_CBASER_CACHE_DnGnRnE	0x0UL
+#define GITS_CBASER_CACHE_NIN	0x1UL
+#define GITS_CBASER_CACHE_NIRAWT	0x2UL
+#define GITS_CBASER_CACHE_NIRAWB	0x3UL
+#define GITS_CBASER_CACHE_NIWAWT	0x4UL
+#define GITS_CBASER_CACHE_NIWAWB	0x5UL
+#define GITS_CBASER_CACHE_NIRAWAWT	0x6UL
+#define GITS_CBASER_CACHE_NIRAWAWB	0x7UL
+#define GITS_CBASER_CACHE_MASK	(0x7UL << GITS_CBASER_CACHE_SHIFT)
+/*
+ * Shareability
+ * 0x0 - Non-shareable
+ * 0x1 - Inner-shareable
+ * 0x2 - Outer-shareable
+ * 0x3 - Reserved. Treated as 0x0
+ */
+#define GITS_CBASER_SHARE_SHIFT	10
+#define GITS_CBASER_SHARE_NS	0x0UL
+#define GITS_CBASER_SHARE_IS	0x1UL
+#define GITS_CBASER_SHARE_OS	0x2UL
+#define GITS_CBASER_SHARE_RES	0x3UL
+#define GITS_CBASER_SHARE_MASK	\
+    (0x3UL << GITS_CBASER_SHARE_SHIFT)
+
+#define GITS_CBASER_PA_SHIFT	12
+#define GITS_CBASER_PA_MASK	(0xFFFFFFFFFUL << GITS_CBASER_PA_SHIFT)
+
+#define GITS_CWRITER		(0x0088)
+#define GITS_CREADR		(0x0090)
+
+#define GITS_BASER_BASE	(0x0100)
+#define GITS_BASER(x)		(GITS_BASER_BASE + (x) * 8)
+
+#define GITS_BASER_VALID	(1UL << 63)
+
+#define GITS_BASER_TYPE_SHIFT	56
+#define GITS_BASER_TYPE(x)	\
+    (((x) & GITS_BASER_TYPE_MASK) >> GITS_BASER_TYPE_SHIFT)
+#define GITS_BASER_TYPE_UNIMPL	0x0UL	/* Unimplemented */
+#define GITS_BASER_TYPE_DEV	0x1UL	/* Devices */
+#define GITS_BASER_TYPE_VP	0x2UL	/* Virtual Processors */
+#define GITS_BASER_TYPE_PP	0x3UL	/* Physical Processors */
+#define GITS_BASER_TYPE_IC	0x4UL	/* Interrupt Collections */
+#define GITS_BASER_TYPE_RES5	0x5UL	/* Reserved */
+#define GITS_BASER_TYPE_RES6	0x6UL	/* Reserved */
+#define GITS_BASER_TYPE_RES7	0x7UL	/* Reserved */
+#define GITS_BASER_TYPE_MASK	(0x7UL << GITS_BASER_TYPE_SHIFT)
+/*
+ * Cacheability
+ * 0x0 - Non-cacheable, non-bufferable
+ * 0x1 - Non-cacheable
+ * 0x2 - Read-allocate, Write-through
+ * 0x3 - Read-allocate, Write-back
+ * 0x4 - Write-allocate, Write-through
+ * 0x5 - Write-allocate, Write-back
+ * 0x6 - Read-allocate, Write-allocate, Write-through
+ * 0x7 - Read-allocate, Write-allocate, Write-back
+ */
+#define GITS_BASER_CACHE_SHIFT	59
+#define GITS_BASER_CACHE_NCNB	0x0UL
+#define GITS_BASER_CACHE_NC	0x1UL
+#define GITS_BASER_CACHE_RAWT	0x2UL
+#define GITS_BASER_CACHE_RAWB	0x3UL
+#define GITS_BASER_CACHE_WAWT	0x4UL
+#define GITS_BASER_CACHE_WAWB	0x5UL
+#define GITS_BASER_CACHE_RAWAWT	0x6UL
+#define GITS_BASER_CACHE_RAWAWB	0x7UL
+#define GITS_BASER_CACHE_MASK	(0x7UL << GITS_BASER_CACHE_SHIFT)
+
+#define GITS_BASER_ESIZE_SHIFT	48
+#define GITS_BASER_ESIZE_MASK	(0x1FUL << GITS_BASER_ESIZE_SHIFT)
+#define GITS_BASER_ESIZE(x)	\
+    ((((x) & GITS_BASER_ESIZE_MASK) >> GITS_BASER_ESIZE_SHIFT) + 1)
+
+#define GITS_BASER_PA_SHIFT	12
+#define GITS_BASER_PA_MASK	(0xFFFFFFFFFUL << GITS_BASER_PA_SHIFT)
+
+/*
+ * Shareability
+ * 0x0 - Non-shareable
+ * 0x1 - Inner-shareable
+ * 0x2 - Outer-shareable
+ * 0x3 - Reserved.
Treated as 0x0
+ */
+#define GITS_BASER_SHARE_SHIFT	10
+#define GITS_BASER_SHARE_NS	0x0UL
+#define GITS_BASER_SHARE_IS	0x1UL
+#define GITS_BASER_SHARE_OS	0x2UL
+#define GITS_BASER_SHARE_RES	0x3UL
+#define GITS_BASER_SHARE_MASK	(0x3UL << GITS_BASER_SHARE_SHIFT)
+
+#define GITS_BASER_PSZ_SHIFT	8
+#define GITS_BASER_PSZ_4K	0x0UL
+#define GITS_BASER_PSZ_16K	0x1UL
+#define GITS_BASER_PSZ_64K	0x2UL
+#define GITS_BASER_PSZ_MASK	(0x3UL << GITS_BASER_PSZ_SHIFT)
+
+#define GITS_BASER_SIZE_MASK	0xFFUL
+
+#define GITS_BASER_NUM	8
+
+#define GITS_TYPER	(0x0008)
+#define GITS_TYPER_PTA	(1UL << 19)
+#define GITS_TYPER_DEVB_SHIFT	13
+#define GITS_TYPER_DEVB_MASK	(0x1FUL << GITS_TYPER_DEVB_SHIFT)
+/* Number of device identifiers implemented */
+#define GITS_TYPER_DEVB(x)	\
+    ((((x) & GITS_TYPER_DEVB_MASK) >> GITS_TYPER_DEVB_SHIFT) + 1)
+#define GITS_TYPER_ITTES_SHIFT	4
+#define GITS_TYPER_ITTES_MASK	(0xFUL << GITS_TYPER_ITTES_SHIFT)
+/* Number of bytes per ITT Entry */
+#define GITS_TYPER_ITTES(x)	\
+    ((((x) & GITS_TYPER_ITTES_MASK) >> GITS_TYPER_ITTES_SHIFT) + 1)
+
+#define GITS_TRANSLATER	(0x10040)
+/*
+ * LPI related
+ */
+#define LPI_CONF_PRIO_MASK	(0xFC)
+#define LPI_CONF_GROUP1	(1 << 1)
+#define LPI_CONF_ENABLE	(1 << 0)
+
+/*
+ * GIC 500 ITS tracking facility
+ */
+#define GITS_TRKCTLR	0xC000
+#define GITS_TRKR	0xC004
+#define GITS_TRKDIDR	0xC008
+#define GITS_TRKPIDR	0xC00C
+#define GITS_TRKVIDR	0xC010
+#define GITS_TRKTGTR	0xC014
+#define GITS_TRKICR	0xC018
+#define GITS_TRKLCR	0xC018
+
+/*
+ * CPU interface
+ */
+
+/*
+ * Registers list (ICC_xyz_EL1):
+ *
+ * PMR - Priority Mask Register
+ *	* interrupts with priority higher than the value in this mask
+ *	  will be signalled to the CPU.
+ *	  (0xff - lowest possible prio., 0x00 - highest prio.)
+ *
+ * CTLR - Control Register
+ *	* controls the behavior of the CPU interface and displays
+ *	  implemented features.
+ *
+ * IGRPEN1 - Interrupt Group 1 Enable Register
+ *
+ * IAR1 - Interrupt Acknowledge Register Group 1
+ *	* contains the number of the highest priority pending
+ *	  interrupt from Group 1.
+ *
+ * EOIR1 - End of Interrupt Register Group 1
+ *	* writes inform the CPU interface that processing of a
+ *	  Group 1 interrupt is complete.
+ */
+
+#define gic_icc_write(reg, val)	\
+do {	\
+	WRITE_SPECIALREG(icc_ ##reg ##_el1, val);	\
+	isb();	\
+} while (0)
+
+#define gic_icc_read(reg)	\
+({	\
+	uint64_t val;	\
+	\
+	val = READ_SPECIALREG(icc_ ##reg ##_el1);	\
+	(val);	\
+})
+
+#define gic_icc_set(reg, mask)	\
+do {	\
+	uint64_t val;	\
+	val = gic_icc_read(reg);	\
+	val |= (mask);	\
+	gic_icc_write(reg, val);	\
+} while (0)
+
+#define gic_icc_clear(reg, mask)	\
+do {	\
+	uint64_t val;	\
+	val = gic_icc_read(reg);	\
+	val &= ~(mask);	\
+	gic_icc_write(reg, val);	\
+} while (0)
+
+#endif /* _GIC_V3_REG_H_ */
diff --git a/sys/arm64/arm64/gic_v3_var.h b/sys/arm64/arm64/gic_v3_var.h
new file mode 100644
index 000000000000..f855e425d66d
--- /dev/null
+++ b/sys/arm64/arm64/gic_v3_var.h
@@ -0,0 +1,145 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GIC_V3_VAR_H_
+#define _GIC_V3_VAR_H_
+
+#include <arm/arm/gic_common.h>
+
+#define GIC_V3_DEVSTR	"ARM Generic Interrupt Controller v3.0"
+
+DECLARE_CLASS(gic_v3_driver);
+
+struct gic_v3_irqsrc;
+
+struct redist_pcpu {
+	struct resource		res;		/* mem resource for redist */
+	vm_offset_t		pend_base;
+	bool			lpi_enabled;	/* redist LPI configured? */
+};
+
+struct gic_redists {
+	/*
+	 * Re-Distributor region description.
+	 * We may have several of these, depending on the
+	 * #redistributor-regions property in the FDT.
+	 */
+	struct resource **	regions;
+	/* Number of Re-Distributor regions */
+	u_int			nregions;
+	/* Per-CPU Re-Distributor data */
+	struct redist_pcpu	*pcpu[MAXCPU];
+};
+
+struct gic_v3_softc {
+	device_t		dev;
+	struct resource **	gic_res;
+	struct mtx		gic_mtx;
+	/* Distributor */
+	struct resource *	gic_dist;
+	/* Re-Distributors */
+	struct gic_redists	gic_redists;
+
+	uint32_t		gic_pidr2;
+	u_int			gic_bus;
+
+	u_int			gic_nirqs;
+	u_int			gic_idbits;
+
+	boolean_t		gic_registered;
+
+	int			gic_nchildren;
+	device_t		*gic_children;
+	struct intr_pic		*gic_pic;
+	struct gic_v3_irqsrc	*gic_irqs;
+};
+
+struct gic_v3_devinfo {
+	int gic_domain;
+	int msi_xref;
+};
+
+#define GIC_INTR_ISRC(sc, irq)	(&sc->gic_irqs[irq].gi_isrc)
+
+MALLOC_DECLARE(M_GIC_V3);
+
+/* ivars */
+#define GICV3_IVAR_NIRQS	1000
+/* 1001 was GICV3_IVAR_REDIST_VADDR */
+#define GICV3_IVAR_REDIST	1002
+
+__BUS_ACCESSOR(gicv3, nirqs, GICV3, NIRQS, u_int);
+__BUS_ACCESSOR(gicv3, redist, GICV3, REDIST, void *);
+
+/* Device methods */
+int gic_v3_attach(device_t dev);
+int gic_v3_detach(device_t dev);
+int arm_gic_v3_intr(void *);
+
+uint32_t gic_r_read_4(device_t, bus_size_t);
+uint64_t gic_r_read_8(device_t, bus_size_t);
+void gic_r_write_4(device_t, bus_size_t, uint32_t var);
+void gic_r_write_8(device_t, bus_size_t, uint64_t var);
+
+/*
+ * GIC Distributor accessors.
+ * Note that only the GIC softc can be passed.
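A short usage sketch (editorial, not part of the patch) for the accessors defined just below: gic_r_read()/gic_r_write() resolve the calling CPU's Re-Distributor, so no explicit CPU argument is needed. example_enable_lpis is a hypothetical helper.

```c
/* Hypothetical helper, for illustration only. */
static void
example_enable_lpis(struct gic_v3_softc *sc)
{
	uint32_t ctlr;

	/* Read-modify-write this CPU's Re-Distributor control register. */
	ctlr = gic_r_read(sc, 4, GICR_CTLR);
	gic_r_write(sc, 4, GICR_CTLR, ctlr | GICR_CTLR_LPI_ENABLE);
}
```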
+ */ +#define gic_d_read(sc, len, reg) \ +({ \ + bus_read_##len(sc->gic_dist, reg); \ +}) + +#define gic_d_write(sc, len, reg, val) \ +({ \ + bus_write_##len(sc->gic_dist, reg, val);\ +}) + +/* GIC Re-Distributor accessors (per-CPU) */ +#define gic_r_read(sc, len, reg) \ +({ \ + u_int cpu = PCPU_GET(cpuid); \ + \ + bus_read_##len( \ + &sc->gic_redists.pcpu[cpu]->res, \ + reg); \ +}) + +#define gic_r_write(sc, len, reg, val) \ +({ \ + u_int cpu = PCPU_GET(cpuid); \ + \ + bus_write_##len( \ + &sc->gic_redists.pcpu[cpu]->res, \ + reg, val); \ +}) + +#endif /* _GIC_V3_VAR_H_ */ diff --git a/sys/arm64/arm64/gicv3_its.c b/sys/arm64/arm64/gicv3_its.c new file mode 100644 index 000000000000..bfb069c195a5 --- /dev/null +++ b/sys/arm64/arm64/gicv3_its.c @@ -0,0 +1,1960 @@ +/*- + * Copyright (c) 2015-2016 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under + * the sponsorship of the FreeBSD Foundation. + * + * This software was developed by Semihalf under + * the sponsorship of the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include "opt_acpi.h"
+#include "opt_platform.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cpuset.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/rman.h>
+#include <sys/sbuf.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/vmem.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+#include <machine/intr.h>
+
+#include <arm/arm/gic_common.h>
+#include <arm64/arm64/gic_v3_reg.h>
+#include <arm64/arm64/gic_v3_var.h>
+
+#ifdef FDT
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#endif
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include "pcib_if.h"
+#include "pic_if.h"
+#include "msi_if.h"
+
+MALLOC_DEFINE(M_GICV3_ITS, "GICv3 ITS",
+    "ARM GICv3 Interrupt Translation Service");
+
+#define LPI_NIRQS		(64 * 1024)
+
+/* The size and alignment of the command circular buffer */
+#define ITS_CMDQ_SIZE		(64 * 1024)	/* Must be a multiple of 4K */
+#define ITS_CMDQ_ALIGN		(64 * 1024)
+
+#define LPI_CONFTAB_SIZE	LPI_NIRQS
+#define LPI_CONFTAB_ALIGN	(64 * 1024)
+#define LPI_CONFTAB_MAX_ADDR	((1ul << 48) - 1)	/* We need a 47 bit PA */
+
+/* 1 bit per SPI, PPI, and SGI (8k), and 1 bit per LPI (LPI_CONFTAB_SIZE) */
+#define LPI_PENDTAB_SIZE	((LPI_NIRQS + GIC_FIRST_LPI) / 8)
+#define LPI_PENDTAB_ALIGN	(64 * 1024)
+#define LPI_PENDTAB_MAX_ADDR	((1ul << 48) - 1)	/* We need a 47 bit PA */
+
+#define LPI_INT_TRANS_TAB_ALIGN	256
+#define LPI_INT_TRANS_TAB_MAX_ADDR	((1ul << 48) - 1)
+
+/* ITS commands encoding */
+#define ITS_CMD_MOVI		(0x01)
+#define ITS_CMD_SYNC		(0x05)
+#define ITS_CMD_MAPD		(0x08)
+#define ITS_CMD_MAPC		(0x09)
+#define ITS_CMD_MAPTI		(0x0a)
+#define ITS_CMD_MAPI		(0x0b)
+#define ITS_CMD_INV		(0x0c)
+#define ITS_CMD_INVALL		(0x0d)
+/* Command */
+#define CMD_COMMAND_MASK	(0xFFUL)
+/* PCI device ID */
+#define CMD_DEVID_SHIFT		(32)
+#define CMD_DEVID_MASK		(0xFFFFFFFFUL << CMD_DEVID_SHIFT)
+/* Size of IRQ ID bitfield */
+#define CMD_SIZE_MASK		(0xFFUL)
+/* Virtual LPI ID */
+#define CMD_ID_MASK		(0xFFFFFFFFUL)
+/* Physical LPI ID */
+#define CMD_PID_SHIFT		(32)
+#define CMD_PID_MASK		(0xFFFFFFFFUL << CMD_PID_SHIFT)
+/* Collection */
+#define CMD_COL_MASK		(0xFFFFUL)
+/* Target (CPU or Re-Distributor) */
+#define CMD_TARGET_SHIFT	(16)
+#define CMD_TARGET_MASK		(0xFFFFFFFFUL << CMD_TARGET_SHIFT)
+/* Interrupt Translation Table address */
+#define CMD_ITT_MASK		(0xFFFFFFFFFF00UL)
+/* Valid command bit */
+#define CMD_VALID_SHIFT		(63)
+#define CMD_VALID_MASK		(1UL << CMD_VALID_SHIFT)
+
+#define ITS_TARGET_NONE		0xFBADBEEF
+
+/* LPI chunk owned by ITS device */
+struct lpi_chunk {
+	u_int	lpi_base;
+	u_int	lpi_free;	/* First free LPI in set */
+	u_int	lpi_num;	/* Total number of LPIs in chunk */
+	u_int	lpi_busy;	/* Number of busy LPIs in chunk */
+};
+
+/* ITS device */
+struct its_dev {
+	TAILQ_ENTRY(its_dev)	entry;
+	/* PCI device */
+	device_t		pci_dev;
+	/* Device ID (i.e. PCI device ID) */
+	uint32_t		devid;
+	/* List of assigned LPIs */
+	struct lpi_chunk	lpis;
+	/* Virtual address of ITT */
+	vm_offset_t		itt;
+	size_t			itt_size;
+};
+
+/*
+ * ITS command descriptor.
+ * The idea for command-description passing is taken from Linux.
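As an editorial illustration of that descriptor idea (not part of the patch), this is how a MAPTI request is described before being encoded; the union member names follow struct its_cmd_desc defined just below ("cmd_desc_mapvi" carries MAPTI's fields), and example_desc_mapti is hypothetical.

```c
/* Hypothetical, for illustration: describe a MAPTI command. */
static void
example_desc_mapti(struct its_dev *its_dev, struct its_col *col,
    uint32_t event_id, uint32_t lpi)
{
	struct its_cmd_desc desc;

	desc.cmd_type = ITS_CMD_MAPTI;
	desc.cmd_desc_mapvi.its_dev = its_dev;	/* Supplies the DeviceID */
	desc.cmd_desc_mapvi.col = col;		/* Target collection */
	desc.cmd_desc_mapvi.id = event_id;	/* Per-device event ID */
	desc.cmd_desc_mapvi.pid = lpi;		/* Physical LPI number */
	/* its_cmd_send(dev, &desc) would encode and queue it. */
	(void)desc;
}
```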
+ */ +struct its_cmd_desc { + uint8_t cmd_type; + + union { + struct { + struct its_dev *its_dev; + struct its_col *col; + uint32_t id; + } cmd_desc_movi; + + struct { + struct its_col *col; + } cmd_desc_sync; + + struct { + struct its_col *col; + uint8_t valid; + } cmd_desc_mapc; + + struct { + struct its_dev *its_dev; + struct its_col *col; + uint32_t pid; + uint32_t id; + } cmd_desc_mapvi; + + struct { + struct its_dev *its_dev; + struct its_col *col; + uint32_t pid; + } cmd_desc_mapi; + + struct { + struct its_dev *its_dev; + uint8_t valid; + } cmd_desc_mapd; + + struct { + struct its_dev *its_dev; + struct its_col *col; + uint32_t pid; + } cmd_desc_inv; + + struct { + struct its_col *col; + } cmd_desc_invall; + }; +}; + +/* ITS command. Each command is 32 bytes long */ +struct its_cmd { + uint64_t cmd_dword[4]; /* ITS command double word */ +}; + +/* An ITS private table */ +struct its_ptable { + vm_offset_t ptab_vaddr; + unsigned long ptab_size; +}; + +/* ITS collection description. */ +struct its_col { + uint64_t col_target; /* Target Re-Distributor */ + uint64_t col_id; /* Collection ID */ +}; + +struct gicv3_its_irqsrc { + struct intr_irqsrc gi_isrc; + u_int gi_id; + u_int gi_lpi; + struct its_dev *gi_its_dev; + TAILQ_ENTRY(gicv3_its_irqsrc) gi_link; +}; + +struct gicv3_its_softc { + device_t dev; + struct intr_pic *sc_pic; + struct resource *sc_its_res; + + cpuset_t sc_cpus; + u_int gic_irq_cpu; + + struct its_ptable sc_its_ptab[GITS_BASER_NUM]; + struct its_col *sc_its_cols[MAXCPU]; /* Per-CPU collections */ + + /* + * TODO: We should get these from the parent as we only want a + * single copy of each across the interrupt controller. + */ + uint8_t *sc_conf_base; + vm_offset_t sc_pend_base[MAXCPU]; + + /* Command handling */ + struct mtx sc_its_cmd_lock; + struct its_cmd *sc_its_cmd_base; /* Command circular buffer address */ + size_t sc_its_cmd_next_idx; + + vmem_t *sc_irq_alloc; + struct gicv3_its_irqsrc **sc_irqs; + u_int sc_irq_base; + u_int sc_irq_length; + u_int sc_irq_count; + + struct mtx sc_its_dev_lock; + TAILQ_HEAD(its_dev_list, its_dev) sc_its_dev_list; + TAILQ_HEAD(free_irqs, gicv3_its_irqsrc) sc_free_irqs; + +#define ITS_FLAGS_CMDQ_FLUSH 0x00000001 +#define ITS_FLAGS_LPI_CONF_FLUSH 0x00000002 +#define ITS_FLAGS_ERRATA_CAVIUM_22375 0x00000004 + u_int sc_its_flags; + bool trace_enable; +}; + +static void *conf_base; + +typedef void (its_quirk_func_t)(device_t); +static its_quirk_func_t its_quirk_cavium_22375; + +static const struct { + const char *desc; + uint32_t iidr; + uint32_t iidr_mask; + its_quirk_func_t *func; +} its_quirks[] = { + { + /* Cavium ThunderX Pass 1.x */ + .desc = "Cavium ThunderX errata: 22375, 24313", + .iidr = GITS_IIDR_RAW(GITS_IIDR_IMPL_CAVIUM, + GITS_IIDR_PROD_THUNDER, GITS_IIDR_VAR_THUNDER_1, 0), + .iidr_mask = ~GITS_IIDR_REVISION_MASK, + .func = its_quirk_cavium_22375, + }, +}; + +#define gic_its_read_4(sc, reg) \ + bus_read_4((sc)->sc_its_res, (reg)) +#define gic_its_read_8(sc, reg) \ + bus_read_8((sc)->sc_its_res, (reg)) + +#define gic_its_write_4(sc, reg, val) \ + bus_write_4((sc)->sc_its_res, (reg), (val)) +#define gic_its_write_8(sc, reg, val) \ + bus_write_8((sc)->sc_its_res, (reg), (val)) + +static device_attach_t gicv3_its_attach; +static device_detach_t gicv3_its_detach; + +static pic_disable_intr_t gicv3_its_disable_intr; +static pic_enable_intr_t gicv3_its_enable_intr; +static pic_map_intr_t gicv3_its_map_intr; +static pic_setup_intr_t gicv3_its_setup_intr; +static pic_post_filter_t gicv3_its_post_filter; +static 
pic_post_ithread_t gicv3_its_post_ithread; +static pic_pre_ithread_t gicv3_its_pre_ithread; +static pic_bind_intr_t gicv3_its_bind_intr; +#ifdef SMP +static pic_init_secondary_t gicv3_its_init_secondary; +#endif +static msi_alloc_msi_t gicv3_its_alloc_msi; +static msi_release_msi_t gicv3_its_release_msi; +static msi_alloc_msix_t gicv3_its_alloc_msix; +static msi_release_msix_t gicv3_its_release_msix; +static msi_map_msi_t gicv3_its_map_msi; + +static void its_cmd_movi(device_t, struct gicv3_its_irqsrc *); +static void its_cmd_mapc(device_t, struct its_col *, uint8_t); +static void its_cmd_mapti(device_t, struct gicv3_its_irqsrc *); +static void its_cmd_mapd(device_t, struct its_dev *, uint8_t); +static void its_cmd_inv(device_t, struct its_dev *, struct gicv3_its_irqsrc *); +static void its_cmd_invall(device_t, struct its_col *); + +static device_method_t gicv3_its_methods[] = { + /* Device interface */ + DEVMETHOD(device_detach, gicv3_its_detach), + + /* Interrupt controller interface */ + DEVMETHOD(pic_disable_intr, gicv3_its_disable_intr), + DEVMETHOD(pic_enable_intr, gicv3_its_enable_intr), + DEVMETHOD(pic_map_intr, gicv3_its_map_intr), + DEVMETHOD(pic_setup_intr, gicv3_its_setup_intr), + DEVMETHOD(pic_post_filter, gicv3_its_post_filter), + DEVMETHOD(pic_post_ithread, gicv3_its_post_ithread), + DEVMETHOD(pic_pre_ithread, gicv3_its_pre_ithread), +#ifdef SMP + DEVMETHOD(pic_bind_intr, gicv3_its_bind_intr), + DEVMETHOD(pic_init_secondary, gicv3_its_init_secondary), +#endif + + /* MSI/MSI-X */ + DEVMETHOD(msi_alloc_msi, gicv3_its_alloc_msi), + DEVMETHOD(msi_release_msi, gicv3_its_release_msi), + DEVMETHOD(msi_alloc_msix, gicv3_its_alloc_msix), + DEVMETHOD(msi_release_msix, gicv3_its_release_msix), + DEVMETHOD(msi_map_msi, gicv3_its_map_msi), + + /* End */ + DEVMETHOD_END +}; + +static DEFINE_CLASS_0(gic, gicv3_its_driver, gicv3_its_methods, + sizeof(struct gicv3_its_softc)); + +static void +gicv3_its_cmdq_init(struct gicv3_its_softc *sc) +{ + vm_paddr_t cmd_paddr; + uint64_t reg, tmp; + + /* Set up the command circular buffer */ + sc->sc_its_cmd_base = contigmalloc(ITS_CMDQ_SIZE, M_GICV3_ITS, + M_WAITOK | M_ZERO, 0, (1ul << 48) - 1, ITS_CMDQ_ALIGN, 0); + sc->sc_its_cmd_next_idx = 0; + + cmd_paddr = vtophys(sc->sc_its_cmd_base); + + /* Set the base of the command buffer */ + reg = GITS_CBASER_VALID | + (GITS_CBASER_CACHE_NIWAWB << GITS_CBASER_CACHE_SHIFT) | + cmd_paddr | (GITS_CBASER_SHARE_IS << GITS_CBASER_SHARE_SHIFT) | + (ITS_CMDQ_SIZE / 4096 - 1); + gic_its_write_8(sc, GITS_CBASER, reg); + + /* Read back to check for fixed value fields */ + tmp = gic_its_read_8(sc, GITS_CBASER); + + if ((tmp & GITS_CBASER_SHARE_MASK) != + (GITS_CBASER_SHARE_IS << GITS_CBASER_SHARE_SHIFT)) { + /* Check if the hardware reported non-shareable */ + if ((tmp & GITS_CBASER_SHARE_MASK) == + (GITS_CBASER_SHARE_NS << GITS_CBASER_SHARE_SHIFT)) { + /* If so remove the cache attribute */ + reg &= ~GITS_CBASER_CACHE_MASK; + reg &= ~GITS_CBASER_SHARE_MASK; + /* Set to Non-cacheable, Non-shareable */ + reg |= GITS_CBASER_CACHE_NIN << GITS_CBASER_CACHE_SHIFT; + reg |= GITS_CBASER_SHARE_NS << GITS_CBASER_SHARE_SHIFT; + + gic_its_write_8(sc, GITS_CBASER, reg); + } + + /* The command queue has to be flushed after each command */ + sc->sc_its_flags |= ITS_FLAGS_CMDQ_FLUSH; + } + + /* Get the next command from the start of the buffer */ + gic_its_write_8(sc, GITS_CWRITER, 0x0); +} + +static int +gicv3_its_table_init(device_t dev, struct gicv3_its_softc *sc) +{ + vm_offset_t table; + vm_paddr_t paddr; + uint64_t cache, reg, 
share, tmp, type; + size_t esize, its_tbl_size, nidents, nitspages, npages; + int i, page_size; + int devbits; + + if ((sc->sc_its_flags & ITS_FLAGS_ERRATA_CAVIUM_22375) != 0) { + /* + * GITS_TYPER[17:13] of ThunderX reports that device IDs + * are to be 21 bits in length. The entry size of the ITS + * table can be read from GITS_BASERn[52:48] and on ThunderX + * is supposed to be 8 bytes in length (for device table). + * Finally the page size that is to be used by ITS to access + * this table will be set to 64KB. + * + * This gives 0x200000 entries of size 0x8 bytes covered by + * 256 pages each of which 64KB in size. The number of pages + * (minus 1) should then be written to GITS_BASERn[7:0]. In + * that case this value would be 0xFF but on ThunderX the + * maximum value that HW accepts is 0xFD. + * + * Set an arbitrary number of device ID bits to 20 in order + * to limit the number of entries in ITS device table to + * 0x100000 and the table size to 8MB. + */ + devbits = 20; + cache = 0; + } else { + devbits = GITS_TYPER_DEVB(gic_its_read_8(sc, GITS_TYPER)); + cache = GITS_BASER_CACHE_WAWB; + } + share = GITS_BASER_SHARE_IS; + page_size = PAGE_SIZE_64K; + + for (i = 0; i < GITS_BASER_NUM; i++) { + reg = gic_its_read_8(sc, GITS_BASER(i)); + /* The type of table */ + type = GITS_BASER_TYPE(reg); + /* The table entry size */ + esize = GITS_BASER_ESIZE(reg); + + switch(type) { + case GITS_BASER_TYPE_DEV: + nidents = (1 << devbits); + its_tbl_size = esize * nidents; + its_tbl_size = roundup2(its_tbl_size, PAGE_SIZE_64K); + break; + case GITS_BASER_TYPE_VP: + case GITS_BASER_TYPE_PP: /* Undocumented? */ + case GITS_BASER_TYPE_IC: + its_tbl_size = page_size; + break; + default: + continue; + } + npages = howmany(its_tbl_size, PAGE_SIZE); + + /* Allocate the table */ + table = (vm_offset_t)contigmalloc(npages * PAGE_SIZE, + M_GICV3_ITS, M_WAITOK | M_ZERO, 0, (1ul << 48) - 1, + PAGE_SIZE_64K, 0); + + sc->sc_its_ptab[i].ptab_vaddr = table; + sc->sc_its_ptab[i].ptab_size = npages * PAGE_SIZE; + + paddr = vtophys(table); + + while (1) { + nitspages = howmany(its_tbl_size, page_size); + + /* Clear the fields we will be setting */ + reg &= ~(GITS_BASER_VALID | + GITS_BASER_CACHE_MASK | GITS_BASER_TYPE_MASK | + GITS_BASER_ESIZE_MASK | GITS_BASER_PA_MASK | + GITS_BASER_SHARE_MASK | GITS_BASER_PSZ_MASK | + GITS_BASER_SIZE_MASK); + /* Set the new values */ + reg |= GITS_BASER_VALID | + (cache << GITS_BASER_CACHE_SHIFT) | + (type << GITS_BASER_TYPE_SHIFT) | + ((esize - 1) << GITS_BASER_ESIZE_SHIFT) | + paddr | (share << GITS_BASER_SHARE_SHIFT) | + (nitspages - 1); + + switch (page_size) { + case PAGE_SIZE: /* 4KB */ + reg |= + GITS_BASER_PSZ_4K << GITS_BASER_PSZ_SHIFT; + break; + case PAGE_SIZE_16K: /* 16KB */ + reg |= + GITS_BASER_PSZ_16K << GITS_BASER_PSZ_SHIFT; + break; + case PAGE_SIZE_64K: /* 64KB */ + reg |= + GITS_BASER_PSZ_64K << GITS_BASER_PSZ_SHIFT; + break; + } + + gic_its_write_8(sc, GITS_BASER(i), reg); + + /* Read back to check */ + tmp = gic_its_read_8(sc, GITS_BASER(i)); + + /* Do the shareability masks line up? 
*/
+		if ((tmp & GITS_BASER_SHARE_MASK) !=
+		    (reg & GITS_BASER_SHARE_MASK)) {
+			share = (tmp & GITS_BASER_SHARE_MASK) >>
+			    GITS_BASER_SHARE_SHIFT;
+			continue;
+		}
+
+		if ((tmp & GITS_BASER_PSZ_MASK) !=
+		    (reg & GITS_BASER_PSZ_MASK)) {
+			switch (page_size) {
+			case PAGE_SIZE_16K:
+				page_size = PAGE_SIZE;
+				continue;
+			case PAGE_SIZE_64K:
+				page_size = PAGE_SIZE_16K;
+				continue;
+			}
+		}
+
+		if (tmp != reg) {
+			device_printf(dev, "GITS_BASER%d: "
+			    "unable to be updated: %lx != %lx\n",
+			    i, reg, tmp);
+			return (ENXIO);
+		}
+
+		/* We should have made all needed changes */
+		break;
+		}
+	}
+
+	return (0);
+}
+
+static void
+gicv3_its_conftable_init(struct gicv3_its_softc *sc)
+{
+	void *conf_table;
+
+	conf_table = atomic_load_ptr(&conf_base);
+	if (conf_table == NULL) {
+		conf_table = contigmalloc(LPI_CONFTAB_SIZE,
+		    M_GICV3_ITS, M_WAITOK, 0, LPI_CONFTAB_MAX_ADDR,
+		    LPI_CONFTAB_ALIGN, 0);
+
+		if (atomic_cmpset_ptr((uintptr_t *)&conf_base,
+		    (uintptr_t)NULL, (uintptr_t)conf_table) == 0) {
+			contigfree(conf_table, LPI_CONFTAB_SIZE, M_GICV3_ITS);
+			conf_table = atomic_load_ptr(&conf_base);
+		}
+	}
+	sc->sc_conf_base = conf_table;
+
+	/* Set the default configuration */
+	memset(sc->sc_conf_base, GIC_PRIORITY_MAX | LPI_CONF_GROUP1,
+	    LPI_CONFTAB_SIZE);
+
+	/* Flush the table to memory */
+	cpu_dcache_wb_range((vm_offset_t)sc->sc_conf_base, LPI_CONFTAB_SIZE);
+}
+
+static void
+gicv3_its_pendtables_init(struct gicv3_its_softc *sc)
+{
+	int i;
+
+	for (i = 0; i <= mp_maxid; i++) {
+		if (CPU_ISSET(i, &sc->sc_cpus) == 0)
+			continue;
+
+		sc->sc_pend_base[i] = (vm_offset_t)contigmalloc(
+		    LPI_PENDTAB_SIZE, M_GICV3_ITS, M_WAITOK | M_ZERO,
+		    0, LPI_PENDTAB_MAX_ADDR, LPI_PENDTAB_ALIGN, 0);
+
+		/* Flush so the ITS can see the memory */
+		cpu_dcache_wb_range((vm_offset_t)sc->sc_pend_base[i],
+		    LPI_PENDTAB_SIZE);
+	}
+}
+
+static void
+its_init_cpu_lpi(device_t dev, struct gicv3_its_softc *sc)
+{
+	device_t gicv3;
+	uint64_t xbaser, tmp;
+	uint32_t ctlr;
+	u_int cpuid;
+
+	gicv3 = device_get_parent(dev);
+	cpuid = PCPU_GET(cpuid);
+
+	/* Disable LPIs */
+	ctlr = gic_r_read_4(gicv3, GICR_CTLR);
+	ctlr &= ~GICR_CTLR_LPI_ENABLE;
+	gic_r_write_4(gicv3, GICR_CTLR, ctlr);
+
+	/* Make sure changes are observable by the GIC */
+	dsb(sy);
+
+	/*
+	 * Set the redistributor base
+	 */
+	xbaser = vtophys(sc->sc_conf_base) |
+	    (GICR_PROPBASER_SHARE_IS << GICR_PROPBASER_SHARE_SHIFT) |
+	    (GICR_PROPBASER_CACHE_NIWAWB << GICR_PROPBASER_CACHE_SHIFT) |
+	    (flsl(LPI_CONFTAB_SIZE | GIC_FIRST_LPI) - 1);
+	gic_r_write_8(gicv3, GICR_PROPBASER, xbaser);
+
+	/* Check the cache attributes we set */
+	tmp = gic_r_read_8(gicv3, GICR_PROPBASER);
+
+	if ((tmp & GICR_PROPBASER_SHARE_MASK) !=
+	    (xbaser & GICR_PROPBASER_SHARE_MASK)) {
+		if ((tmp & GICR_PROPBASER_SHARE_MASK) ==
+		    (GICR_PROPBASER_SHARE_NS << GICR_PROPBASER_SHARE_SHIFT)) {
+			/* We need to mark as non-cacheable */
+			xbaser &= ~(GICR_PROPBASER_SHARE_MASK |
+			    GICR_PROPBASER_CACHE_MASK);
+			/* Non-cacheable */
+			xbaser |= GICR_PROPBASER_CACHE_NIN <<
+			    GICR_PROPBASER_CACHE_SHIFT;
+			/* Non-shareable */
+			xbaser |= GICR_PROPBASER_SHARE_NS <<
+			    GICR_PROPBASER_SHARE_SHIFT;
+			gic_r_write_8(gicv3, GICR_PROPBASER, xbaser);
+		}
+		sc->sc_its_flags |= ITS_FLAGS_LPI_CONF_FLUSH;
+	}
+
+	/*
+	 * Set the LPI pending table base
+	 */
+	xbaser = vtophys(sc->sc_pend_base[cpuid]) |
+	    (GICR_PENDBASER_CACHE_NIWAWB << GICR_PENDBASER_CACHE_SHIFT) |
+	    (GICR_PENDBASER_SHARE_IS << GICR_PENDBASER_SHARE_SHIFT);
+
+	gic_r_write_8(gicv3, GICR_PENDBASER, xbaser);
+
+	tmp = gic_r_read_8(gicv3, GICR_PENDBASER);
+
+	if
((tmp & GICR_PENDBASER_SHARE_MASK) ==
+	    (GICR_PENDBASER_SHARE_NS << GICR_PENDBASER_SHARE_SHIFT)) {
+		/* Clear the cache and shareability bits */
+		xbaser &= ~(GICR_PENDBASER_CACHE_MASK |
+		    GICR_PENDBASER_SHARE_MASK);
+		/* Mark as non-shareable */
+		xbaser |= GICR_PENDBASER_SHARE_NS << GICR_PENDBASER_SHARE_SHIFT;
+		/* And non-cacheable */
+		xbaser |= GICR_PENDBASER_CACHE_NIN <<
+		    GICR_PENDBASER_CACHE_SHIFT;
+	}
+
+	/* Enable LPIs */
+	ctlr = gic_r_read_4(gicv3, GICR_CTLR);
+	ctlr |= GICR_CTLR_LPI_ENABLE;
+	gic_r_write_4(gicv3, GICR_CTLR, ctlr);
+
+	/* Make sure the GIC has seen everything */
+	dsb(sy);
+}
+
+static int
+its_init_cpu(device_t dev, struct gicv3_its_softc *sc)
+{
+	device_t gicv3;
+	vm_paddr_t target;
+	u_int cpuid;
+	struct redist_pcpu *rpcpu;
+
+	gicv3 = device_get_parent(dev);
+	cpuid = PCPU_GET(cpuid);
+	if (!CPU_ISSET(cpuid, &sc->sc_cpus))
+		return (0);
+
+	/* Check if physical LPIs are supported on this CPU */
+	if ((gic_r_read_4(gicv3, GICR_TYPER) & GICR_TYPER_PLPIS) == 0)
+		return (ENXIO);
+
+	rpcpu = gicv3_get_redist(dev);
+
+	/* Do per-cpu LPI init once */
+	if (!rpcpu->lpi_enabled) {
+		its_init_cpu_lpi(dev, sc);
+		rpcpu->lpi_enabled = true;
+	}
+
+	if ((gic_its_read_8(sc, GITS_TYPER) & GITS_TYPER_PTA) != 0) {
+		/* This ITS wants the redistributor physical address */
+		target = vtophys(rman_get_virtual(&rpcpu->res));
+	} else {
+		/* This ITS wants the unique processor number */
+		target = GICR_TYPER_CPUNUM(gic_r_read_8(gicv3, GICR_TYPER)) <<
+		    CMD_TARGET_SHIFT;
+	}
+
+	sc->sc_its_cols[cpuid]->col_target = target;
+	sc->sc_its_cols[cpuid]->col_id = cpuid;
+
+	its_cmd_mapc(dev, sc->sc_its_cols[cpuid], 1);
+	its_cmd_invall(dev, sc->sc_its_cols[cpuid]);
+
+	return (0);
+}
+
+static int
+gicv3_its_sysctl_trace_enable(SYSCTL_HANDLER_ARGS)
+{
+	struct gicv3_its_softc *sc;
+	int rv;
+
+	sc = arg1;
+
+	rv = sysctl_handle_bool(oidp, &sc->trace_enable, 0, req);
+	if (rv != 0 || req->newptr == NULL)
+		return (rv);
+	if (sc->trace_enable)
+		gic_its_write_8(sc, GITS_TRKCTLR, 3);
+	else
+		gic_its_write_8(sc, GITS_TRKCTLR, 0);
+
+	return (0);
+}
+
+static int
+gicv3_its_sysctl_trace_regs(SYSCTL_HANDLER_ARGS)
+{
+	struct gicv3_its_softc *sc;
+	struct sbuf *sb;
+	int err;
+
+	sc = arg1;
+	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
+	if (sb == NULL) {
+		device_printf(sc->dev, "Could not allocate sbuf for output.\n");
+		return (ENOMEM);
+	}
+	sbuf_cat(sb, "\n");
+	sbuf_printf(sb, "GITS_TRKCTLR: 0x%08X\n",
+	    gic_its_read_4(sc, GITS_TRKCTLR));
+	sbuf_printf(sb, "GITS_TRKR: 0x%08X\n",
+	    gic_its_read_4(sc, GITS_TRKR));
+	sbuf_printf(sb, "GITS_TRKDIDR: 0x%08X\n",
+	    gic_its_read_4(sc, GITS_TRKDIDR));
+	sbuf_printf(sb, "GITS_TRKPIDR: 0x%08X\n",
+	    gic_its_read_4(sc, GITS_TRKPIDR));
+	sbuf_printf(sb, "GITS_TRKVIDR: 0x%08X\n",
+	    gic_its_read_4(sc, GITS_TRKVIDR));
+	sbuf_printf(sb, "GITS_TRKTGTR: 0x%08X\n",
+	    gic_its_read_4(sc, GITS_TRKTGTR));
+
+	err = sbuf_finish(sb);
+	if (err)
+		device_printf(sc->dev, "Error finishing sbuf: %d\n", err);
+	sbuf_delete(sb);
+	return (err);
+}
+
+static int
+gicv3_its_init_sysctl(struct gicv3_its_softc *sc)
+{
+	struct sysctl_oid *oid, *child;
+	struct sysctl_ctx_list *ctx_list;
+
+	ctx_list = device_get_sysctl_ctx(sc->dev);
+	child = device_get_sysctl_tree(sc->dev);
+	oid = SYSCTL_ADD_NODE(ctx_list,
+	    SYSCTL_CHILDREN(child), OID_AUTO, "tracing",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Message tracing");
+	if (oid == NULL)
+		return (ENXIO);
+
+	/* Add registers */
+	SYSCTL_ADD_PROC(ctx_list,
+	    SYSCTL_CHILDREN(oid), OID_AUTO, "enable",
+	    CTLTYPE_U8 | CTLFLAG_RW |
CTLFLAG_MPSAFE, sc, 0,
+	    gicv3_its_sysctl_trace_enable, "CU", "Enable tracing");
+	SYSCTL_ADD_PROC(ctx_list,
+	    SYSCTL_CHILDREN(oid), OID_AUTO, "capture",
+	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+	    gicv3_its_sysctl_trace_regs, "", "Captured tracing registers.");
+
+	return (0);
+}
+
+static int
+gicv3_its_attach(device_t dev)
+{
+	struct gicv3_its_softc *sc;
+	uint32_t iidr;
+	int domain, err, i, rid;
+
+	sc = device_get_softc(dev);
+
+	sc->sc_irq_length = gicv3_get_nirqs(dev);
+	sc->sc_irq_base = GIC_FIRST_LPI;
+	sc->sc_irq_base += device_get_unit(dev) * sc->sc_irq_length;
+
+	rid = 0;
+	sc->sc_its_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
+	    RF_ACTIVE);
+	if (sc->sc_its_res == NULL) {
+		device_printf(dev, "Could not allocate memory\n");
+		return (ENXIO);
+	}
+
+	iidr = gic_its_read_4(sc, GITS_IIDR);
+	for (i = 0; i < nitems(its_quirks); i++) {
+		if ((iidr & its_quirks[i].iidr_mask) == its_quirks[i].iidr) {
+			if (bootverbose) {
+				device_printf(dev, "Applying %s\n",
+				    its_quirks[i].desc);
+			}
+			its_quirks[i].func(dev);
+			break;
+		}
+	}
+
+	/* Allocate the private tables */
+	err = gicv3_its_table_init(dev, sc);
+	if (err != 0)
+		return (err);
+
+	/* Protects access to the device list */
+	mtx_init(&sc->sc_its_dev_lock, "ITS device lock", NULL, MTX_SPIN);
+
+	/* Protects access to the ITS command circular buffer. */
+	mtx_init(&sc->sc_its_cmd_lock, "ITS cmd lock", NULL, MTX_SPIN);
+
+	CPU_ZERO(&sc->sc_cpus);
+	if (bus_get_domain(dev, &domain) == 0) {
+		if (domain < MAXMEMDOM)
+			CPU_COPY(&cpuset_domain[domain], &sc->sc_cpus);
+	} else {
+		CPU_COPY(&all_cpus, &sc->sc_cpus);
+	}
+
+	/* Allocate the command circular buffer */
+	gicv3_its_cmdq_init(sc);
+
+	/* Allocate the per-CPU collections */
+	for (int cpu = 0; cpu <= mp_maxid; cpu++)
+		if (CPU_ISSET(cpu, &sc->sc_cpus) != 0)
+			sc->sc_its_cols[cpu] = malloc(
+			    sizeof(*sc->sc_its_cols[0]), M_GICV3_ITS,
+			    M_WAITOK | M_ZERO);
+		else
+			sc->sc_its_cols[cpu] = NULL;
+
+	/* Enable the ITS */
+	gic_its_write_4(sc, GITS_CTLR,
+	    gic_its_read_4(sc, GITS_CTLR) | GITS_CTLR_EN);
+
+	/* Create the LPI configuration table */
+	gicv3_its_conftable_init(sc);
+
+	/* And the pending tables */
+	gicv3_its_pendtables_init(sc);
+
+	/* Enable LPIs on this CPU */
+	its_init_cpu(dev, sc);
+
+	TAILQ_INIT(&sc->sc_its_dev_list);
+	TAILQ_INIT(&sc->sc_free_irqs);
+
+	/*
+	 * Create the vmem object to allocate INTRNG IRQs from. We try to
+	 * use all IRQs not already used by the GICv3.
+	 * XXX: This assumes there are no other interrupt controllers in the
+	 * system.
+	 */
+	sc->sc_irq_alloc = vmem_create(device_get_nameunit(dev), 0,
+	    gicv3_get_nirqs(dev), 1, 0, M_FIRSTFIT | M_WAITOK);
+
+	sc->sc_irqs = malloc(sizeof(*sc->sc_irqs) * sc->sc_irq_length,
+	    M_GICV3_ITS, M_WAITOK | M_ZERO);
+
+	/* For GIC-500, install tracking sysctls.
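An editorial sketch (not part of the patch) of the mask-and-compare idiom used by the quirk loop above and by the GIC-500 check that follows: masking off selected GITS_IIDR fields lets one table entry cover a whole family of parts. example_iidr_matches_thunderx is hypothetical.

```c
/* Hypothetical, for illustration: match any ThunderX pass 1.x ITS. */
static bool
example_iidr_matches_thunderx(uint32_t iidr)
{
	uint32_t want;

	want = GITS_IIDR_RAW(GITS_IIDR_IMPL_CAVIUM,
	    GITS_IIDR_PROD_THUNDER, GITS_IIDR_VAR_THUNDER_1, 0);
	/* Ignoring the revision field matches pass 1.0, 1.1, ... */
	return ((iidr & ~GITS_IIDR_REVISION_MASK) == want);
}
```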
*/ + if ((iidr & (GITS_IIDR_PRODUCT_MASK | GITS_IIDR_IMPLEMENTOR_MASK)) == + GITS_IIDR_RAW(GITS_IIDR_IMPL_ARM, GITS_IIDR_PROD_GIC500, 0, 0)) + gicv3_its_init_sysctl(sc); + + return (0); +} + +static int +gicv3_its_detach(device_t dev) +{ + + return (ENXIO); +} + +static void +its_quirk_cavium_22375(device_t dev) +{ + struct gicv3_its_softc *sc; + + sc = device_get_softc(dev); + sc->sc_its_flags |= ITS_FLAGS_ERRATA_CAVIUM_22375; +} + +static void +gicv3_its_disable_intr(device_t dev, struct intr_irqsrc *isrc) +{ + struct gicv3_its_softc *sc; + struct gicv3_its_irqsrc *girq; + uint8_t *conf; + + sc = device_get_softc(dev); + girq = (struct gicv3_its_irqsrc *)isrc; + conf = sc->sc_conf_base; + + conf[girq->gi_lpi] &= ~LPI_CONF_ENABLE; + + if ((sc->sc_its_flags & ITS_FLAGS_LPI_CONF_FLUSH) != 0) { + /* Clean D-cache under command. */ + cpu_dcache_wb_range((vm_offset_t)&conf[girq->gi_lpi], 1); + } else { + /* DSB inner shareable, store */ + dsb(ishst); + } + + its_cmd_inv(dev, girq->gi_its_dev, girq); +} + +static void +gicv3_its_enable_intr(device_t dev, struct intr_irqsrc *isrc) +{ + struct gicv3_its_softc *sc; + struct gicv3_its_irqsrc *girq; + uint8_t *conf; + + sc = device_get_softc(dev); + girq = (struct gicv3_its_irqsrc *)isrc; + conf = sc->sc_conf_base; + + conf[girq->gi_lpi] |= LPI_CONF_ENABLE; + + if ((sc->sc_its_flags & ITS_FLAGS_LPI_CONF_FLUSH) != 0) { + /* Clean D-cache under command. */ + cpu_dcache_wb_range((vm_offset_t)&conf[girq->gi_lpi], 1); + } else { + /* DSB inner shareable, store */ + dsb(ishst); + } + + its_cmd_inv(dev, girq->gi_its_dev, girq); +} + +static int +gicv3_its_intr(void *arg, uintptr_t irq) +{ + struct gicv3_its_softc *sc = arg; + struct gicv3_its_irqsrc *girq; + struct trapframe *tf; + + irq -= sc->sc_irq_base; + girq = sc->sc_irqs[irq]; + if (girq == NULL) + panic("gicv3_its_intr: Invalid interrupt %ld", + irq + sc->sc_irq_base); + + tf = curthread->td_intr_frame; + intr_isrc_dispatch(&girq->gi_isrc, tf); + return (FILTER_HANDLED); +} + +static void +gicv3_its_pre_ithread(device_t dev, struct intr_irqsrc *isrc) +{ + struct gicv3_its_irqsrc *girq; + struct gicv3_its_softc *sc; + + sc = device_get_softc(dev); + girq = (struct gicv3_its_irqsrc *)isrc; + gicv3_its_disable_intr(dev, isrc); + gic_icc_write(EOIR1, girq->gi_lpi + GIC_FIRST_LPI); +} + +static void +gicv3_its_post_ithread(device_t dev, struct intr_irqsrc *isrc) +{ + + gicv3_its_enable_intr(dev, isrc); +} + +static void +gicv3_its_post_filter(device_t dev, struct intr_irqsrc *isrc) +{ + struct gicv3_its_irqsrc *girq; + struct gicv3_its_softc *sc; + + sc = device_get_softc(dev); + girq = (struct gicv3_its_irqsrc *)isrc; + gic_icc_write(EOIR1, girq->gi_lpi + GIC_FIRST_LPI); +} + +static int +gicv3_its_select_cpu(device_t dev, struct intr_irqsrc *isrc) +{ + struct gicv3_its_softc *sc; + + sc = device_get_softc(dev); + if (CPU_EMPTY(&isrc->isrc_cpu)) { + sc->gic_irq_cpu = intr_irq_next_cpu(sc->gic_irq_cpu, + &sc->sc_cpus); + CPU_SETOF(sc->gic_irq_cpu, &isrc->isrc_cpu); + } + + return (0); +} + +static int +gicv3_its_bind_intr(device_t dev, struct intr_irqsrc *isrc) +{ + struct gicv3_its_irqsrc *girq; + + gicv3_its_select_cpu(dev, isrc); + + girq = (struct gicv3_its_irqsrc *)isrc; + its_cmd_movi(dev, girq); + return (0); +} + +static int +gicv3_its_map_intr(device_t dev, struct intr_map_data *data, + struct intr_irqsrc **isrcp) +{ + + /* + * This should never happen, we only call this function to map + * interrupts found before the controller driver is ready. 
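Three interrupt numberings meet in the handlers above: the index into sc_irqs[], the INTRNG number (sc_irq_base + index), and the hardware LPI ID written to ICC_EOIR1_EL1. An editorial sketch (not part of the patch), assuming unit 0 so that sc_irq_base == GIC_FIRST_LPI; example_hw_lpi is hypothetical.

```c
/* Hypothetical, for illustration: map a table index to the LPI ID. */
static u_int
example_hw_lpi(struct gicv3_its_softc *sc, u_int irq)
{
	u_int gi_lpi;

	/* As stored by gicv3_its_alloc_irqsrc() later in this file. */
	gi_lpi = irq + sc->sc_irq_base - GIC_FIRST_LPI;
	/* The ID acknowledged and completed via ICC_EOIR1_EL1. */
	return (gi_lpi + GIC_FIRST_LPI);
}
```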
+ */ + panic("gicv3_its_map_intr: Unable to map a MSI interrupt"); +} + +static int +gicv3_its_setup_intr(device_t dev, struct intr_irqsrc *isrc, + struct resource *res, struct intr_map_data *data) +{ + + /* Bind the interrupt to a CPU */ + gicv3_its_bind_intr(dev, isrc); + + return (0); +} + +#ifdef SMP +static void +gicv3_its_init_secondary(device_t dev) +{ + struct gicv3_its_softc *sc; + + sc = device_get_softc(dev); + + /* + * This is fatal as otherwise we may bind interrupts to this CPU. + * We need a way to tell the interrupt framework to only bind to a + * subset of given CPUs when it performs the shuffle. + */ + if (its_init_cpu(dev, sc) != 0) + panic("gicv3_its_init_secondary: No usable ITS on CPU%d", + PCPU_GET(cpuid)); +} +#endif + +static uint32_t +its_get_devid(device_t pci_dev) +{ + uintptr_t id; + + if (pci_get_id(pci_dev, PCI_ID_MSI, &id) != 0) + panic("its_get_devid: Unable to get the MSI DeviceID"); + + return (id); +} + +static struct its_dev * +its_device_find(device_t dev, device_t child) +{ + struct gicv3_its_softc *sc; + struct its_dev *its_dev = NULL; + + sc = device_get_softc(dev); + + mtx_lock_spin(&sc->sc_its_dev_lock); + TAILQ_FOREACH(its_dev, &sc->sc_its_dev_list, entry) { + if (its_dev->pci_dev == child) + break; + } + mtx_unlock_spin(&sc->sc_its_dev_lock); + + return (its_dev); +} + +static struct its_dev * +its_device_get(device_t dev, device_t child, u_int nvecs) +{ + struct gicv3_its_softc *sc; + struct its_dev *its_dev; + vmem_addr_t irq_base; + size_t esize; + + sc = device_get_softc(dev); + + its_dev = its_device_find(dev, child); + if (its_dev != NULL) + return (its_dev); + + its_dev = malloc(sizeof(*its_dev), M_GICV3_ITS, M_NOWAIT | M_ZERO); + if (its_dev == NULL) + return (NULL); + + its_dev->pci_dev = child; + its_dev->devid = its_get_devid(child); + + its_dev->lpis.lpi_busy = 0; + its_dev->lpis.lpi_num = nvecs; + its_dev->lpis.lpi_free = nvecs; + + if (vmem_alloc(sc->sc_irq_alloc, nvecs, M_FIRSTFIT | M_NOWAIT, + &irq_base) != 0) { + free(its_dev, M_GICV3_ITS); + return (NULL); + } + its_dev->lpis.lpi_base = irq_base; + + /* Get ITT entry size */ + esize = GITS_TYPER_ITTES(gic_its_read_8(sc, GITS_TYPER)); + + /* + * Allocate ITT for this device. + * PA has to be 256 B aligned. At least two entries for device. 
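The sizing rule stated above, worked through as an editorial example (example_itt_size is hypothetical): with esize = 8 bytes per ITT entry and nvecs = 3 vectors, MAX(3, 2) * 8 = 24 bytes rounds up to the required 256-byte alignment unit, so a 256-byte ITT is allocated.

```c
/* Hypothetical, for illustration: the ITT size computed below. */
static size_t
example_itt_size(u_int nvecs, size_t esize)
{
	/* At least two entries, rounded up to 256 bytes. */
	return (roundup2(MAX(nvecs, 2) * esize, 256));
}
```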
+ */ + its_dev->itt_size = roundup2(MAX(nvecs, 2) * esize, 256); + its_dev->itt = (vm_offset_t)contigmalloc(its_dev->itt_size, + M_GICV3_ITS, M_NOWAIT | M_ZERO, 0, LPI_INT_TRANS_TAB_MAX_ADDR, + LPI_INT_TRANS_TAB_ALIGN, 0); + if (its_dev->itt == 0) { + vmem_free(sc->sc_irq_alloc, its_dev->lpis.lpi_base, nvecs); + free(its_dev, M_GICV3_ITS); + return (NULL); + } + + mtx_lock_spin(&sc->sc_its_dev_lock); + TAILQ_INSERT_TAIL(&sc->sc_its_dev_list, its_dev, entry); + mtx_unlock_spin(&sc->sc_its_dev_lock); + + /* Map device to its ITT */ + its_cmd_mapd(dev, its_dev, 1); + + return (its_dev); +} + +static void +its_device_release(device_t dev, struct its_dev *its_dev) +{ + struct gicv3_its_softc *sc; + + KASSERT(its_dev->lpis.lpi_busy == 0, + ("its_device_release: Trying to release an inuse ITS device")); + + /* Unmap device in ITS */ + its_cmd_mapd(dev, its_dev, 0); + + sc = device_get_softc(dev); + + /* Remove the device from the list of devices */ + mtx_lock_spin(&sc->sc_its_dev_lock); + TAILQ_REMOVE(&sc->sc_its_dev_list, its_dev, entry); + mtx_unlock_spin(&sc->sc_its_dev_lock); + + /* Free ITT */ + KASSERT(its_dev->itt != 0, ("Invalid ITT in valid ITS device")); + contigfree((void *)its_dev->itt, its_dev->itt_size, M_GICV3_ITS); + + /* Free the IRQ allocation */ + vmem_free(sc->sc_irq_alloc, its_dev->lpis.lpi_base, + its_dev->lpis.lpi_num); + + free(its_dev, M_GICV3_ITS); +} + +static struct gicv3_its_irqsrc * +gicv3_its_alloc_irqsrc(device_t dev, struct gicv3_its_softc *sc, u_int irq) +{ + struct gicv3_its_irqsrc *girq = NULL; + + KASSERT(sc->sc_irqs[irq] == NULL, + ("%s: Interrupt %u already allocated", __func__, irq)); + mtx_lock_spin(&sc->sc_its_dev_lock); + if (!TAILQ_EMPTY(&sc->sc_free_irqs)) { + girq = TAILQ_FIRST(&sc->sc_free_irqs); + TAILQ_REMOVE(&sc->sc_free_irqs, girq, gi_link); + } + mtx_unlock_spin(&sc->sc_its_dev_lock); + if (girq == NULL) { + girq = malloc(sizeof(*girq), M_GICV3_ITS, + M_NOWAIT | M_ZERO); + if (girq == NULL) + return (NULL); + girq->gi_id = -1; + if (intr_isrc_register(&girq->gi_isrc, dev, 0, + "%s,%u", device_get_nameunit(dev), irq) != 0) { + free(girq, M_GICV3_ITS); + return (NULL); + } + } + girq->gi_lpi = irq + sc->sc_irq_base - GIC_FIRST_LPI; + sc->sc_irqs[irq] = girq; + + return (girq); +} + +static void +gicv3_its_release_irqsrc(struct gicv3_its_softc *sc, + struct gicv3_its_irqsrc *girq) +{ + u_int irq; + + mtx_assert(&sc->sc_its_dev_lock, MA_OWNED); + + irq = girq->gi_lpi + GIC_FIRST_LPI - sc->sc_irq_base; + sc->sc_irqs[irq] = NULL; + + girq->gi_id = -1; + girq->gi_its_dev = NULL; + TAILQ_INSERT_TAIL(&sc->sc_free_irqs, girq, gi_link); +} + +static int +gicv3_its_alloc_msi(device_t dev, device_t child, int count, int maxcount, + device_t *pic, struct intr_irqsrc **srcs) +{ + struct gicv3_its_softc *sc; + struct gicv3_its_irqsrc *girq; + struct its_dev *its_dev; + u_int irq; + int i; + + its_dev = its_device_get(dev, child, count); + if (its_dev == NULL) + return (ENXIO); + + KASSERT(its_dev->lpis.lpi_free >= count, + ("gicv3_its_alloc_msi: No free LPIs")); + sc = device_get_softc(dev); + irq = its_dev->lpis.lpi_base + its_dev->lpis.lpi_num - + its_dev->lpis.lpi_free; + + /* Allocate the irqsrc for each MSI */ + for (i = 0; i < count; i++, irq++) { + its_dev->lpis.lpi_free--; + srcs[i] = (struct intr_irqsrc *)gicv3_its_alloc_irqsrc(dev, + sc, irq); + if (srcs[i] == NULL) + break; + } + + /* The allocation failed, release them */ + if (i != count) { + mtx_lock_spin(&sc->sc_its_dev_lock); + for (i = 0; i < count; i++) { + girq = (struct gicv3_its_irqsrc 
*)srcs[i]; + if (girq == NULL) + break; + gicv3_its_release_irqsrc(sc, girq); + srcs[i] = NULL; + } + mtx_unlock_spin(&sc->sc_its_dev_lock); + return (ENXIO); + } + + /* Finish the allocation now we have all MSI irqsrcs */ + for (i = 0; i < count; i++) { + girq = (struct gicv3_its_irqsrc *)srcs[i]; + girq->gi_id = i; + girq->gi_its_dev = its_dev; + + /* Map the message to the given IRQ */ + gicv3_its_select_cpu(dev, (struct intr_irqsrc *)girq); + its_cmd_mapti(dev, girq); + } + its_dev->lpis.lpi_busy += count; + *pic = dev; + + return (0); +} + +static int +gicv3_its_release_msi(device_t dev, device_t child, int count, + struct intr_irqsrc **isrc) +{ + struct gicv3_its_softc *sc; + struct gicv3_its_irqsrc *girq; + struct its_dev *its_dev; + int i; + + its_dev = its_device_find(dev, child); + + KASSERT(its_dev != NULL, + ("gicv3_its_release_msi: Releasing a MSI interrupt with " + "no ITS device")); + KASSERT(its_dev->lpis.lpi_busy >= count, + ("gicv3_its_release_msi: Releasing more interrupts than " + "were allocated: releasing %d, allocated %d", count, + its_dev->lpis.lpi_busy)); + + sc = device_get_softc(dev); + mtx_lock_spin(&sc->sc_its_dev_lock); + for (i = 0; i < count; i++) { + girq = (struct gicv3_its_irqsrc *)isrc[i]; + gicv3_its_release_irqsrc(sc, girq); + } + mtx_unlock_spin(&sc->sc_its_dev_lock); + its_dev->lpis.lpi_busy -= count; + + if (its_dev->lpis.lpi_busy == 0) + its_device_release(dev, its_dev); + + return (0); +} + +static int +gicv3_its_alloc_msix(device_t dev, device_t child, device_t *pic, + struct intr_irqsrc **isrcp) +{ + struct gicv3_its_softc *sc; + struct gicv3_its_irqsrc *girq; + struct its_dev *its_dev; + u_int nvecs, irq; + + nvecs = pci_msix_count(child); + its_dev = its_device_get(dev, child, nvecs); + if (its_dev == NULL) + return (ENXIO); + + KASSERT(its_dev->lpis.lpi_free > 0, + ("gicv3_its_alloc_msix: No free LPIs")); + sc = device_get_softc(dev); + irq = its_dev->lpis.lpi_base + its_dev->lpis.lpi_num - + its_dev->lpis.lpi_free; + + girq = gicv3_its_alloc_irqsrc(dev, sc, irq); + if (girq == NULL) + return (ENXIO); + girq->gi_id = its_dev->lpis.lpi_busy; + girq->gi_its_dev = its_dev; + + its_dev->lpis.lpi_free--; + its_dev->lpis.lpi_busy++; + + /* Map the message to the given IRQ */ + gicv3_its_select_cpu(dev, (struct intr_irqsrc *)girq); + its_cmd_mapti(dev, girq); + + *pic = dev; + *isrcp = (struct intr_irqsrc *)girq; + + return (0); +} + +static int +gicv3_its_release_msix(device_t dev, device_t child, struct intr_irqsrc *isrc) +{ + struct gicv3_its_softc *sc; + struct gicv3_its_irqsrc *girq; + struct its_dev *its_dev; + + its_dev = its_device_find(dev, child); + + KASSERT(its_dev != NULL, + ("gicv3_its_release_msix: Releasing a MSI-X interrupt with " + "no ITS device")); + KASSERT(its_dev->lpis.lpi_busy > 0, + ("gicv3_its_release_msix: Releasing more interrupts than " + "were allocated: allocated %d", its_dev->lpis.lpi_busy)); + + sc = device_get_softc(dev); + girq = (struct gicv3_its_irqsrc *)isrc; + gicv3_its_release_irqsrc(sc, girq); + its_dev->lpis.lpi_busy--; + + if (its_dev->lpis.lpi_busy == 0) + its_device_release(dev, its_dev); + + return (0); +} + +static int +gicv3_its_map_msi(device_t dev, device_t child, struct intr_irqsrc *isrc, + uint64_t *addr, uint32_t *data) +{ + struct gicv3_its_softc *sc; + struct gicv3_its_irqsrc *girq; + + sc = device_get_softc(dev); + girq = (struct gicv3_its_irqsrc *)isrc; + + *addr = vtophys(rman_get_virtual(sc->sc_its_res)) + GITS_TRANSLATER; + *data = girq->gi_id; + + return (0); +} + +/* + * Commands 
handling.
+ */
+
+static __inline void
+cmd_format_command(struct its_cmd *cmd, uint8_t cmd_type)
+{
+	/* Command field: DW0 [7:0] */
+	cmd->cmd_dword[0] &= htole64(~CMD_COMMAND_MASK);
+	cmd->cmd_dword[0] |= htole64(cmd_type);
+}
+
+static __inline void
+cmd_format_devid(struct its_cmd *cmd, uint32_t devid)
+{
+	/* Device ID field: DW0 [63:32] */
+	cmd->cmd_dword[0] &= htole64(~CMD_DEVID_MASK);
+	cmd->cmd_dword[0] |= htole64((uint64_t)devid << CMD_DEVID_SHIFT);
+}
+
+static __inline void
+cmd_format_size(struct its_cmd *cmd, uint16_t size)
+{
+	/* Size field: DW1 [4:0] */
+	cmd->cmd_dword[1] &= htole64(~CMD_SIZE_MASK);
+	cmd->cmd_dword[1] |= htole64((size & CMD_SIZE_MASK));
+}
+
+static __inline void
+cmd_format_id(struct its_cmd *cmd, uint32_t id)
+{
+	/* ID field: DW1 [31:0] */
+	cmd->cmd_dword[1] &= htole64(~CMD_ID_MASK);
+	cmd->cmd_dword[1] |= htole64(id);
+}
+
+static __inline void
+cmd_format_pid(struct its_cmd *cmd, uint32_t pid)
+{
+	/* Physical ID field: DW1 [63:32] */
+	cmd->cmd_dword[1] &= htole64(~CMD_PID_MASK);
+	cmd->cmd_dword[1] |= htole64((uint64_t)pid << CMD_PID_SHIFT);
+}
+
+static __inline void
+cmd_format_col(struct its_cmd *cmd, uint16_t col_id)
+{
+	/* Collection field: DW2 [15:0] */
+	cmd->cmd_dword[2] &= htole64(~CMD_COL_MASK);
+	cmd->cmd_dword[2] |= htole64(col_id);
+}
+
+static __inline void
+cmd_format_target(struct its_cmd *cmd, uint64_t target)
+{
+	/* Target Address field: DW2 [47:16] */
+	cmd->cmd_dword[2] &= htole64(~CMD_TARGET_MASK);
+	cmd->cmd_dword[2] |= htole64(target & CMD_TARGET_MASK);
+}
+
+static __inline void
+cmd_format_itt(struct its_cmd *cmd, uint64_t itt)
+{
+	/* ITT Address field: DW2 [47:8] */
+	cmd->cmd_dword[2] &= htole64(~CMD_ITT_MASK);
+	cmd->cmd_dword[2] |= htole64(itt & CMD_ITT_MASK);
+}
+
+static __inline void
+cmd_format_valid(struct its_cmd *cmd, uint8_t valid)
+{
+	/* Valid field: DW2 [63] */
+	cmd->cmd_dword[2] &= htole64(~CMD_VALID_MASK);
+	cmd->cmd_dword[2] |= htole64((uint64_t)valid << CMD_VALID_SHIFT);
+}
+
+static inline bool
+its_cmd_queue_full(struct gicv3_its_softc *sc)
+{
+	size_t read_idx, next_write_idx;
+
+	/* Get the index of the next command */
+	next_write_idx = (sc->sc_its_cmd_next_idx + 1) %
+	    (ITS_CMDQ_SIZE / sizeof(struct its_cmd));
+	/* And the index of the current command being read */
+	read_idx = gic_its_read_4(sc, GITS_CREADR) / sizeof(struct its_cmd);
+
+	/*
+	 * The queue is full when the write offset points
+	 * at the command before the current read offset.
+	 */
+	return (next_write_idx == read_idx);
+}
+
+static inline void
+its_cmd_sync(struct gicv3_its_softc *sc, struct its_cmd *cmd)
+{
+
+	if ((sc->sc_its_flags & ITS_FLAGS_CMDQ_FLUSH) != 0) {
+		/* Clean D-cache under command. */
+		cpu_dcache_wb_range((vm_offset_t)cmd, sizeof(*cmd));
+	} else {
+		/* DSB inner shareable, store */
+		dsb(ishst);
+	}
+}
+
+static inline uint64_t
+its_cmd_cwriter_offset(struct gicv3_its_softc *sc, struct its_cmd *cmd)
+{
+	uint64_t off;
+
+	off = (cmd - sc->sc_its_cmd_base) * sizeof(*cmd);
+
+	return (off);
+}
+
+static void
+its_cmd_wait_completion(device_t dev, struct its_cmd *cmd_first,
+    struct its_cmd *cmd_last)
+{
+	struct gicv3_its_softc *sc;
+	uint64_t first, last, read;
+	size_t us_left;
+
+	sc = device_get_softc(dev);
+
+	/*
+	 * XXX ARM64TODO: This is obviously a significant delay.
+	 * The reason for that is that currently the time frames for
+	 * the command to complete are not known.
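An editorial sketch (not part of the patch) of the circular-buffer arithmetic used by its_cmd_queue_full() above: a 64KB queue of 32-byte commands has 2048 slots, and the queue counts as full when advancing the write index would land on the slot GITS_CREADR still points at. example_queue_full is hypothetical.

```c
/* Hypothetical, for illustration: the queue-full wraparound test. */
static bool
example_queue_full(size_t write_idx, size_t read_idx)
{
	const size_t nslots = ITS_CMDQ_SIZE / sizeof(struct its_cmd);

	/* 64KB / 32B = 2048 slots; advance and wrap the write index. */
	return (((write_idx + 1) % nslots) == read_idx);
}
```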
+ */ + us_left = 1000000; + + first = its_cmd_cwriter_offset(sc, cmd_first); + last = its_cmd_cwriter_offset(sc, cmd_last); + + for (;;) { + read = gic_its_read_8(sc, GITS_CREADR); + if (first < last) { + if (read < first || read >= last) + break; + } else if (read < first && read >= last) + break; + + if (us_left-- == 0) { + /* This means timeout */ + device_printf(dev, + "Timeout while waiting for CMD completion.\n"); + return; + } + DELAY(1); + } +} + +static struct its_cmd * +its_cmd_alloc_locked(device_t dev) +{ + struct gicv3_its_softc *sc; + struct its_cmd *cmd; + size_t us_left; + + sc = device_get_softc(dev); + + /* + * XXX ARM64TODO: This is obviously a significant delay. + * The reason for that is that currently the time frames for + * the command to complete (and therefore free the descriptor) + * are not known. + */ + us_left = 1000000; + + mtx_assert(&sc->sc_its_cmd_lock, MA_OWNED); + while (its_cmd_queue_full(sc)) { + if (us_left-- == 0) { + /* Timeout while waiting for free command */ + device_printf(dev, + "Timeout while waiting for free command\n"); + return (NULL); + } + DELAY(1); + } + + cmd = &sc->sc_its_cmd_base[sc->sc_its_cmd_next_idx]; + sc->sc_its_cmd_next_idx++; + sc->sc_its_cmd_next_idx %= ITS_CMDQ_SIZE / sizeof(struct its_cmd); + + return (cmd); +} + +static uint64_t +its_cmd_prepare(struct its_cmd *cmd, struct its_cmd_desc *desc) +{ + uint64_t target; + uint8_t cmd_type; + u_int size; + + cmd_type = desc->cmd_type; + target = ITS_TARGET_NONE; + + switch (cmd_type) { + case ITS_CMD_MOVI: /* Move interrupt ID to another collection */ + target = desc->cmd_desc_movi.col->col_target; + cmd_format_command(cmd, ITS_CMD_MOVI); + cmd_format_id(cmd, desc->cmd_desc_movi.id); + cmd_format_col(cmd, desc->cmd_desc_movi.col->col_id); + cmd_format_devid(cmd, desc->cmd_desc_movi.its_dev->devid); + break; + case ITS_CMD_SYNC: /* Wait for previous commands completion */ + target = desc->cmd_desc_sync.col->col_target; + cmd_format_command(cmd, ITS_CMD_SYNC); + cmd_format_target(cmd, target); + break; + case ITS_CMD_MAPD: /* Assign ITT to device */ + cmd_format_command(cmd, ITS_CMD_MAPD); + cmd_format_itt(cmd, vtophys(desc->cmd_desc_mapd.its_dev->itt)); + /* + * Size describes number of bits to encode interrupt IDs + * supported by the device minus one. + * When V (valid) bit is zero, this field should be written + * as zero. 
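+	 * For example, a device with 32 LPIs gives fls(32) = 6, so the
+	 * field is written as 5 and the ITT covers EventIDs 0-63.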
+ */ + if (desc->cmd_desc_mapd.valid != 0) { + size = fls(desc->cmd_desc_mapd.its_dev->lpis.lpi_num); + size = MAX(1, size) - 1; + } else + size = 0; + + cmd_format_size(cmd, size); + cmd_format_devid(cmd, desc->cmd_desc_mapd.its_dev->devid); + cmd_format_valid(cmd, desc->cmd_desc_mapd.valid); + break; + case ITS_CMD_MAPC: /* Map collection to Re-Distributor */ + target = desc->cmd_desc_mapc.col->col_target; + cmd_format_command(cmd, ITS_CMD_MAPC); + cmd_format_col(cmd, desc->cmd_desc_mapc.col->col_id); + cmd_format_valid(cmd, desc->cmd_desc_mapc.valid); + cmd_format_target(cmd, target); + break; + case ITS_CMD_MAPTI: + target = desc->cmd_desc_mapvi.col->col_target; + cmd_format_command(cmd, ITS_CMD_MAPTI); + cmd_format_devid(cmd, desc->cmd_desc_mapvi.its_dev->devid); + cmd_format_id(cmd, desc->cmd_desc_mapvi.id); + cmd_format_pid(cmd, desc->cmd_desc_mapvi.pid); + cmd_format_col(cmd, desc->cmd_desc_mapvi.col->col_id); + break; + case ITS_CMD_MAPI: + target = desc->cmd_desc_mapi.col->col_target; + cmd_format_command(cmd, ITS_CMD_MAPI); + cmd_format_devid(cmd, desc->cmd_desc_mapi.its_dev->devid); + cmd_format_id(cmd, desc->cmd_desc_mapi.pid); + cmd_format_col(cmd, desc->cmd_desc_mapi.col->col_id); + break; + case ITS_CMD_INV: + target = desc->cmd_desc_inv.col->col_target; + cmd_format_command(cmd, ITS_CMD_INV); + cmd_format_devid(cmd, desc->cmd_desc_inv.its_dev->devid); + cmd_format_id(cmd, desc->cmd_desc_inv.pid); + break; + case ITS_CMD_INVALL: + cmd_format_command(cmd, ITS_CMD_INVALL); + cmd_format_col(cmd, desc->cmd_desc_invall.col->col_id); + break; + default: + panic("its_cmd_prepare: Invalid command: %x", cmd_type); + } + + return (target); +} + +static int +its_cmd_send(device_t dev, struct its_cmd_desc *desc) +{ + struct gicv3_its_softc *sc; + struct its_cmd *cmd, *cmd_sync, *cmd_write; + struct its_col col_sync; + struct its_cmd_desc desc_sync; + uint64_t target, cwriter; + + sc = device_get_softc(dev); + mtx_lock_spin(&sc->sc_its_cmd_lock); + cmd = its_cmd_alloc_locked(dev); + if (cmd == NULL) { + device_printf(dev, "could not allocate ITS command\n"); + mtx_unlock_spin(&sc->sc_its_cmd_lock); + return (EBUSY); + } + + target = its_cmd_prepare(cmd, desc); + its_cmd_sync(sc, cmd); + + if (target != ITS_TARGET_NONE) { + cmd_sync = its_cmd_alloc_locked(dev); + if (cmd_sync != NULL) { + desc_sync.cmd_type = ITS_CMD_SYNC; + col_sync.col_target = target; + desc_sync.cmd_desc_sync.col = &col_sync; + its_cmd_prepare(cmd_sync, &desc_sync); + its_cmd_sync(sc, cmd_sync); + } + } + + /* Update GITS_CWRITER */ + cwriter = sc->sc_its_cmd_next_idx * sizeof(struct its_cmd); + gic_its_write_8(sc, GITS_CWRITER, cwriter); + cmd_write = &sc->sc_its_cmd_base[sc->sc_its_cmd_next_idx]; + mtx_unlock_spin(&sc->sc_its_cmd_lock); + + its_cmd_wait_completion(dev, cmd, cmd_write); + + return (0); +} + +/* Handlers to send commands */ +static void +its_cmd_movi(device_t dev, struct gicv3_its_irqsrc *girq) +{ + struct gicv3_its_softc *sc; + struct its_cmd_desc desc; + struct its_col *col; + + sc = device_get_softc(dev); + col = sc->sc_its_cols[CPU_FFS(&girq->gi_isrc.isrc_cpu) - 1]; + + desc.cmd_type = ITS_CMD_MOVI; + desc.cmd_desc_movi.its_dev = girq->gi_its_dev; + desc.cmd_desc_movi.col = col; + desc.cmd_desc_movi.id = girq->gi_id; + + its_cmd_send(dev, &desc); +} + +static void +its_cmd_mapc(device_t dev, struct its_col *col, uint8_t valid) +{ + struct its_cmd_desc desc; + + desc.cmd_type = ITS_CMD_MAPC; + desc.cmd_desc_mapc.col = col; + /* + * Valid bit set - map the collection. 
+	 * Valid bit cleared - unmap the collection.
+	 */
+	desc.cmd_desc_mapc.valid = valid;
+
+	its_cmd_send(dev, &desc);
+}
+
+static void
+its_cmd_mapti(device_t dev, struct gicv3_its_irqsrc *girq)
+{
+	struct gicv3_its_softc *sc;
+	struct its_cmd_desc desc;
+	struct its_col *col;
+	u_int col_id;
+
+	sc = device_get_softc(dev);
+
+	col_id = CPU_FFS(&girq->gi_isrc.isrc_cpu) - 1;
+	col = sc->sc_its_cols[col_id];
+
+	desc.cmd_type = ITS_CMD_MAPTI;
+	desc.cmd_desc_mapvi.its_dev = girq->gi_its_dev;
+	desc.cmd_desc_mapvi.col = col;
+	/* The EventID sent to the device */
+	desc.cmd_desc_mapvi.id = girq->gi_id;
+	/* The physical interrupt presented to software */
+	desc.cmd_desc_mapvi.pid = girq->gi_lpi + GIC_FIRST_LPI;
+
+	its_cmd_send(dev, &desc);
+}
+
+static void
+its_cmd_mapd(device_t dev, struct its_dev *its_dev, uint8_t valid)
+{
+	struct its_cmd_desc desc;
+
+	desc.cmd_type = ITS_CMD_MAPD;
+	desc.cmd_desc_mapd.its_dev = its_dev;
+	desc.cmd_desc_mapd.valid = valid;
+
+	its_cmd_send(dev, &desc);
+}
+
+static void
+its_cmd_inv(device_t dev, struct its_dev *its_dev,
+    struct gicv3_its_irqsrc *girq)
+{
+	struct gicv3_its_softc *sc;
+	struct its_cmd_desc desc;
+	struct its_col *col;
+
+	sc = device_get_softc(dev);
+	col = sc->sc_its_cols[CPU_FFS(&girq->gi_isrc.isrc_cpu) - 1];
+
+	desc.cmd_type = ITS_CMD_INV;
+	/* The EventID sent to the device */
+	desc.cmd_desc_inv.pid = girq->gi_id;
+	desc.cmd_desc_inv.its_dev = its_dev;
+	desc.cmd_desc_inv.col = col;
+
+	its_cmd_send(dev, &desc);
+}
+
+static void
+its_cmd_invall(device_t dev, struct its_col *col)
+{
+	struct its_cmd_desc desc;
+
+	desc.cmd_type = ITS_CMD_INVALL;
+	desc.cmd_desc_invall.col = col;
+
+	its_cmd_send(dev, &desc);
+}
+
+#ifdef FDT
+static device_probe_t gicv3_its_fdt_probe;
+static device_attach_t gicv3_its_fdt_attach;
+
+static device_method_t gicv3_its_fdt_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		gicv3_its_fdt_probe),
+	DEVMETHOD(device_attach,	gicv3_its_fdt_attach),
+
+	/* End */
+	DEVMETHOD_END
+};
+
+#define its_baseclasses its_fdt_baseclasses
+DEFINE_CLASS_1(its, gicv3_its_fdt_driver, gicv3_its_fdt_methods,
+    sizeof(struct gicv3_its_softc), gicv3_its_driver);
+#undef its_baseclasses
+static devclass_t gicv3_its_fdt_devclass;
+
+EARLY_DRIVER_MODULE(its_fdt, gic, gicv3_its_fdt_driver,
+    gicv3_its_fdt_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE);
+
+static int
+gicv3_its_fdt_probe(device_t dev)
+{
+
+	if (!ofw_bus_status_okay(dev))
+		return (ENXIO);
+
+	if (!ofw_bus_is_compatible(dev, "arm,gic-v3-its"))
+		return (ENXIO);
+
+	device_set_desc(dev, "ARM GIC Interrupt Translation Service");
+	return (BUS_PROBE_DEFAULT);
+}
+
+static int
+gicv3_its_fdt_attach(device_t dev)
+{
+	struct gicv3_its_softc *sc;
+	phandle_t xref;
+	int err;
+
+	sc = device_get_softc(dev);
+	sc->dev = dev;
+	err = gicv3_its_attach(dev);
+	if (err != 0)
+		return (err);
+
+	/* Register this device as an interrupt controller */
+	xref = OF_xref_from_node(ofw_bus_get_node(dev));
+	sc->sc_pic = intr_pic_register(dev, xref);
+	intr_pic_add_handler(device_get_parent(dev), sc->sc_pic,
+	    gicv3_its_intr, sc, sc->sc_irq_base, sc->sc_irq_length);
+
+	/* Register this device to handle MSI interrupts */
+	intr_msi_register(dev, xref);
+
+	return (0);
+}
+#endif
+
+#ifdef DEV_ACPI
+static device_probe_t gicv3_its_acpi_probe;
+static device_attach_t gicv3_its_acpi_attach;
+
+static device_method_t gicv3_its_acpi_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		gicv3_its_acpi_probe),
+	DEVMETHOD(device_attach,
gicv3_its_acpi_attach), + + /* End */ + DEVMETHOD_END +}; + +#define its_baseclasses its_acpi_baseclasses +DEFINE_CLASS_1(its, gicv3_its_acpi_driver, gicv3_its_acpi_methods, + sizeof(struct gicv3_its_softc), gicv3_its_driver); +#undef its_baseclasses +static devclass_t gicv3_its_acpi_devclass; + +EARLY_DRIVER_MODULE(its_acpi, gic, gicv3_its_acpi_driver, + gicv3_its_acpi_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); + +static int +gicv3_its_acpi_probe(device_t dev) +{ + + if (gic_get_bus(dev) != GIC_BUS_ACPI) + return (EINVAL); + + if (gic_get_hw_rev(dev) < 3) + return (EINVAL); + + device_set_desc(dev, "ARM GIC Interrupt Translation Service"); + return (BUS_PROBE_DEFAULT); +} + +static int +gicv3_its_acpi_attach(device_t dev) +{ + struct gicv3_its_softc *sc; + struct gic_v3_devinfo *di; + int err; + + sc = device_get_softc(dev); + sc->dev = dev; + err = gicv3_its_attach(dev); + if (err != 0) + return (err); + + di = device_get_ivars(dev); + sc->sc_pic = intr_pic_register(dev, di->msi_xref); + intr_pic_add_handler(device_get_parent(dev), sc->sc_pic, + gicv3_its_intr, sc, sc->sc_irq_base, sc->sc_irq_length); + + /* Register this device to handle MSI interrupts */ + intr_msi_register(dev, di->msi_xref); + + return (0); +} +#endif diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c new file mode 100644 index 000000000000..d9c5c50fe568 --- /dev/null +++ b/sys/arm64/arm64/identcpu.c @@ -0,0 +1,1667 @@ +/*- + * Copyright (c) 2014 Andrew Turner + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by Semihalf + * under sponsorship of the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/pcpu.h> +#include <sys/sbuf.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/systm.h> + +#include <machine/atomic.h> +#include <machine/cpu.h> +#include <machine/cpufunc.h> +#include <machine/undefined.h> +#include <machine/elf.h> + +static void print_cpu_features(u_int cpu); +static u_long parse_cpu_features_hwcap(void); + +char machine[] = "arm64"; + +#ifdef SCTL_MASK32 +extern int adaptive_machine_arch; +#endif + +static SYSCTL_NODE(_machdep, OID_AUTO, cache, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "Cache management tuning"); + +static int allow_dic = 1; +SYSCTL_INT(_machdep_cache, OID_AUTO, allow_dic, CTLFLAG_RDTUN, &allow_dic, 0, + "Allow optimizations based on the DIC cache bit"); + +static int allow_idc = 1; +SYSCTL_INT(_machdep_cache, OID_AUTO, allow_idc, CTLFLAG_RDTUN, &allow_idc, 0, + "Allow optimizations based on the IDC cache bit"); + +static void check_cpu_regs(u_int cpu); + +/* + * The default implementation of I-cache sync assumes we have an + * aliasing cache until we know otherwise. + */ +void (*arm64_icache_sync_range)(vm_offset_t, vm_size_t) = + &arm64_aliasing_icache_sync_range; + +static int +sysctl_hw_machine(SYSCTL_HANDLER_ARGS) +{ +#ifdef SCTL_MASK32 + static const char machine32[] = "arm"; +#endif + int error; + +#ifdef SCTL_MASK32 + if ((req->flags & SCTL_MASK32) != 0 && adaptive_machine_arch) + error = SYSCTL_OUT(req, machine32, sizeof(machine32)); + else +#endif + error = SYSCTL_OUT(req, machine, sizeof(machine)); + return (error); +} + +SYSCTL_PROC(_hw, HW_MACHINE, machine, CTLTYPE_STRING | CTLFLAG_RD | + CTLFLAG_MPSAFE, NULL, 0, sysctl_hw_machine, "A", "Machine class"); + +static char cpu_model[64]; +SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, + cpu_model, sizeof(cpu_model), "Machine model"); + +/* + * Per-CPU affinity as provided in MPIDR_EL1 + * Indexed by CPU number in logical order selected by the system. + * Relevant fields can be extracted using CPU_AFFn macros, + * Aff3.Aff2.Aff1.Aff0 construct a unique CPU address in the system. 
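+ * For example, the second core in the second cluster of a system
+ * would typically report Aff1 = 1 and Aff0 = 1, i.e. the affinity
+ * address 0.0.1.1.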
+ * + * Fields used by us: + * Aff1 - Cluster number + * Aff0 - CPU number in Aff1 cluster + */ +uint64_t __cpu_affinity[MAXCPU]; +static u_int cpu_aff_levels; + +struct cpu_desc { + u_int cpu_impl; + u_int cpu_part_num; + u_int cpu_variant; + u_int cpu_revision; + const char *cpu_impl_name; + const char *cpu_part_name; + + uint64_t mpidr; + uint64_t id_aa64afr0; + uint64_t id_aa64afr1; + uint64_t id_aa64dfr0; + uint64_t id_aa64dfr1; + uint64_t id_aa64isar0; + uint64_t id_aa64isar1; + uint64_t id_aa64mmfr0; + uint64_t id_aa64mmfr1; + uint64_t id_aa64mmfr2; + uint64_t id_aa64pfr0; + uint64_t id_aa64pfr1; + uint64_t ctr; +}; + +static struct cpu_desc cpu_desc[MAXCPU]; +static struct cpu_desc kern_cpu_desc; +static struct cpu_desc user_cpu_desc; +static u_int cpu_print_regs; +#define PRINT_ID_AA64_AFR0 0x00000001 +#define PRINT_ID_AA64_AFR1 0x00000002 +#define PRINT_ID_AA64_DFR0 0x00000010 +#define PRINT_ID_AA64_DFR1 0x00000020 +#define PRINT_ID_AA64_ISAR0 0x00000100 +#define PRINT_ID_AA64_ISAR1 0x00000200 +#define PRINT_ID_AA64_MMFR0 0x00001000 +#define PRINT_ID_AA64_MMFR1 0x00002000 +#define PRINT_ID_AA64_MMFR2 0x00004000 +#define PRINT_ID_AA64_PFR0 0x00010000 +#define PRINT_ID_AA64_PFR1 0x00020000 +#define PRINT_CTR_EL0 0x10000000 + +struct cpu_parts { + u_int part_id; + const char *part_name; +}; +#define CPU_PART_NONE { 0, "Unknown Processor" } + +struct cpu_implementers { + u_int impl_id; + const char *impl_name; + /* + * Part number is implementation defined + * so each vendor will have its own set of values and names. + */ + const struct cpu_parts *cpu_parts; +}; +#define CPU_IMPLEMENTER_NONE { 0, "Unknown Implementer", cpu_parts_none } + +/* + * Per-implementer table of (PartNum, CPU Name) pairs. + */ +/* ARM Ltd. */ +static const struct cpu_parts cpu_parts_arm[] = { + { CPU_PART_FOUNDATION, "Foundation-Model" }, + { CPU_PART_CORTEX_A35, "Cortex-A35" }, + { CPU_PART_CORTEX_A53, "Cortex-A53" }, + { CPU_PART_CORTEX_A55, "Cortex-A55" }, + { CPU_PART_CORTEX_A57, "Cortex-A57" }, + { CPU_PART_CORTEX_A65, "Cortex-A65" }, + { CPU_PART_CORTEX_A72, "Cortex-A72" }, + { CPU_PART_CORTEX_A73, "Cortex-A73" }, + { CPU_PART_CORTEX_A75, "Cortex-A75" }, + { CPU_PART_CORTEX_A76, "Cortex-A76" }, + { CPU_PART_CORTEX_A76AE, "Cortex-A76AE" }, + { CPU_PART_CORTEX_A77, "Cortex-A77" }, + { CPU_PART_NEOVERSE_N1, "Neoverse-N1" }, + CPU_PART_NONE, +}; + +/* Cavium */ +static const struct cpu_parts cpu_parts_cavium[] = { + { CPU_PART_THUNDERX, "ThunderX" }, + { CPU_PART_THUNDERX2, "ThunderX2" }, + CPU_PART_NONE, +}; + +/* APM / Ampere */ +static const struct cpu_parts cpu_parts_apm[] = { + { CPU_PART_EMAG8180, "eMAG 8180" }, + CPU_PART_NONE, +}; + +/* Unknown */ +static const struct cpu_parts cpu_parts_none[] = { + CPU_PART_NONE, +}; + +/* + * Implementers table. 
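+ * Matched against the Implementer field of MIDR_EL1; the terminating
+ * CPU_IMPLEMENTER_NONE entry has impl_id 0 and acts as a catch-all.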
+ */ +const struct cpu_implementers cpu_implementers[] = { + { CPU_IMPL_ARM, "ARM", cpu_parts_arm }, + { CPU_IMPL_BROADCOM, "Broadcom", cpu_parts_none }, + { CPU_IMPL_CAVIUM, "Cavium", cpu_parts_cavium }, + { CPU_IMPL_DEC, "DEC", cpu_parts_none }, + { CPU_IMPL_INFINEON, "IFX", cpu_parts_none }, + { CPU_IMPL_FREESCALE, "Freescale", cpu_parts_none }, + { CPU_IMPL_NVIDIA, "NVIDIA", cpu_parts_none }, + { CPU_IMPL_APM, "APM", cpu_parts_apm }, + { CPU_IMPL_QUALCOMM, "Qualcomm", cpu_parts_none }, + { CPU_IMPL_MARVELL, "Marvell", cpu_parts_none }, + { CPU_IMPL_INTEL, "Intel", cpu_parts_none }, + CPU_IMPLEMENTER_NONE, +}; + +#define MRS_TYPE_MASK 0xf +#define MRS_INVALID 0 +#define MRS_EXACT 1 +#define MRS_EXACT_VAL(x) (MRS_EXACT | ((x) << 4)) +#define MRS_EXACT_FIELD(x) ((x) >> 4) +#define MRS_LOWER 2 + +struct mrs_field_value { + uint64_t value; + const char *desc; +}; + +#define MRS_FIELD_VALUE(_value, _desc) \ + { \ + .value = (_value), \ + .desc = (_desc), \ + } + +#define MRS_FIELD_VALUE_NONE_IMPL(_reg, _field, _none, _impl) \ + MRS_FIELD_VALUE(_reg ## _ ## _field ## _ ## _none, ""), \ + MRS_FIELD_VALUE(_reg ## _ ## _field ## _ ## _impl, #_field) + +#define MRS_FIELD_VALUE_COUNT(_reg, _field, _desc) \ + MRS_FIELD_VALUE(0ul << _reg ## _ ## _field ## _SHIFT, "1 " _desc), \ + MRS_FIELD_VALUE(1ul << _reg ## _ ## _field ## _SHIFT, "2 " _desc "s"), \ + MRS_FIELD_VALUE(2ul << _reg ## _ ## _field ## _SHIFT, "3 " _desc "s"), \ + MRS_FIELD_VALUE(3ul << _reg ## _ ## _field ## _SHIFT, "4 " _desc "s"), \ + MRS_FIELD_VALUE(4ul << _reg ## _ ## _field ## _SHIFT, "5 " _desc "s"), \ + MRS_FIELD_VALUE(5ul << _reg ## _ ## _field ## _SHIFT, "6 " _desc "s"), \ + MRS_FIELD_VALUE(6ul << _reg ## _ ## _field ## _SHIFT, "7 " _desc "s"), \ + MRS_FIELD_VALUE(7ul << _reg ## _ ## _field ## _SHIFT, "8 " _desc "s"), \ + MRS_FIELD_VALUE(8ul << _reg ## _ ## _field ## _SHIFT, "9 " _desc "s"), \ + MRS_FIELD_VALUE(9ul << _reg ## _ ## _field ## _SHIFT, "10 "_desc "s"), \ + MRS_FIELD_VALUE(10ul<< _reg ## _ ## _field ## _SHIFT, "11 "_desc "s"), \ + MRS_FIELD_VALUE(11ul<< _reg ## _ ## _field ## _SHIFT, "12 "_desc "s"), \ + MRS_FIELD_VALUE(12ul<< _reg ## _ ## _field ## _SHIFT, "13 "_desc "s"), \ + MRS_FIELD_VALUE(13ul<< _reg ## _ ## _field ## _SHIFT, "14 "_desc "s"), \ + MRS_FIELD_VALUE(14ul<< _reg ## _ ## _field ## _SHIFT, "15 "_desc "s"), \ + MRS_FIELD_VALUE(15ul<< _reg ## _ ## _field ## _SHIFT, "16 "_desc "s") + +#define MRS_FIELD_VALUE_END { .desc = NULL } + +struct mrs_field { + const char *name; + struct mrs_field_value *values; + uint64_t mask; + bool sign; + u_int type; + u_int shift; +}; + +#define MRS_FIELD(_register, _name, _sign, _type, _values) \ + { \ + .name = #_name, \ + .sign = (_sign), \ + .type = (_type), \ + .shift = _register ## _ ## _name ## _SHIFT, \ + .mask = _register ## _ ## _name ## _MASK, \ + .values = (_values), \ + } + +#define MRS_FIELD_END { .type = MRS_INVALID, } + +/* ID_AA64AFR0_EL1 */ +static struct mrs_field id_aa64afr0_fields[] = { + MRS_FIELD_END, +}; + + +/* ID_AA64AFR1_EL1 */ +static struct mrs_field id_aa64afr1_fields[] = { + MRS_FIELD_END, +}; + + +/* ID_AA64DFR0_EL1 */ +static struct mrs_field_value id_aa64dfr0_pmsver[] = { + MRS_FIELD_VALUE(ID_AA64DFR0_PMSVer_NONE, ""), + MRS_FIELD_VALUE(ID_AA64DFR0_PMSVer_V1, "SPE"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64dfr0_ctx_cmps[] = { + MRS_FIELD_VALUE_COUNT(ID_AA64DFR0, CTX_CMPs, "CTX BKPT"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64dfr0_wrps[] = { + MRS_FIELD_VALUE_COUNT(ID_AA64DFR0, WRPs, 
"Watchpoint"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64dfr0_brps[] = { + MRS_FIELD_VALUE_COUNT(ID_AA64DFR0, BRPs, "Breakpoint"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64dfr0_pmuver[] = { + MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_NONE, ""), + MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_3, "PMUv3"), + MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_3_1, "PMUv3+16 bit evtCount"), + MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_IMPL, "IMPL PMU"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64dfr0_tracever[] = { + MRS_FIELD_VALUE(ID_AA64DFR0_TraceVer_NONE, ""), + MRS_FIELD_VALUE(ID_AA64DFR0_TraceVer_IMPL, "Trace"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64dfr0_debugver[] = { + MRS_FIELD_VALUE(ID_AA64DFR0_DebugVer_8, "Debugv8"), + MRS_FIELD_VALUE(ID_AA64DFR0_DebugVer_8_VHE, "Debugv8_VHE"), + MRS_FIELD_VALUE(ID_AA64DFR0_DebugVer_8_2, "Debugv8.2"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field id_aa64dfr0_fields[] = { + MRS_FIELD(ID_AA64DFR0, PMSVer, false, MRS_EXACT, id_aa64dfr0_pmsver), + MRS_FIELD(ID_AA64DFR0, CTX_CMPs, false, MRS_EXACT, + id_aa64dfr0_ctx_cmps), + MRS_FIELD(ID_AA64DFR0, WRPs, false, MRS_EXACT, id_aa64dfr0_wrps), + MRS_FIELD(ID_AA64DFR0, BRPs, false, MRS_LOWER, id_aa64dfr0_brps), + MRS_FIELD(ID_AA64DFR0, PMUVer, false, MRS_EXACT, id_aa64dfr0_pmuver), + MRS_FIELD(ID_AA64DFR0, TraceVer, false, MRS_EXACT, + id_aa64dfr0_tracever), + MRS_FIELD(ID_AA64DFR0, DebugVer, false, MRS_EXACT_VAL(0x6), + id_aa64dfr0_debugver), + MRS_FIELD_END, +}; + + +/* ID_AA64DFR1 */ +static struct mrs_field id_aa64dfr1_fields[] = { + MRS_FIELD_END, +}; + + +/* ID_AA64ISAR0_EL1 */ +static struct mrs_field_value id_aa64isar0_rndr[] = { + MRS_FIELD_VALUE(ID_AA64ISAR0_RNDR_NONE, ""), + MRS_FIELD_VALUE(ID_AA64ISAR0_RNDR_IMPL, "RNG"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar0_tlb[] = { + MRS_FIELD_VALUE(ID_AA64ISAR0_TLB_NONE, ""), + MRS_FIELD_VALUE(ID_AA64ISAR0_TLB_TLBIOS, "TLBI-OS"), + MRS_FIELD_VALUE(ID_AA64ISAR0_TLB_TLBIOSR, "TLBI-OSR"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar0_ts[] = { + MRS_FIELD_VALUE(ID_AA64ISAR0_TS_NONE, ""), + MRS_FIELD_VALUE(ID_AA64ISAR0_TS_CondM_8_4, "CondM-8.4"), + MRS_FIELD_VALUE(ID_AA64ISAR0_TS_CondM_8_5, "CondM-8.5"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar0_fhm[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, FHM, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar0_dp[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, DP, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar0_sm4[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SM4, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar0_sm3[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SM3, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar0_sha3[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SHA3, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar0_rdm[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, RDM, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar0_atomic[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, Atomic, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar0_crc32[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, CRC32, NONE, BASE), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar0_sha2[] = { + 
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SHA2, NONE, BASE), + MRS_FIELD_VALUE(ID_AA64ISAR0_SHA2_512, "SHA2+SHA512"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar0_sha1[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SHA1, NONE, BASE), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar0_aes[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, AES, NONE, BASE), + MRS_FIELD_VALUE(ID_AA64ISAR0_AES_PMULL, "AES+PMULL"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field id_aa64isar0_fields[] = { + MRS_FIELD(ID_AA64ISAR0, RNDR, false, MRS_LOWER, id_aa64isar0_rndr), + MRS_FIELD(ID_AA64ISAR0, TLB, false, MRS_LOWER, id_aa64isar0_tlb), + MRS_FIELD(ID_AA64ISAR0, TS, false, MRS_LOWER, id_aa64isar0_ts), + MRS_FIELD(ID_AA64ISAR0, FHM, false, MRS_LOWER, id_aa64isar0_fhm), + MRS_FIELD(ID_AA64ISAR0, DP, false, MRS_LOWER, id_aa64isar0_dp), + MRS_FIELD(ID_AA64ISAR0, SM4, false, MRS_LOWER, id_aa64isar0_sm4), + MRS_FIELD(ID_AA64ISAR0, SM3, false, MRS_LOWER, id_aa64isar0_sm3), + MRS_FIELD(ID_AA64ISAR0, SHA3, false, MRS_LOWER, id_aa64isar0_sha3), + MRS_FIELD(ID_AA64ISAR0, RDM, false, MRS_LOWER, id_aa64isar0_rdm), + MRS_FIELD(ID_AA64ISAR0, Atomic, false, MRS_LOWER, id_aa64isar0_atomic), + MRS_FIELD(ID_AA64ISAR0, CRC32, false, MRS_LOWER, id_aa64isar0_crc32), + MRS_FIELD(ID_AA64ISAR0, SHA2, false, MRS_LOWER, id_aa64isar0_sha2), + MRS_FIELD(ID_AA64ISAR0, SHA1, false, MRS_LOWER, id_aa64isar0_sha1), + MRS_FIELD(ID_AA64ISAR0, AES, false, MRS_LOWER, id_aa64isar0_aes), + MRS_FIELD_END, +}; + + +/* ID_AA64ISAR1_EL1 */ +static struct mrs_field_value id_aa64isar1_i8mm[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, I8MM, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar1_dgh[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, DGH, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar1_bf16[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, BF16, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar1_specres[] = { + MRS_FIELD_VALUE(ID_AA64ISAR1_SPECRES_NONE, ""), + MRS_FIELD_VALUE(ID_AA64ISAR1_SPECRES_IMPL, "PredInv"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar1_sb[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, SB, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar1_frintts[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, FRINTTS, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar1_gpi[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, GPI, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar1_gpa[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, GPA, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar1_lrcpc[] = { + MRS_FIELD_VALUE(ID_AA64ISAR1_LRCPC_NONE, ""), + MRS_FIELD_VALUE(ID_AA64ISAR1_LRCPC_RCPC_8_3, "RCPC-8.3"), + MRS_FIELD_VALUE(ID_AA64ISAR1_LRCPC_RCPC_8_4, "RCPC-8.4"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar1_fcma[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, FCMA, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar1_jscvt[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, JSCVT, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar1_api[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, API, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar1_apa[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, APA, NONE, 
IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64isar1_dpb[] = { + MRS_FIELD_VALUE(ID_AA64ISAR1_DPB_NONE, ""), + MRS_FIELD_VALUE(ID_AA64ISAR1_DPB_DCCVAP, "DCPoP"), + MRS_FIELD_VALUE(ID_AA64ISAR1_DPB_DCCVADP, "DCCVADP"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field id_aa64isar1_fields[] = { + MRS_FIELD(ID_AA64ISAR1, I8MM, false, MRS_LOWER, id_aa64isar1_i8mm), + MRS_FIELD(ID_AA64ISAR1, DGH, false, MRS_LOWER, id_aa64isar1_dgh), + MRS_FIELD(ID_AA64ISAR1, BF16, false, MRS_LOWER, id_aa64isar1_bf16), + MRS_FIELD(ID_AA64ISAR1, SPECRES, false, MRS_LOWER, + id_aa64isar1_specres), + MRS_FIELD(ID_AA64ISAR1, SB, false, MRS_LOWER, id_aa64isar1_sb), + MRS_FIELD(ID_AA64ISAR1, FRINTTS, false, MRS_LOWER, + id_aa64isar1_frintts), + MRS_FIELD(ID_AA64ISAR1, GPI, false, MRS_EXACT, id_aa64isar1_gpi), + MRS_FIELD(ID_AA64ISAR1, GPA, false, MRS_EXACT, id_aa64isar1_gpa), + MRS_FIELD(ID_AA64ISAR1, LRCPC, false, MRS_LOWER, id_aa64isar1_lrcpc), + MRS_FIELD(ID_AA64ISAR1, FCMA, false, MRS_LOWER, id_aa64isar1_fcma), + MRS_FIELD(ID_AA64ISAR1, JSCVT, false, MRS_LOWER, id_aa64isar1_jscvt), + MRS_FIELD(ID_AA64ISAR1, API, false, MRS_EXACT, id_aa64isar1_api), + MRS_FIELD(ID_AA64ISAR1, APA, false, MRS_EXACT, id_aa64isar1_apa), + MRS_FIELD(ID_AA64ISAR1, DPB, false, MRS_LOWER, id_aa64isar1_dpb), + MRS_FIELD_END, +}; + + +/* ID_AA64MMFR0_EL1 */ +static struct mrs_field_value id_aa64mmfr0_tgran4[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, TGran4, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr0_tgran64[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, TGran64, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr0_tgran16[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, TGran16, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr0_bigend_el0[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, BigEndEL0, FIXED, MIXED), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr0_snsmem[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, SNSMem, NONE, DISTINCT), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr0_bigend[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, BigEnd, FIXED, MIXED), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr0_asid_bits[] = { + MRS_FIELD_VALUE(ID_AA64MMFR0_ASIDBits_8, "8bit ASID"), + MRS_FIELD_VALUE(ID_AA64MMFR0_ASIDBits_16, "16bit ASID"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr0_parange[] = { + MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_4G, "4GB PA"), + MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_64G, "64GB PA"), + MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_1T, "1TB PA"), + MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_4T, "4TB PA"), + MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_16T, "16TB PA"), + MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_256T, "256TB PA"), + MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_4P, "4PB PA"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field id_aa64mmfr0_fields[] = { + MRS_FIELD(ID_AA64MMFR0, TGran4, false, MRS_EXACT, id_aa64mmfr0_tgran4), + MRS_FIELD(ID_AA64MMFR0, TGran64, false, MRS_EXACT, + id_aa64mmfr0_tgran64), + MRS_FIELD(ID_AA64MMFR0, TGran16, false, MRS_EXACT, + id_aa64mmfr0_tgran16), + MRS_FIELD(ID_AA64MMFR0, BigEndEL0, false, MRS_EXACT, + id_aa64mmfr0_bigend_el0), + MRS_FIELD(ID_AA64MMFR0, SNSMem, false, MRS_EXACT, id_aa64mmfr0_snsmem), + MRS_FIELD(ID_AA64MMFR0, BigEnd, false, MRS_EXACT, id_aa64mmfr0_bigend), + MRS_FIELD(ID_AA64MMFR0, ASIDBits, false, MRS_EXACT, + id_aa64mmfr0_asid_bits), + 
MRS_FIELD(ID_AA64MMFR0, PARange, false, MRS_EXACT, + id_aa64mmfr0_parange), + MRS_FIELD_END, +}; + + +/* ID_AA64MMFR1_EL1 */ +static struct mrs_field_value id_aa64mmfr1_xnx[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, XNX, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr1_specsei[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, SpecSEI, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr1_pan[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, PAN, NONE, IMPL), + MRS_FIELD_VALUE(ID_AA64MMFR1_PAN_ATS1E1, "PAN+ATS1E1"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr1_lo[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, LO, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr1_hpds[] = { + MRS_FIELD_VALUE(ID_AA64MMFR1_HPDS_NONE, ""), + MRS_FIELD_VALUE(ID_AA64MMFR1_HPDS_HPD, "HPD"), + MRS_FIELD_VALUE(ID_AA64MMFR1_HPDS_TTPBHA, "HPD+TTPBHA"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr1_vh[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, VH, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr1_vmidbits[] = { + MRS_FIELD_VALUE(ID_AA64MMFR1_VMIDBits_8, "8bit VMID"), + MRS_FIELD_VALUE(ID_AA64MMFR1_VMIDBits_16, "16bit VMID"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr1_hafdbs[] = { + MRS_FIELD_VALUE(ID_AA64MMFR1_HAFDBS_NONE, ""), + MRS_FIELD_VALUE(ID_AA64MMFR1_HAFDBS_AF, "HAF"), + MRS_FIELD_VALUE(ID_AA64MMFR1_HAFDBS_AF_DBS, "HAF+DS"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field id_aa64mmfr1_fields[] = { + MRS_FIELD(ID_AA64MMFR1, XNX, false, MRS_EXACT, id_aa64mmfr1_xnx), + MRS_FIELD(ID_AA64MMFR1, SpecSEI, false, MRS_EXACT, + id_aa64mmfr1_specsei), + MRS_FIELD(ID_AA64MMFR1, PAN, false, MRS_EXACT, id_aa64mmfr1_pan), + MRS_FIELD(ID_AA64MMFR1, LO, false, MRS_EXACT, id_aa64mmfr1_lo), + MRS_FIELD(ID_AA64MMFR1, HPDS, false, MRS_EXACT, id_aa64mmfr1_hpds), + MRS_FIELD(ID_AA64MMFR1, VH, false, MRS_EXACT, id_aa64mmfr1_vh), + MRS_FIELD(ID_AA64MMFR1, VMIDBits, false, MRS_EXACT, + id_aa64mmfr1_vmidbits), + MRS_FIELD(ID_AA64MMFR1, HAFDBS, false, MRS_EXACT, id_aa64mmfr1_hafdbs), + MRS_FIELD_END, +}; + + +/* ID_AA64MMFR2_EL1 */ +static struct mrs_field_value id_aa64mmfr2_nv[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, NV, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr2_ccidx[] = { + MRS_FIELD_VALUE(ID_AA64MMFR2_CCIDX_32, "32bit CCIDX"), + MRS_FIELD_VALUE(ID_AA64MMFR2_CCIDX_64, "64bit CCIDX"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr2_varange[] = { + MRS_FIELD_VALUE(ID_AA64MMFR2_VARange_48, "48bit VA"), + MRS_FIELD_VALUE(ID_AA64MMFR2_VARange_52, "52bit VA"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr2_iesb[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, IESB, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr2_lsm[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, LSM, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr2_uao[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, UAO, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64mmfr2_cnp[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, CnP, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field id_aa64mmfr2_fields[] = { + MRS_FIELD(ID_AA64MMFR2, NV, false, MRS_EXACT, id_aa64mmfr2_nv), + MRS_FIELD(ID_AA64MMFR2, CCIDX, false, MRS_EXACT, 
id_aa64mmfr2_ccidx), + MRS_FIELD(ID_AA64MMFR2, VARange, false, MRS_EXACT, + id_aa64mmfr2_varange), + MRS_FIELD(ID_AA64MMFR2, IESB, false, MRS_EXACT, id_aa64mmfr2_iesb), + MRS_FIELD(ID_AA64MMFR2, LSM, false, MRS_EXACT, id_aa64mmfr2_lsm), + MRS_FIELD(ID_AA64MMFR2, UAO, false, MRS_EXACT, id_aa64mmfr2_uao), + MRS_FIELD(ID_AA64MMFR2, CnP, false, MRS_EXACT, id_aa64mmfr2_cnp), + MRS_FIELD_END, +}; + + +/* ID_AA64PFR0_EL1 */ +static struct mrs_field_value id_aa64pfr0_csv3[] = { + MRS_FIELD_VALUE(ID_AA64PFR0_CSV3_NONE, ""), + MRS_FIELD_VALUE(ID_AA64PFR0_CSV3_ISOLATED, "CSV3"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_csv2[] = { + MRS_FIELD_VALUE(ID_AA64PFR0_CSV2_NONE, ""), + MRS_FIELD_VALUE(ID_AA64PFR0_CSV2_ISOLATED, "CSV2"), + MRS_FIELD_VALUE(ID_AA64PFR0_CSV2_SCXTNUM, "SCXTNUM"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_dit[] = { + MRS_FIELD_VALUE(ID_AA64PFR0_DIT_NONE, ""), + MRS_FIELD_VALUE(ID_AA64PFR0_DIT_PSTATE, "PSTATE.DIT"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_amu[] = { + MRS_FIELD_VALUE(ID_AA64PFR0_AMU_NONE, ""), + MRS_FIELD_VALUE(ID_AA64PFR0_AMU_V1, "AMUv1"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_mpam[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, MPAM, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_sel2[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, SEL2, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_sve[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, SVE, NONE, IMPL), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_ras[] = { + MRS_FIELD_VALUE(ID_AA64PFR0_RAS_NONE, ""), + MRS_FIELD_VALUE(ID_AA64PFR0_RAS_V1, "RASv1"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_gic[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, GIC, CPUIF_NONE, CPUIF_EN), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_advsimd[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, AdvSIMD, NONE, IMPL), + MRS_FIELD_VALUE(ID_AA64PFR0_AdvSIMD_HP, "AdvSIMD+HP"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_fp[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, FP, NONE, IMPL), + MRS_FIELD_VALUE(ID_AA64PFR0_FP_HP, "FP+HP"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_el3[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, EL3, NONE, 64), + MRS_FIELD_VALUE(ID_AA64PFR0_EL3_64_32, "EL3 32"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_el2[] = { + MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, EL2, NONE, 64), + MRS_FIELD_VALUE(ID_AA64PFR0_EL2_64_32, "EL2 32"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_el1[] = { + MRS_FIELD_VALUE(ID_AA64PFR0_EL1_64, "EL1"), + MRS_FIELD_VALUE(ID_AA64PFR0_EL1_64_32, "EL1 32"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field_value id_aa64pfr0_el0[] = { + MRS_FIELD_VALUE(ID_AA64PFR0_EL0_64, "EL0"), + MRS_FIELD_VALUE(ID_AA64PFR0_EL0_64_32, "EL0 32"), + MRS_FIELD_VALUE_END, +}; + +static struct mrs_field id_aa64pfr0_fields[] = { + MRS_FIELD(ID_AA64PFR0, CSV3, false, MRS_EXACT, id_aa64pfr0_csv3), + MRS_FIELD(ID_AA64PFR0, CSV2, false, MRS_EXACT, id_aa64pfr0_csv2), + MRS_FIELD(ID_AA64PFR0, DIT, false, MRS_EXACT, id_aa64pfr0_dit), + MRS_FIELD(ID_AA64PFR0, AMU, false, MRS_EXACT, id_aa64pfr0_amu), + MRS_FIELD(ID_AA64PFR0, MPAM, false, MRS_EXACT, id_aa64pfr0_mpam), + MRS_FIELD(ID_AA64PFR0, SEL2, false, MRS_EXACT, id_aa64pfr0_sel2), + 
MRS_FIELD(ID_AA64PFR0, SVE, false, MRS_EXACT, id_aa64pfr0_sve),
+	MRS_FIELD(ID_AA64PFR0, RAS, false, MRS_EXACT, id_aa64pfr0_ras),
+	MRS_FIELD(ID_AA64PFR0, GIC, false, MRS_EXACT, id_aa64pfr0_gic),
+	MRS_FIELD(ID_AA64PFR0, AdvSIMD, true, MRS_LOWER, id_aa64pfr0_advsimd),
+	MRS_FIELD(ID_AA64PFR0, FP, true, MRS_LOWER, id_aa64pfr0_fp),
+	MRS_FIELD(ID_AA64PFR0, EL3, false, MRS_EXACT, id_aa64pfr0_el3),
+	MRS_FIELD(ID_AA64PFR0, EL2, false, MRS_EXACT, id_aa64pfr0_el2),
+	MRS_FIELD(ID_AA64PFR0, EL1, false, MRS_LOWER, id_aa64pfr0_el1),
+	MRS_FIELD(ID_AA64PFR0, EL0, false, MRS_LOWER, id_aa64pfr0_el0),
+	MRS_FIELD_END,
+};
+
+
+/* ID_AA64PFR1_EL1 */
+static struct mrs_field_value id_aa64pfr1_bt[] = {
+	MRS_FIELD_VALUE(ID_AA64PFR1_BT_NONE, ""),
+	MRS_FIELD_VALUE(ID_AA64PFR1_BT_IMPL, "BTI"),
+	MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr1_ssbs[] = {
+	MRS_FIELD_VALUE(ID_AA64PFR1_SSBS_NONE, ""),
+	MRS_FIELD_VALUE(ID_AA64PFR1_SSBS_PSTATE, "PSTATE.SSBS"),
+	MRS_FIELD_VALUE(ID_AA64PFR1_SSBS_PSTATE_MSR, "PSTATE.SSBS MSR"),
+	MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr1_mte[] = {
+	MRS_FIELD_VALUE(ID_AA64PFR1_MTE_NONE, ""),
+	MRS_FIELD_VALUE(ID_AA64PFR1_MTE_IMPL_EL0, "MTE EL0"),
+	MRS_FIELD_VALUE(ID_AA64PFR1_MTE_IMPL, "MTE"),
+	MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field id_aa64pfr1_fields[] = {
+	MRS_FIELD(ID_AA64PFR1, BT, false, MRS_EXACT, id_aa64pfr1_bt),
+	MRS_FIELD(ID_AA64PFR1, SSBS, false, MRS_EXACT, id_aa64pfr1_ssbs),
+	MRS_FIELD(ID_AA64PFR1, MTE, false, MRS_EXACT, id_aa64pfr1_mte),
+	MRS_FIELD_END,
+};
+
+struct mrs_user_reg {
+	u_int		reg;
+	u_int		CRm;
+	u_int		Op2;
+	size_t		offset;
+	struct mrs_field *fields;
+};
+
+static struct mrs_user_reg user_regs[] = {
+	{ /* id_aa64isar0_el1 */
+		.reg = ID_AA64ISAR0_EL1,
+		.CRm = 6,
+		.Op2 = 0,
+		.offset = __offsetof(struct cpu_desc, id_aa64isar0),
+		.fields = id_aa64isar0_fields,
+	},
+	{ /* id_aa64isar1_el1 */
+		.reg = ID_AA64ISAR1_EL1,
+		.CRm = 6,
+		.Op2 = 1,
+		.offset = __offsetof(struct cpu_desc, id_aa64isar1),
+		.fields = id_aa64isar1_fields,
+	},
+	{ /* id_aa64pfr0_el1 */
+		.reg = ID_AA64PFR0_EL1,
+		.CRm = 4,
+		.Op2 = 0,
+		.offset = __offsetof(struct cpu_desc, id_aa64pfr0),
+		.fields = id_aa64pfr0_fields,
+	},
+	{ /* id_aa64pfr1_el1 */
+		.reg = ID_AA64PFR1_EL1,
+		.CRm = 4,
+		.Op2 = 1,
+		.offset = __offsetof(struct cpu_desc, id_aa64pfr1),
+		.fields = id_aa64pfr1_fields,
+	},
+	{ /* id_aa64dfr0_el1 */
+		.reg = ID_AA64DFR0_EL1,
+		.CRm = 5,
+		.Op2 = 0,
+		.offset = __offsetof(struct cpu_desc, id_aa64dfr0),
+		.fields = id_aa64dfr0_fields,
+	},
+	{ /* id_aa64mmfr0_el1 */
+		.reg = ID_AA64MMFR0_EL1,
+		.CRm = 7,
+		.Op2 = 0,
+		.offset = __offsetof(struct cpu_desc, id_aa64mmfr0),
+		.fields = id_aa64mmfr0_fields,
+	},
+};
+
+#define	CPU_DESC_FIELD(desc, idx)					\
+    *(uint64_t *)((char *)&(desc) + user_regs[(idx)].offset)
+
+static int
+user_mrs_handler(vm_offset_t va, uint32_t insn, struct trapframe *frame,
+    uint32_t esr)
+{
+	uint64_t value;
+	int CRm, Op2, i, reg;
+
+	if ((insn & MRS_MASK) != MRS_VALUE)
+		return (0);
+
+	/*
+	 * We only emulate Op0 == 3, Op1 == 0, CRn == 0, CRm == {0, 4-7}.
+	 * These are in the EL1 CPU identification space.
+	 * CRm == 0 holds MIDR_EL1, MPIDR_EL1, and REVIDR_EL1.
+	 * CRm == {4-7} holds the ID_AA64 registers.
+	 *
+	 * For full details see the ARMv8 ARM (ARM DDI 0487C.a)
+	 * Table D9-2 System instruction encodings for non-Debug System
+	 * register accesses.
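+	 *
+	 * Returning 0 from this function means the access was not
+	 * handled here and the fault is passed on to the normal
+	 * undefined instruction handling.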
+ */ + if (mrs_Op0(insn) != 3 || mrs_Op1(insn) != 0 || mrs_CRn(insn) != 0) + return (0); + + CRm = mrs_CRm(insn); + if (CRm > 7 || (CRm < 4 && CRm != 0)) + return (0); + + Op2 = mrs_Op2(insn); + value = 0; + + for (i = 0; i < nitems(user_regs); i++) { + if (user_regs[i].CRm == CRm && user_regs[i].Op2 == Op2) { + value = CPU_DESC_FIELD(user_cpu_desc, i); + break; + } + } + + if (CRm == 0) { + switch (Op2) { + case 0: + value = READ_SPECIALREG(midr_el1); + break; + case 5: + value = READ_SPECIALREG(mpidr_el1); + break; + case 6: + value = READ_SPECIALREG(revidr_el1); + break; + default: + return (0); + } + } + + /* + * We will handle this instruction, move to the next so we + * don't trap here again. + */ + frame->tf_elr += INSN_SIZE; + + reg = MRS_REGISTER(insn); + /* If reg is 31 then write to xzr, i.e. do nothing */ + if (reg == 31) + return (1); + + if (reg < nitems(frame->tf_x)) + frame->tf_x[reg] = value; + else if (reg == 30) + frame->tf_lr = value; + + return (1); +} + +bool +extract_user_id_field(u_int reg, u_int field_shift, uint8_t *val) +{ + uint64_t value; + int i; + + for (i = 0; i < nitems(user_regs); i++) { + if (user_regs[i].reg == reg) { + value = CPU_DESC_FIELD(user_cpu_desc, i); + *val = value >> field_shift; + return (true); + } + } + + return (false); +} + +bool +get_kernel_reg(u_int reg, uint64_t *val) +{ + int i; + + for (i = 0; i < nitems(user_regs); i++) { + if (user_regs[i].reg == reg) { + *val = CPU_DESC_FIELD(kern_cpu_desc, i); + return (true); + } + } + + return (false); +} + +static uint64_t +update_lower_register(uint64_t val, uint64_t new_val, u_int shift, + int width, bool sign) +{ + uint64_t mask; + uint64_t new_field, old_field; + bool update; + + KASSERT(width > 0 && width < 64, ("%s: Invalid width %d", __func__, + width)); + + mask = (1ul << width) - 1; + new_field = (new_val >> shift) & mask; + old_field = (val >> shift) & mask; + + update = false; + if (sign) { + /* + * The field is signed. Toggle the upper bit so the comparison + * works on unsigned values as this makes positive numbers, + * i.e. those with a 0 bit, larger than negative numbers, + * i.e. those with a 1 bit, in an unsigned comparison. 
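+	 * For example, with a 4-bit field the value 0xf (-1) becomes 0x7
+	 * and 0x1 becomes 0x9, so the negative value correctly compares
+	 * as the lower of the two.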
+ */ + if ((new_field ^ (1ul << (width - 1))) < + (old_field ^ (1ul << (width - 1)))) + update = true; + } else { + if (new_field < old_field) + update = true; + } + + if (update) { + val &= ~(mask << shift); + val |= new_field << shift; + } + + return (val); +} + +void +update_special_regs(u_int cpu) +{ + struct mrs_field *fields; + uint64_t user_reg, kern_reg, value; + int i, j; + + if (cpu == 0) { + /* Create a user visible cpu description with safe values */ + memset(&user_cpu_desc, 0, sizeof(user_cpu_desc)); + /* Safe values for these registers */ + user_cpu_desc.id_aa64pfr0 = ID_AA64PFR0_AdvSIMD_NONE | + ID_AA64PFR0_FP_NONE | ID_AA64PFR0_EL1_64 | + ID_AA64PFR0_EL0_64; + user_cpu_desc.id_aa64dfr0 = ID_AA64DFR0_DebugVer_8; + } + + for (i = 0; i < nitems(user_regs); i++) { + value = CPU_DESC_FIELD(cpu_desc[cpu], i); + if (cpu == 0) { + kern_reg = value; + user_reg = value; + } else { + kern_reg = CPU_DESC_FIELD(kern_cpu_desc, i); + user_reg = CPU_DESC_FIELD(user_cpu_desc, i); + } + + fields = user_regs[i].fields; + for (j = 0; fields[j].type != 0; j++) { + switch (fields[j].type & MRS_TYPE_MASK) { + case MRS_EXACT: + user_reg &= ~(0xful << fields[j].shift); + user_reg |= + (uint64_t)MRS_EXACT_FIELD(fields[j].type) << + fields[j].shift; + break; + case MRS_LOWER: + user_reg = update_lower_register(user_reg, + value, fields[j].shift, 4, fields[j].sign); + break; + default: + panic("Invalid field type: %d", fields[j].type); + } + kern_reg = update_lower_register(kern_reg, value, + fields[j].shift, 4, fields[j].sign); + } + + CPU_DESC_FIELD(kern_cpu_desc, i) = kern_reg; + CPU_DESC_FIELD(user_cpu_desc, i) = user_reg; + } +} + +/* HWCAP */ +extern u_long elf_hwcap; +bool __read_frequently lse_supported = false; + +bool __read_frequently icache_aliasing = false; +bool __read_frequently icache_vmid = false; + +int64_t dcache_line_size; /* The minimum D cache line size */ +int64_t icache_line_size; /* The minimum I cache line size */ +int64_t idcache_line_size; /* The minimum cache line size */ + +static void +identify_cpu_sysinit(void *dummy __unused) +{ + int cpu; + bool dic, idc; + + dic = (allow_dic != 0); + idc = (allow_idc != 0); + + CPU_FOREACH(cpu) { + check_cpu_regs(cpu); + if (cpu != 0) + update_special_regs(cpu); + + if (CTR_DIC_VAL(cpu_desc[cpu].ctr) == 0) + dic = false; + if (CTR_IDC_VAL(cpu_desc[cpu].ctr) == 0) + idc = false; + } + + /* Exposed to userspace as AT_HWCAP */ + elf_hwcap = parse_cpu_features_hwcap(); + + if (dic && idc) { + arm64_icache_sync_range = &arm64_dic_idc_icache_sync_range; + if (bootverbose) + printf("Enabling DIC & IDC ICache sync\n"); + } + + if ((elf_hwcap & HWCAP_ATOMICS) != 0) { + lse_supported = true; + if (bootverbose) + printf("Enabling LSE atomics in the kernel\n"); + } +#ifdef LSE_ATOMICS + if (!lse_supported) + panic("CPU does not support LSE atomic instructions"); +#endif + + install_undef_handler(true, user_mrs_handler); +} +SYSINIT(identify_cpu, SI_SUB_CPU, SI_ORDER_ANY, identify_cpu_sysinit, NULL); + +static void +cpu_features_sysinit(void *dummy __unused) +{ + u_int cpu; + + CPU_FOREACH(cpu) + print_cpu_features(cpu); +} +SYSINIT(cpu_features, SI_SUB_SMP, SI_ORDER_ANY, cpu_features_sysinit, NULL); + +static u_long +parse_cpu_features_hwcap(void) +{ + u_long hwcap = 0; + + if (ID_AA64ISAR0_DP_VAL(user_cpu_desc.id_aa64isar0) == + ID_AA64ISAR0_DP_IMPL) + hwcap |= HWCAP_ASIMDDP; + + if (ID_AA64ISAR0_SM4_VAL(user_cpu_desc.id_aa64isar0) == + ID_AA64ISAR0_SM4_IMPL) + hwcap |= HWCAP_SM4; + + if (ID_AA64ISAR0_SM3_VAL(user_cpu_desc.id_aa64isar0) == + 
ID_AA64ISAR0_SM3_IMPL)
+		hwcap |= HWCAP_SM3;
+
+	if (ID_AA64ISAR0_RDM_VAL(user_cpu_desc.id_aa64isar0) ==
+	    ID_AA64ISAR0_RDM_IMPL)
+		hwcap |= HWCAP_ASIMDRDM;
+
+	if (ID_AA64ISAR0_Atomic_VAL(user_cpu_desc.id_aa64isar0) ==
+	    ID_AA64ISAR0_Atomic_IMPL)
+		hwcap |= HWCAP_ATOMICS;
+
+	if (ID_AA64ISAR0_CRC32_VAL(user_cpu_desc.id_aa64isar0) ==
+	    ID_AA64ISAR0_CRC32_BASE)
+		hwcap |= HWCAP_CRC32;
+
+	switch (ID_AA64ISAR0_SHA2_VAL(user_cpu_desc.id_aa64isar0)) {
+	case ID_AA64ISAR0_SHA2_BASE:
+		hwcap |= HWCAP_SHA2;
+		break;
+	case ID_AA64ISAR0_SHA2_512:
+		hwcap |= HWCAP_SHA2 | HWCAP_SHA512;
+		break;
+	default:
+		break;
+	}
+
+	if (ID_AA64ISAR0_SHA1_VAL(user_cpu_desc.id_aa64isar0))
+		hwcap |= HWCAP_SHA1;
+
+	switch (ID_AA64ISAR0_AES_VAL(user_cpu_desc.id_aa64isar0)) {
+	case ID_AA64ISAR0_AES_BASE:
+		hwcap |= HWCAP_AES;
+		break;
+	case ID_AA64ISAR0_AES_PMULL:
+		hwcap |= HWCAP_PMULL | HWCAP_AES;
+		break;
+	default:
+		break;
+	}
+
+	if (ID_AA64ISAR1_LRCPC_VAL(user_cpu_desc.id_aa64isar1) ==
+	    ID_AA64ISAR1_LRCPC_RCPC_8_3)
+		hwcap |= HWCAP_LRCPC;
+
+	if (ID_AA64ISAR1_FCMA_VAL(user_cpu_desc.id_aa64isar1) ==
+	    ID_AA64ISAR1_FCMA_IMPL)
+		hwcap |= HWCAP_FCMA;
+
+	if (ID_AA64ISAR1_JSCVT_VAL(user_cpu_desc.id_aa64isar1) ==
+	    ID_AA64ISAR1_JSCVT_IMPL)
+		hwcap |= HWCAP_JSCVT;
+
+	if (ID_AA64ISAR1_DPB_VAL(user_cpu_desc.id_aa64isar1) ==
+	    ID_AA64ISAR1_DPB_DCCVAP)
+		hwcap |= HWCAP_DCPOP;
+
+	if (ID_AA64PFR0_SVE_VAL(user_cpu_desc.id_aa64pfr0) ==
+	    ID_AA64PFR0_SVE_IMPL)
+		hwcap |= HWCAP_SVE;
+
+	switch (ID_AA64PFR0_AdvSIMD_VAL(user_cpu_desc.id_aa64pfr0)) {
+	case ID_AA64PFR0_AdvSIMD_IMPL:
+		hwcap |= HWCAP_ASIMD;
+		break;
+	case ID_AA64PFR0_AdvSIMD_HP:
+		hwcap |= HWCAP_ASIMD | HWCAP_ASIMDHP;
+		break;
+	default:
+		break;
+	}
+
+	switch (ID_AA64PFR0_FP_VAL(user_cpu_desc.id_aa64pfr0)) {
+	case ID_AA64PFR0_FP_IMPL:
+		hwcap |= HWCAP_FP;
+		break;
+	case ID_AA64PFR0_FP_HP:
+		hwcap |= HWCAP_FP | HWCAP_FPHP;
+		break;
+	default:
+		break;
+	}
+
+	return (hwcap);
+}
+
+static void
+print_ctr_fields(struct sbuf *sb, uint64_t reg, void *arg)
+{
+
+	sbuf_printf(sb, "%u byte D-cacheline,", CTR_DLINE_SIZE(reg));
+	sbuf_printf(sb, "%u byte I-cacheline,", CTR_ILINE_SIZE(reg));
+	reg &= ~(CTR_DLINE_MASK | CTR_ILINE_MASK);
+
+	switch (CTR_L1IP_VAL(reg)) {
+	case CTR_L1IP_VPIPT:
+		sbuf_printf(sb, "VPIPT");
+		break;
+	case CTR_L1IP_AIVIVT:
+		sbuf_printf(sb, "AIVIVT");
+		break;
+	case CTR_L1IP_VIPT:
+		sbuf_printf(sb, "VIPT");
+		break;
+	case CTR_L1IP_PIPT:
+		sbuf_printf(sb, "PIPT");
+		break;
+	}
+	sbuf_printf(sb, " ICache,");
+	reg &= ~CTR_L1IP_MASK;
+
+	sbuf_printf(sb, "%d byte ERG,", CTR_ERG_SIZE(reg));
+	sbuf_printf(sb, "%d byte CWG", CTR_CWG_SIZE(reg));
+	reg &= ~(CTR_ERG_MASK | CTR_CWG_MASK);
+
+	if (CTR_IDC_VAL(reg) != 0)
+		sbuf_printf(sb, ",IDC");
+	if (CTR_DIC_VAL(reg) != 0)
+		sbuf_printf(sb, ",DIC");
+	reg &= ~(CTR_IDC_MASK | CTR_DIC_MASK);
+	reg &= ~CTR_RES1;
+
+	if (reg != 0)
+		sbuf_printf(sb, ",%lx", reg);
+}
+
+static void
+print_register(struct sbuf *sb, const char *reg_name, uint64_t reg,
+    void (*print_fields)(struct sbuf *, uint64_t, void *), void *arg)
+{
+
+	sbuf_printf(sb, "%29s = <", reg_name);
+
+	print_fields(sb, reg, arg);
+
+	sbuf_finish(sb);
+	printf("%s>\n", sbuf_data(sb));
+	sbuf_clear(sb);
+}
+
+static void
+print_id_fields(struct sbuf *sb, uint64_t reg, void *arg)
+{
+	struct mrs_field *fields = arg;
+	struct mrs_field_value *fv;
+	int field, i, j, printed;
+
+#define SEP_STR	((printed++) == 0) ?
"" : "," + printed = 0; + for (i = 0; fields[i].type != 0; i++) { + fv = fields[i].values; + + /* TODO: Handle with an unknown message */ + if (fv == NULL) + continue; + + field = (reg & fields[i].mask) >> fields[i].shift; + for (j = 0; fv[j].desc != NULL; j++) { + if ((fv[j].value >> fields[i].shift) != field) + continue; + + if (fv[j].desc[0] != '\0') + sbuf_printf(sb, "%s%s", SEP_STR, fv[j].desc); + break; + } + if (fv[j].desc == NULL) + sbuf_printf(sb, "%sUnknown %s(%x)", SEP_STR, + fields[i].name, field); + + reg &= ~(0xful << fields[i].shift); + } + + if (reg != 0) + sbuf_printf(sb, "%s%#lx", SEP_STR, reg); +#undef SEP_STR +} + +static void +print_id_register(struct sbuf *sb, const char *reg_name, uint64_t reg, + struct mrs_field *fields) +{ + + print_register(sb, reg_name, reg, print_id_fields, fields); +} + +static void +print_cpu_features(u_int cpu) +{ + struct sbuf *sb; + + sb = sbuf_new_auto(); + sbuf_printf(sb, "CPU%3d: %s %s r%dp%d", cpu, + cpu_desc[cpu].cpu_impl_name, cpu_desc[cpu].cpu_part_name, + cpu_desc[cpu].cpu_variant, cpu_desc[cpu].cpu_revision); + + sbuf_cat(sb, " affinity:"); + switch(cpu_aff_levels) { + default: + case 4: + sbuf_printf(sb, " %2d", CPU_AFF3(cpu_desc[cpu].mpidr)); + /* FALLTHROUGH */ + case 3: + sbuf_printf(sb, " %2d", CPU_AFF2(cpu_desc[cpu].mpidr)); + /* FALLTHROUGH */ + case 2: + sbuf_printf(sb, " %2d", CPU_AFF1(cpu_desc[cpu].mpidr)); + /* FALLTHROUGH */ + case 1: + case 0: /* On UP this will be zero */ + sbuf_printf(sb, " %2d", CPU_AFF0(cpu_desc[cpu].mpidr)); + break; + } + sbuf_finish(sb); + printf("%s\n", sbuf_data(sb)); + sbuf_clear(sb); + + /* + * There is a hardware errata where, if one CPU is performing a TLB + * invalidation while another is performing a store-exclusive the + * store-exclusive may return the wrong status. A workaround seems + * to be to use an IPI to invalidate on each CPU, however given the + * limited number of affected units (pass 1.1 is the evaluation + * hardware revision), and the lack of information from Cavium + * this has not been implemented. + * + * At the time of writing this the only information is from: + * https://lkml.org/lkml/2016/8/4/722 + */ + /* + * XXX: CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1 on its own also + * triggers on pass 2.0+. 
+	 */
+	if (cpu == 0 && CPU_VAR(PCPU_GET(midr)) == 0 &&
+	    CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1)
+		printf("WARNING: ThunderX Pass 1.1 detected.\nThis has known "
+		    "hardware bugs that may cause the incorrect operation of "
+		    "atomic operations.\n");
+
+	/* Cache Type Register */
+	if (cpu == 0 || (cpu_print_regs & PRINT_CTR_EL0) != 0) {
+		print_register(sb, "Cache Type",
+		    cpu_desc[cpu].ctr, print_ctr_fields, NULL);
+	}
+
+	/* AArch64 Instruction Set Attribute Register 0 */
+	if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_ISAR0) != 0)
+		print_id_register(sb, "Instruction Set Attributes 0",
+		    cpu_desc[cpu].id_aa64isar0, id_aa64isar0_fields);
+
+	/* AArch64 Instruction Set Attribute Register 1 */
+	if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_ISAR1) != 0)
+		print_id_register(sb, "Instruction Set Attributes 1",
+		    cpu_desc[cpu].id_aa64isar1, id_aa64isar1_fields);
+
+	/* AArch64 Processor Feature Register 0 */
+	if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_PFR0) != 0)
+		print_id_register(sb, "Processor Features 0",
+		    cpu_desc[cpu].id_aa64pfr0, id_aa64pfr0_fields);
+
+	/* AArch64 Processor Feature Register 1 */
+	if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_PFR1) != 0)
+		print_id_register(sb, "Processor Features 1",
+		    cpu_desc[cpu].id_aa64pfr1, id_aa64pfr1_fields);
+
+	/* AArch64 Memory Model Feature Register 0 */
+	if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_MMFR0) != 0)
+		print_id_register(sb, "Memory Model Features 0",
+		    cpu_desc[cpu].id_aa64mmfr0, id_aa64mmfr0_fields);
+
+	/* AArch64 Memory Model Feature Register 1 */
+	if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_MMFR1) != 0)
+		print_id_register(sb, "Memory Model Features 1",
+		    cpu_desc[cpu].id_aa64mmfr1, id_aa64mmfr1_fields);
+
+	/* AArch64 Memory Model Feature Register 2 */
+	if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_MMFR2) != 0)
+		print_id_register(sb, "Memory Model Features 2",
+		    cpu_desc[cpu].id_aa64mmfr2, id_aa64mmfr2_fields);
+
+	/* AArch64 Debug Feature Register 0 */
+	if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_DFR0) != 0)
+		print_id_register(sb, "Debug Features 0",
+		    cpu_desc[cpu].id_aa64dfr0, id_aa64dfr0_fields);
+
+	/* AArch64 Debug Feature Register 1 */
+	if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_DFR1) != 0)
+		print_id_register(sb, "Debug Features 1",
+		    cpu_desc[cpu].id_aa64dfr1, id_aa64dfr1_fields);
+
+	/* AArch64 Auxiliary Feature Register 0 */
+	if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_AFR0) != 0)
+		print_id_register(sb, "Auxiliary Features 0",
+		    cpu_desc[cpu].id_aa64afr0, id_aa64afr0_fields);
+
+	/* AArch64 Auxiliary Feature Register 1 */
+	if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_AFR1) != 0)
+		print_id_register(sb, "Auxiliary Features 1",
+		    cpu_desc[cpu].id_aa64afr1, id_aa64afr1_fields);
+
+	sbuf_delete(sb);
+	sb = NULL;
+}
+
+void
+identify_cache(uint64_t ctr)
+{
+
+	/* Identify the L1 cache type */
+	switch (CTR_L1IP_VAL(ctr)) {
+	case CTR_L1IP_PIPT:
+		break;
+	case CTR_L1IP_VPIPT:
+		icache_vmid = true;
+		break;
+	default:
+	case CTR_L1IP_VIPT:
+		icache_aliasing = true;
+		break;
+	}
+
+	if (dcache_line_size == 0) {
+		KASSERT(icache_line_size == 0, ("%s: i-cacheline size set: %ld",
+		    __func__, icache_line_size));
+
+		/* Get the D cache line size */
+		dcache_line_size = CTR_DLINE_SIZE(ctr);
+		/* And the same for the I cache */
+		icache_line_size = CTR_ILINE_SIZE(ctr);
+
+		idcache_line_size = MIN(dcache_line_size, icache_line_size);
+	}
+
+	if (dcache_line_size != CTR_DLINE_SIZE(ctr)) {
+		printf("WARNING: D-cacheline size mismatch %ld != %d\n",
+
dcache_line_size, CTR_DLINE_SIZE(ctr)); + } + + if (icache_line_size != CTR_ILINE_SIZE(ctr)) { + printf("WARNING: I-cacheline size mismatch %ld != %d\n", + icache_line_size, CTR_ILINE_SIZE(ctr)); + } +} + +void +identify_cpu(u_int cpu) +{ + u_int midr; + u_int impl_id; + u_int part_id; + size_t i; + const struct cpu_parts *cpu_partsp = NULL; + + midr = get_midr(); + + impl_id = CPU_IMPL(midr); + for (i = 0; i < nitems(cpu_implementers); i++) { + if (impl_id == cpu_implementers[i].impl_id || + cpu_implementers[i].impl_id == 0) { + cpu_desc[cpu].cpu_impl = impl_id; + cpu_desc[cpu].cpu_impl_name = + cpu_implementers[i].impl_name; + cpu_partsp = cpu_implementers[i].cpu_parts; + break; + } + } + + part_id = CPU_PART(midr); + for (i = 0; &cpu_partsp[i] != NULL; i++) { + if (part_id == cpu_partsp[i].part_id || + cpu_partsp[i].part_id == 0) { + cpu_desc[cpu].cpu_part_num = part_id; + cpu_desc[cpu].cpu_part_name = cpu_partsp[i].part_name; + break; + } + } + + cpu_desc[cpu].cpu_revision = CPU_REV(midr); + cpu_desc[cpu].cpu_variant = CPU_VAR(midr); + + snprintf(cpu_model, sizeof(cpu_model), "%s %s r%dp%d", + cpu_desc[cpu].cpu_impl_name, cpu_desc[cpu].cpu_part_name, + cpu_desc[cpu].cpu_variant, cpu_desc[cpu].cpu_revision); + + /* Save affinity for current CPU */ + cpu_desc[cpu].mpidr = get_mpidr(); + CPU_AFFINITY(cpu) = cpu_desc[cpu].mpidr & CPU_AFF_MASK; + + cpu_desc[cpu].ctr = READ_SPECIALREG(ctr_el0); + cpu_desc[cpu].id_aa64dfr0 = READ_SPECIALREG(id_aa64dfr0_el1); + cpu_desc[cpu].id_aa64dfr1 = READ_SPECIALREG(id_aa64dfr1_el1); + cpu_desc[cpu].id_aa64isar0 = READ_SPECIALREG(id_aa64isar0_el1); + cpu_desc[cpu].id_aa64isar1 = READ_SPECIALREG(id_aa64isar1_el1); + cpu_desc[cpu].id_aa64mmfr0 = READ_SPECIALREG(id_aa64mmfr0_el1); + cpu_desc[cpu].id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1); + cpu_desc[cpu].id_aa64mmfr2 = READ_SPECIALREG(id_aa64mmfr2_el1); + cpu_desc[cpu].id_aa64pfr0 = READ_SPECIALREG(id_aa64pfr0_el1); + cpu_desc[cpu].id_aa64pfr1 = READ_SPECIALREG(id_aa64pfr1_el1); +} + +static void +check_cpu_regs(u_int cpu) +{ + + switch (cpu_aff_levels) { + case 0: + if (CPU_AFF0(cpu_desc[cpu].mpidr) != + CPU_AFF0(cpu_desc[0].mpidr)) + cpu_aff_levels = 1; + /* FALLTHROUGH */ + case 1: + if (CPU_AFF1(cpu_desc[cpu].mpidr) != + CPU_AFF1(cpu_desc[0].mpidr)) + cpu_aff_levels = 2; + /* FALLTHROUGH */ + case 2: + if (CPU_AFF2(cpu_desc[cpu].mpidr) != + CPU_AFF2(cpu_desc[0].mpidr)) + cpu_aff_levels = 3; + /* FALLTHROUGH */ + case 3: + if (CPU_AFF3(cpu_desc[cpu].mpidr) != + CPU_AFF3(cpu_desc[0].mpidr)) + cpu_aff_levels = 4; + break; + } + + if (cpu_desc[cpu].id_aa64afr0 != cpu_desc[0].id_aa64afr0) + cpu_print_regs |= PRINT_ID_AA64_AFR0; + if (cpu_desc[cpu].id_aa64afr1 != cpu_desc[0].id_aa64afr1) + cpu_print_regs |= PRINT_ID_AA64_AFR1; + + if (cpu_desc[cpu].id_aa64dfr0 != cpu_desc[0].id_aa64dfr0) + cpu_print_regs |= PRINT_ID_AA64_DFR0; + if (cpu_desc[cpu].id_aa64dfr1 != cpu_desc[0].id_aa64dfr1) + cpu_print_regs |= PRINT_ID_AA64_DFR1; + + if (cpu_desc[cpu].id_aa64isar0 != cpu_desc[0].id_aa64isar0) + cpu_print_regs |= PRINT_ID_AA64_ISAR0; + if (cpu_desc[cpu].id_aa64isar1 != cpu_desc[0].id_aa64isar1) + cpu_print_regs |= PRINT_ID_AA64_ISAR1; + + if (cpu_desc[cpu].id_aa64mmfr0 != cpu_desc[0].id_aa64mmfr0) + cpu_print_regs |= PRINT_ID_AA64_MMFR0; + if (cpu_desc[cpu].id_aa64mmfr1 != cpu_desc[0].id_aa64mmfr1) + cpu_print_regs |= PRINT_ID_AA64_MMFR1; + if (cpu_desc[cpu].id_aa64mmfr2 != cpu_desc[0].id_aa64mmfr2) + cpu_print_regs |= PRINT_ID_AA64_MMFR2; + + if (cpu_desc[cpu].id_aa64pfr0 != cpu_desc[0].id_aa64pfr0) + 
cpu_print_regs |= PRINT_ID_AA64_PFR0; + if (cpu_desc[cpu].id_aa64pfr1 != cpu_desc[0].id_aa64pfr1) + cpu_print_regs |= PRINT_ID_AA64_PFR1; + + if (cpu_desc[cpu].ctr != cpu_desc[0].ctr) { + /* + * If the cache type register is different we may + * have a different l1 cache type. + */ + identify_cache(cpu_desc[cpu].ctr); + cpu_print_regs |= PRINT_CTR_EL0; + } +} diff --git a/sys/arm64/arm64/in_cksum.c b/sys/arm64/arm64/in_cksum.c new file mode 100644 index 000000000000..ae02e91d9203 --- /dev/null +++ b/sys/arm64/arm64/in_cksum.c @@ -0,0 +1,241 @@ +/* $NetBSD: in_cksum.c,v 1.7 1997/09/02 13:18:15 thorpej Exp $ */ + +/*- + * Copyright (c) 1988, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 1996 + * Matt Thomas <matt@3am-software.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 + */ + +#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/mbuf.h> +#include <sys/systm.h> +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <machine/in_cksum.h> + +/* + * Checksum routine for Internet Protocol family headers + * (Portable Alpha version). + * + * This routine is very heavily used in the network + * code and should be modified for each CPU to be as fast as possible. + */ + +#define ADDCARRY(x) (x > 65535 ? 
x -= 65535 : x) +#define REDUCE32 \ + { \ + q_util.q = sum; \ + sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ + } +#define REDUCE16 \ + { \ + q_util.q = sum; \ + l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ + sum = l_util.s[0] + l_util.s[1]; \ + ADDCARRY(sum); \ + } + +static const u_int32_t in_masks[] = { + /*0 bytes*/ /*1 byte*/ /*2 bytes*/ /*3 bytes*/ + 0x00000000, 0x000000FF, 0x0000FFFF, 0x00FFFFFF, /* offset 0 */ + 0x00000000, 0x0000FF00, 0x00FFFF00, 0xFFFFFF00, /* offset 1 */ + 0x00000000, 0x00FF0000, 0xFFFF0000, 0xFFFF0000, /* offset 2 */ + 0x00000000, 0xFF000000, 0xFF000000, 0xFF000000, /* offset 3 */ +}; + +union l_util { + u_int16_t s[2]; + u_int32_t l; +}; +union q_util { + u_int16_t s[4]; + u_int32_t l[2]; + u_int64_t q; +}; + +static u_int64_t +in_cksumdata(const void *buf, int len) +{ + const u_int32_t *lw = (const u_int32_t *) buf; + u_int64_t sum = 0; + u_int64_t prefilled; + int offset; + union q_util q_util; + + if ((3 & (long) lw) == 0 && len == 20) { + sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4]; + REDUCE32; + return sum; + } + + if ((offset = 3 & (long) lw) != 0) { + const u_int32_t *masks = in_masks + (offset << 2); + lw = (u_int32_t *) (((long) lw) - offset); + sum = *lw++ & masks[len >= 3 ? 3 : len]; + len -= 4 - offset; + if (len <= 0) { + REDUCE32; + return sum; + } + } +#if 0 + /* + * Force to cache line boundary. + */ + offset = 32 - (0x1f & (long) lw); + if (offset < 32 && len > offset) { + len -= offset; + if (4 & offset) { + sum += (u_int64_t) lw[0]; + lw += 1; + } + if (8 & offset) { + sum += (u_int64_t) lw[0] + lw[1]; + lw += 2; + } + if (16 & offset) { + sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3]; + lw += 4; + } + } +#endif + /* + * access prefilling to start load of next cache line. + * then add current cache line + * save result of prefilling for loop iteration. 
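REDUCE32 and REDUCE16 above fold the 64-bit accumulator by summing its 16-bit lanes through the q_util/l_util unions; the carries wrap back in, which is exactly the one's-complement addition the Internet checksum requires. A portable standalone sketch of the same fold:

#include <stdint.h>
#include <stdio.h>

/* Fold a 64-bit one's-complement accumulator down to 16 bits. */
static uint16_t
fold64(uint64_t sum)
{
	uint32_t s;

	/* Sum the four 16-bit lanes; the result fits in 32 bits. */
	s = (uint32_t)(sum & 0xffff) + (uint32_t)((sum >> 16) & 0xffff) +
	    (uint32_t)((sum >> 32) & 0xffff) + (uint32_t)(sum >> 48);

	/* Wrap the carries back in until none remain. */
	while (s > 0xffff)
		s = (s & 0xffff) + (s >> 16);
	return ((uint16_t)s);
}

int
main(void)
{
	/* Each 16-bit lane holds 1, so the fold yields 4. */
	printf("%#x\n", fold64(0x0001000100010001ULL));
	return (0);
}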
+ */ + prefilled = lw[0]; + while ((len -= 32) >= 4) { + u_int64_t prefilling = lw[8]; + sum += prefilled + lw[1] + lw[2] + lw[3] + + lw[4] + lw[5] + lw[6] + lw[7]; + lw += 8; + prefilled = prefilling; + } + if (len >= 0) { + sum += prefilled + lw[1] + lw[2] + lw[3] + + lw[4] + lw[5] + lw[6] + lw[7]; + lw += 8; + } else { + len += 32; + } + while ((len -= 16) >= 0) { + sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3]; + lw += 4; + } + len += 16; + while ((len -= 4) >= 0) { + sum += (u_int64_t) *lw++; + } + len += 4; + if (len > 0) + sum += (u_int64_t) (in_masks[len] & *lw); + REDUCE32; + return sum; +} + +u_short +in_addword(u_short a, u_short b) +{ + u_int64_t sum = a + b; + + ADDCARRY(sum); + return (sum); +} + +u_short +in_pseudo(u_int32_t a, u_int32_t b, u_int32_t c) +{ + u_int64_t sum; + union q_util q_util; + union l_util l_util; + + sum = (u_int64_t) a + b + c; + REDUCE16; + return (sum); +} + +u_short +in_cksum_skip(struct mbuf *m, int len, int skip) +{ + u_int64_t sum = 0; + int mlen = 0; + int clen = 0; + caddr_t addr; + union q_util q_util; + union l_util l_util; + + len -= skip; + for (; skip && m; m = m->m_next) { + if (m->m_len > skip) { + mlen = m->m_len - skip; + addr = mtod(m, caddr_t) + skip; + goto skip_start; + } else { + skip -= m->m_len; + } + } + + for (; m && len; m = m->m_next) { + if (m->m_len == 0) + continue; + mlen = m->m_len; + addr = mtod(m, caddr_t); +skip_start: + if (len < mlen) + mlen = len; + if ((clen ^ (long) addr) & 1) + sum += in_cksumdata(addr, mlen) << 8; + else + sum += in_cksumdata(addr, mlen); + + clen += mlen; + len -= mlen; + } + REDUCE16; + return (~sum & 0xffff); +} + +u_int in_cksum_hdr(const struct ip *ip) +{ + u_int64_t sum = in_cksumdata(ip, sizeof(struct ip)); + union q_util q_util; + union l_util l_util; + REDUCE16; + return (~sum & 0xffff); +} diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S new file mode 100644 index 000000000000..b9147df32815 --- /dev/null +++ b/sys/arm64/arm64/locore.S @@ -0,0 +1,859 @@ +/*- + * Copyright (c) 2012-2014 Andrew Turner + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include "assym.inc" +#include "opt_kstack_pages.h" +#include <sys/syscall.h> +#include <machine/asm.h> +#include <machine/armreg.h> +#include <machine/hypervisor.h> +#include <machine/param.h> +#include <machine/pte.h> +#include <machine/vm.h> +#include <machine/vmparam.h> + +#define VIRT_BITS 48 +#define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) + + .globl kernbase + .set kernbase, KERNBASE + + +/* U-Boot booti related constants. */ +#if defined(LINUX_BOOT_ABI) +#define FDT_MAGIC 0xEDFE0DD0 /* FDT blob Magic */ + +#ifndef UBOOT_IMAGE_OFFSET +#define UBOOT_IMAGE_OFFSET 0 /* Image offset from start of */ +#endif /* 2 MiB page */ + +#ifndef UBOOT_IMAGE_SIZE /* Total size of image */ +#define UBOOT_IMAGE_SIZE _end - _start +#endif + +#ifndef UBOOT_IMAGE_FLAGS +#define UBOOT_IMAGE_FLAGS 0 /* LE kernel, unspecified */ +#endif /* page size */ +#endif /* defined(LINUX_BOOT_ABI) */ + +/* + * We assume: + * MMU on with an identity map, or off + * D-Cache: off + * I-Cache: on or off + * We are loaded at a 2MiB aligned address + */ + + .text + .globl _start +_start: +#if defined(LINUX_BOOT_ABI) + /* U-boot image header */ + b 1f /* code 0 */ + .long 0 /* code 1 */ + .quad UBOOT_IMAGE_OFFSET /* Image offset in 2 MiB page, LE */ + .quad UBOOT_IMAGE_SIZE /* Image size, LE */ + .quad UBOOT_IMAGE_FLAGS /* Flags for kernel. LE */ + .quad 0 /* Reserved */ + .quad 0 /* Reserved */ + .quad 0 /* Reserved */ + .long 0x644d5241 /* Magic "ARM\x64", LE */ + .long 0 /* Reserved for PE COFF offset*/ +1: +#endif /* defined(LINUX_BOOT_ABI) */ + + /* Drop to EL1 */ + bl drop_to_el1 + + /* + * Disable the MMU. We may have entered the kernel with it on and + * will need to update the tables later. If this has been set up + * with anything other than a VA == PA map then this will fail, + * but in this case the code to find where we are running from + * would have also failed. 
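For reference, the .long/.quad sequence emitted under LINUX_BOOT_ABI above forms the 64-byte header that U-Boot's booti command expects at the start of an arm64 Linux-format image. Viewed from C it is laid out roughly as below; the struct and field names are descriptive, not taken from this file:

#include <stdint.h>

/* Arm64 "Image" boot header; all fields little-endian. */
struct arm64_image_header {
	uint32_t	code0;		/* executable code (branch over header) */
	uint32_t	code1;		/* executable code */
	uint64_t	text_offset;	/* image offset from a 2 MiB boundary */
	uint64_t	image_size;	/* effective image size */
	uint64_t	flags;		/* endianness, page size, placement */
	uint64_t	res2;		/* reserved */
	uint64_t	res3;		/* reserved */
	uint64_t	res4;		/* reserved */
	uint32_t	magic;		/* 0x644d5241, "ARM\x64" */
	uint32_t	res5;		/* reserved, PE/COFF offset */
};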
+ */
+	dsb	sy
+	mrs	x2, sctlr_el1
+	bic	x2, x2, SCTLR_M
+	msr	sctlr_el1, x2
+	isb
+
+	/* Set the context id */
+	msr	contextidr_el1, xzr
+
+	/* Get the virt -> phys offset */
+	bl	get_virt_delta
+
+	/*
+	 * At this point:
+	 * x29 = PA - VA
+	 * x28 = Our physical load address
+	 */
+
+	/* Create the page tables */
+	bl	create_pagetables
+
+	/*
+	 * At this point:
+	 * x27 = TTBR0 table
+	 * x26 = Kernel L1 table
+	 * x24 = TTBR1 table
+	 */
+
+	/* Enable the MMU */
+	bl	start_mmu
+
+	/* Load the new ttbr0 pagetable */
+	adr	x27, pagetable_l0_ttbr0
+
+	/* Jump to the virtual address space */
+	ldr	x15, .Lvirtdone
+	br	x15
+
+virtdone:
+	/* Set up the stack */
+	adr	x25, initstack_end
+	mov	sp, x25
+	sub	sp, sp, #PCB_SIZE
+
+	/* Zero the BSS */
+	ldr	x15, .Lbss
+	ldr	x14, .Lend
+1:
+	str	xzr, [x15], #8
+	cmp	x15, x14
+	b.lo	1b
+
+	/* Back up the module pointer */
+	mov	x1, x0
+
+	/* Make the page table base a virtual address */
+	sub	x26, x26, x29
+	sub	x24, x24, x29
+
+	sub	sp, sp, #BOOTPARAMS_SIZE
+	mov	x0, sp
+
+	/* Negate the delta so it is VA -> PA */
+	neg	x29, x29
+
+	str	x1, [x0, #BP_MODULEP]
+	str	x26, [x0, #BP_KERN_L1PT]
+	str	x29, [x0, #BP_KERN_DELTA]
+	adr	x25, initstack
+	str	x25, [x0, #BP_KERN_STACK]
+	str	x24, [x0, #BP_KERN_L0PT]
+	str	x23, [x0, #BP_BOOT_EL]
+	str	x27, [x0, 40]	/* kern_ttbr0 */
+
+	/* trace back starts here */
+	mov	fp, #0
+	/* Branch to C code */
+	bl	initarm
+	bl	mi_startup
+
+	/* We should not get here */
+	brk	0
+
+	.align 3
+.Lvirtdone:
+	.quad	virtdone
+.Lbss:
+	.quad	__bss_start
+.Lend:
+	.quad	_end
+
+#ifdef SMP
+/*
+ * mpentry(unsigned long)
+ *
+ * Called by a core when it is being brought online.
+ * The data in x0 is passed straight to init_secondary.
+ */
+ENTRY(mpentry)
+	/* Disable interrupts */
+	msr	daifset, #2
+
+	/* Drop to EL1 */
+	bl	drop_to_el1
+
+	/* Set the context id */
+	msr	contextidr_el1, xzr
+
+	/* Load the kernel page table */
+	adr	x24, pagetable_l0_ttbr1
+	/* Load the identity page table */
+	adr	x27, pagetable_l0_ttbr0_boostrap
+
+	/* Enable the MMU */
+	bl	start_mmu
+
+	/* Load the new ttbr0 pagetable */
+	adr	x27, pagetable_l0_ttbr0
+
+	/* Jump to the virtual address space */
+	ldr	x15, =mp_virtdone
+	br	x15
+
+mp_virtdone:
+	/* Start using the AP boot stack */
+	ldr	x4, =bootstack
+	ldr	x4, [x4]
+	mov	sp, x4
+
+	/* Load the kernel ttbr0 pagetable */
+	msr	ttbr0_el1, x27
+	isb
+
+	/* Invalidate the TLB */
+	tlbi	vmalle1
+	dsb	sy
+	isb
+
+	b	init_secondary
+END(mpentry)
+#endif
+
+/*
+ * If we are started in EL2, configure the required hypervisor
+ * registers and drop to EL1.
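virtdone above reserves BOOTPARAMS_SIZE bytes of stack and stores boot state at the BP_* offsets generated from assym.inc before calling initarm. The C structure it populates looks approximately like the sketch below; apart from kern_ttbr0, which the code stores by hand at byte offset 40, the field order is inferred here and the authoritative definition lives in the machine headers:

#include <stdint.h>

/* Approximate C view of the block filled in by virtdone. */
struct arm64_bootparams_sketch {
	uint64_t	modulep;	/* BP_MODULEP: module metadata */
	uint64_t	kern_l1pt;	/* BP_KERN_L1PT: kernel L1 table (VA) */
	uint64_t	kern_delta;	/* BP_KERN_DELTA: VA -> PA delta */
	uint64_t	kern_stack;	/* BP_KERN_STACK: initial stack */
	uint64_t	kern_l0pt;	/* BP_KERN_L0PT: TTBR1 L0 table (VA) */
	uint64_t	kern_ttbr0;	/* stored at offset 40 above */
	uint64_t	boot_el;	/* BP_BOOT_EL: entry exception level */
};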
+ */
+drop_to_el1:
+	mrs	x23, CurrentEL
+	lsr	x23, x23, #2
+	cmp	x23, #0x2
+	b.eq	1f
+	ret
+1:
+	/* Configure the Hypervisor */
+	mov	x2, #(HCR_RW)
+	msr	hcr_el2, x2
+
+	/* Load the Virtualization Process ID Register */
+	mrs	x2, midr_el1
+	msr	vpidr_el2, x2
+
+	/* Load the Virtualization Multiprocess ID Register */
+	mrs	x2, mpidr_el1
+	msr	vmpidr_el2, x2
+
+	/* Set the bits that need to be 1 in sctlr_el1 */
+	ldr	x2, .Lsctlr_res1
+	msr	sctlr_el1, x2
+
+	/* Don't trap to EL2 for exceptions */
+	mov	x2, #CPTR_RES1
+	msr	cptr_el2, x2
+
+	/* Don't trap to EL2 for CP15 traps */
+	msr	hstr_el2, xzr
+
+	/* Enable access to the physical timers at EL1 */
+	mrs	x2, cnthctl_el2
+	orr	x2, x2, #(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN)
+	msr	cnthctl_el2, x2
+
+	/* Set the counter offset to a known value */
+	msr	cntvoff_el2, xzr
+
+	/* Hypervisor trap functions */
+	adr	x2, hyp_vectors
+	msr	vbar_el2, x2
+
+	mov	x2, #(PSR_F | PSR_I | PSR_A | PSR_D | PSR_M_EL1h)
+	msr	spsr_el2, x2
+
+	/* Configure GICv3 CPU interface */
+	mrs	x2, id_aa64pfr0_el1
+	/* Extract GIC bits from the register */
+	ubfx	x2, x2, #ID_AA64PFR0_GIC_SHIFT, #ID_AA64PFR0_GIC_BITS
+	/* GIC[3:0] == 0001 - GIC CPU interface via special regs. supported */
+	cmp	x2, #(ID_AA64PFR0_GIC_CPUIF_EN >> ID_AA64PFR0_GIC_SHIFT)
+	b.ne	2f
+
+	mrs	x2, icc_sre_el2
+	orr	x2, x2, #ICC_SRE_EL2_EN	/* Enable access from Non-secure EL1 */
+	orr	x2, x2, #ICC_SRE_EL2_SRE	/* Enable system registers */
+	msr	icc_sre_el2, x2
+2:
+
+	/* Set the address to return to our return address */
+	msr	elr_el2, x30
+	isb
+
+	eret
+
+	.align 3
+.Lsctlr_res1:
+	.quad SCTLR_RES1
+
+#define	VECT_EMPTY	\
+	.align 7;	\
+	1: b	1b
+
+	.align 11
+hyp_vectors:
+	VECT_EMPTY	/* Synchronous EL2t */
+	VECT_EMPTY	/* IRQ EL2t */
+	VECT_EMPTY	/* FIQ EL2t */
+	VECT_EMPTY	/* Error EL2t */
+
+	VECT_EMPTY	/* Synchronous EL2h */
+	VECT_EMPTY	/* IRQ EL2h */
+	VECT_EMPTY	/* FIQ EL2h */
+	VECT_EMPTY	/* Error EL2h */
+
+	VECT_EMPTY	/* Synchronous 64-bit EL1 */
+	VECT_EMPTY	/* IRQ 64-bit EL1 */
+	VECT_EMPTY	/* FIQ 64-bit EL1 */
+	VECT_EMPTY	/* Error 64-bit EL1 */
+
+	VECT_EMPTY	/* Synchronous 32-bit EL1 */
+	VECT_EMPTY	/* IRQ 32-bit EL1 */
+	VECT_EMPTY	/* FIQ 32-bit EL1 */
+	VECT_EMPTY	/* Error 32-bit EL1 */
+
+/*
+ * Get the delta between the physical address we were loaded to and the
+ * virtual address we expect to run from. This is used when building the
+ * initial page table.
+ */
+get_virt_delta:
+	/* Load the physical address of virt_map */
+	adr	x29, virt_map
+	/* Load the virtual address of virt_map stored in virt_map */
+	ldr	x28, [x29]
+	/* Find PA - VA as PA' = VA' - VA + PA = VA' + (PA - VA) = VA' + x29 */
+	sub	x29, x29, x28
+	/* Find the load address for the kernel */
+	mov	x28, #(KERNBASE)
+	add	x28, x28, x29
+	ret
+
+	.align 3
+virt_map:
+	.quad	virt_map
+
+/*
+ * This builds the page tables containing the identity map, and the kernel
+ * virtual map.
+ *
+ * It relies on:
+ *  We were loaded to an address that is on a 2MiB boundary
+ *  All the memory must not cross a 1GiB boundary
+ *  x28 contains the physical address we were loaded from
+ *
+ * TODO: This is out of date.
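drop_to_el1 above keys off CurrentEL, whose exception-level number sits in bits [3:2]. The same test reads naturally in C with inline assembly; this is illustrative only, since the real check must run in assembly before any C environment exists:

#include <stdint.h>

/* Return the current exception level (0..3) from CurrentEL[3:2]. */
static inline unsigned int
current_el(void)
{
	uint64_t reg;

	__asm __volatile("mrs %0, CurrentEL" : "=r" (reg));
	return ((reg >> 2) & 0x3);
}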
+ * There are at least 5 pages before that address for the page tables + * The pages used are: + * - The Kernel L2 table + * - The Kernel L1 table + * - The Kernel L0 table (TTBR1) + * - The identity (PA = VA) L1 table + * - The identity (PA = VA) L0 table (TTBR0) + * - The DMAP L1 tables + */ +create_pagetables: + /* Save the Link register */ + mov x5, x30 + + /* Clean the page table */ + adr x6, pagetable + mov x26, x6 + adr x27, pagetable_end +1: + stp xzr, xzr, [x6], #16 + stp xzr, xzr, [x6], #16 + stp xzr, xzr, [x6], #16 + stp xzr, xzr, [x6], #16 + cmp x6, x27 + b.lo 1b + + /* + * Build the TTBR1 maps. + */ + + /* Find the size of the kernel */ + mov x6, #(KERNBASE) + +#if defined(LINUX_BOOT_ABI) + /* X19 is used as 'map FDT data' flag */ + mov x19, xzr + + /* No modules or FDT pointer ? */ + cbz x0, booti_no_fdt + + /* Test if modulep points to modules descriptor or to FDT */ + ldr w8, [x0] + ldr w7, =FDT_MAGIC + cmp w7, w8 + b.eq booti_fdt +#endif + + /* Booted with modules pointer */ + /* Find modulep - begin */ + sub x8, x0, x6 + /* Add two 2MiB pages for the module data and round up */ + ldr x7, =(3 * L2_SIZE - 1) + add x8, x8, x7 + b common + +#if defined(LINUX_BOOT_ABI) +booti_fdt: + /* Booted by U-Boot booti with FDT data */ + /* Set 'map FDT data' flag */ + mov x19, #1 + +booti_no_fdt: + /* Booted by U-Boot booti without FTD data */ + /* Find the end - begin */ + ldr x7, .Lend + sub x8, x7, x6 + + /* + * Add one 2MiB page for copy of FDT data (maximum FDT size), + * one for metadata and round up + */ + ldr x7, =(3 * L2_SIZE - 1) + add x8, x8, x7 +#endif + +common: + /* Get the number of l2 pages to allocate, rounded down */ + lsr x10, x8, #(L2_SHIFT) + + /* Create the kernel space L2 table */ + mov x6, x26 + mov x7, #VM_MEMATTR_WRITE_BACK + mov x8, #(KERNBASE & L2_BLOCK_MASK) + mov x9, x28 + bl build_l2_block_pagetable + + /* Move to the l1 table */ + add x26, x26, #PAGE_SIZE + + /* Link the l1 -> l2 table */ + mov x9, x6 + mov x6, x26 + bl link_l1_pagetable + + /* Move to the l0 table */ + add x24, x26, #PAGE_SIZE + + /* Link the l0 -> l1 table */ + mov x9, x6 + mov x6, x24 + mov x10, #1 + bl link_l0_pagetable + + /* Link the DMAP tables */ + ldr x8, =DMAP_MIN_ADDRESS + adr x9, pagetable_dmap; + mov x10, #DMAP_TABLES + bl link_l0_pagetable + + /* + * Build the TTBR0 maps. As TTBR0 maps, they must specify ATTR_S1_nG. + * They are only needed early on, so the VA = PA map is uncached. + */ + add x27, x24, #PAGE_SIZE + + mov x6, x27 /* The initial page table */ +#if defined(SOCDEV_PA) && defined(SOCDEV_VA) + /* Create a table for the UART */ + mov x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_DEVICE)) + mov x8, #(SOCDEV_VA) /* VA start */ + mov x9, #(SOCDEV_PA) /* PA start */ + mov x10, #1 + bl build_l1_block_pagetable +#endif + +#if defined(LINUX_BOOT_ABI) + /* Map FDT data ? 
*/ + cbz x19, 1f + + /* Create the identity mapping for FDT data (2 MiB max) */ + mov x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_UNCACHEABLE)) + mov x9, x0 + mov x8, x0 /* VA start (== PA start) */ + mov x10, #1 + bl build_l1_block_pagetable + +1: +#endif + + /* Create the VA = PA map */ + mov x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_UNCACHEABLE)) + mov x9, x27 + mov x8, x9 /* VA start (== PA start) */ + mov x10, #1 + bl build_l1_block_pagetable + + /* Move to the l0 table */ + add x27, x27, #PAGE_SIZE + + /* Link the l0 -> l1 table */ + mov x9, x6 + mov x6, x27 + mov x10, #1 + bl link_l0_pagetable + + /* Restore the Link register */ + mov x30, x5 + ret + +/* + * Builds an L0 -> L1 table descriptor + * + * This is a link for a 512GiB block of memory with up to 1GiB regions mapped + * within it by build_l1_block_pagetable. + * + * x6 = L0 table + * x8 = Virtual Address + * x9 = L1 PA (trashed) + * x10 = Entry count + * x11, x12 and x13 are trashed + */ +link_l0_pagetable: + /* + * Link an L0 -> L1 table entry. + */ + /* Find the table index */ + lsr x11, x8, #L0_SHIFT + and x11, x11, #L0_ADDR_MASK + + /* Build the L0 block entry */ + mov x12, #L0_TABLE + + /* Only use the output address bits */ + lsr x9, x9, #PAGE_SHIFT +1: orr x13, x12, x9, lsl #PAGE_SHIFT + + /* Store the entry */ + str x13, [x6, x11, lsl #3] + + sub x10, x10, #1 + add x11, x11, #1 + add x9, x9, #1 + cbnz x10, 1b + + ret + +/* + * Builds an L1 -> L2 table descriptor + * + * This is a link for a 1GiB block of memory with up to 2MiB regions mapped + * within it by build_l2_block_pagetable. + * + * x6 = L1 table + * x8 = Virtual Address + * x9 = L2 PA (trashed) + * x11, x12 and x13 are trashed + */ +link_l1_pagetable: + /* + * Link an L1 -> L2 table entry. + */ + /* Find the table index */ + lsr x11, x8, #L1_SHIFT + and x11, x11, #Ln_ADDR_MASK + + /* Build the L1 block entry */ + mov x12, #L1_TABLE + + /* Only use the output address bits */ + lsr x9, x9, #PAGE_SHIFT + orr x13, x12, x9, lsl #PAGE_SHIFT + + /* Store the entry */ + str x13, [x6, x11, lsl #3] + + ret + +/* + * Builds count 1 GiB page table entry + * x6 = L1 table + * x7 = Variable lower block attributes + * x8 = VA start + * x9 = PA start (trashed) + * x10 = Entry count + * x11, x12 and x13 are trashed + */ +build_l1_block_pagetable: + /* + * Build the L1 table entry. + */ + /* Find the table index */ + lsr x11, x8, #L1_SHIFT + and x11, x11, #Ln_ADDR_MASK + + /* Build the L1 block entry */ + orr x12, x7, #L1_BLOCK + orr x12, x12, #(ATTR_AF) +#ifdef SMP + orr x12, x12, ATTR_SH(ATTR_SH_IS) +#endif + + /* Only use the output address bits */ + lsr x9, x9, #L1_SHIFT + + /* Set the physical address for this virtual address */ +1: orr x13, x12, x9, lsl #L1_SHIFT + + /* Store the entry */ + str x13, [x6, x11, lsl #3] + + sub x10, x10, #1 + add x11, x11, #1 + add x9, x9, #1 + cbnz x10, 1b + + ret + +/* + * Builds count 2 MiB page table entry + * x6 = L2 table + * x7 = Type (0 = Device, 1 = Normal) + * x8 = VA start + * x9 = PA start (trashed) + * x10 = Entry count + * x11, x12 and x13 are trashed + */ +build_l2_block_pagetable: + /* + * Build the L2 table entry. 
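Each of the link/build routines above derives its table slot by shifting the VA and masking with the per-level index mask; with the 4 KiB granule used here, L0, L1, and L2 entries cover 512 GiB, 1 GiB, and 2 MiB respectively, so the shifts are 39, 30, and 21 bits. A standalone sketch of that arithmetic; the constants are restated here, while the kernel takes them from machine/pte.h:

#include <stdint.h>
#include <stdio.h>

#define	L0_SHIFT	39	/* each L0 entry covers 512 GiB */
#define	L1_SHIFT	30	/* each L1 entry covers 1 GiB */
#define	L2_SHIFT	21	/* each L2 entry covers 2 MiB */
#define	Ln_ADDR_MASK	0x1ff	/* 512 entries per 4 KiB table */

int
main(void)
{
	uint64_t va = 0xffff000000200000ULL;

	printf("L0 index %llu\n",
	    (unsigned long long)((va >> L0_SHIFT) & Ln_ADDR_MASK));
	printf("L1 index %llu\n",
	    (unsigned long long)((va >> L1_SHIFT) & Ln_ADDR_MASK));
	printf("L2 index %llu\n",
	    (unsigned long long)((va >> L2_SHIFT) & Ln_ADDR_MASK));
	return (0);
}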
+ */ + /* Find the table index */ + lsr x11, x8, #L2_SHIFT + and x11, x11, #Ln_ADDR_MASK + + /* Build the L2 block entry */ + lsl x12, x7, #2 + orr x12, x12, #L2_BLOCK + orr x12, x12, #(ATTR_AF) + orr x12, x12, #(ATTR_S1_UXN) +#ifdef SMP + orr x12, x12, ATTR_SH(ATTR_SH_IS) +#endif + + /* Only use the output address bits */ + lsr x9, x9, #L2_SHIFT + + /* Set the physical address for this virtual address */ +1: orr x13, x12, x9, lsl #L2_SHIFT + + /* Store the entry */ + str x13, [x6, x11, lsl #3] + + sub x10, x10, #1 + add x11, x11, #1 + add x9, x9, #1 + cbnz x10, 1b + + ret + +start_mmu: + dsb sy + + /* Load the exception vectors */ + ldr x2, =exception_vectors + msr vbar_el1, x2 + + /* Load ttbr0 and ttbr1 */ + msr ttbr0_el1, x27 + msr ttbr1_el1, x24 + isb + + /* Clear the Monitor Debug System control register */ + msr mdscr_el1, xzr + + /* Invalidate the TLB */ + tlbi vmalle1is + dsb ish + isb + + ldr x2, mair + msr mair_el1, x2 + + /* + * Setup TCR according to the PARange and ASIDBits fields + * from ID_AA64MMFR0_EL1 and the HAFDBS field from the + * ID_AA64MMFR1_EL1. More precisely, set TCR_EL1.AS + * to 1 only if the ASIDBits field equals 0b0010. + */ + ldr x2, tcr + mrs x3, id_aa64mmfr0_el1 + + /* Copy the bottom 3 bits from id_aa64mmfr0_el1 into TCR.IPS */ + bfi x2, x3, #(TCR_IPS_SHIFT), #(TCR_IPS_WIDTH) + and x3, x3, #(ID_AA64MMFR0_ASIDBits_MASK) + + /* Check if the HW supports 16 bit ASIDS */ + cmp x3, #(ID_AA64MMFR0_ASIDBits_16) + /* If so x3 == 1, else x3 == 0 */ + cset x3, eq + /* Set TCR.AS with x3 */ + bfi x2, x3, #(TCR_ASID_SHIFT), #(TCR_ASID_WIDTH) + + /* + * Check if the HW supports access flag and dirty state updates, + * and set TCR_EL1.HA and TCR_EL1.HD accordingly. + */ + mrs x3, id_aa64mmfr1_el1 + and x3, x3, #(ID_AA64MMFR1_HAFDBS_MASK) + cmp x3, #1 + b.ne 1f + orr x2, x2, #(TCR_HA) + b 2f +1: + cmp x3, #2 + b.ne 2f + orr x2, x2, #(TCR_HA | TCR_HD) +2: + msr tcr_el1, x2 + + /* + * Setup SCTLR. 
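The HAFDBS probe in start_mmu reads more easily in C: the field is the low nibble of ID_AA64MMFR1_EL1, where 1 means the hardware can update the Access flag and 2 means it can also track dirty state. A self-contained sketch, with the TCR bit positions written out (HA and HD are bits 39 and 40 in the Armv8.1 definition):

#include <stdint.h>
#include <stdio.h>

#define	TCR_HA	(1ULL << 39)	/* hardware Access flag update */
#define	TCR_HD	(1ULL << 40)	/* hardware dirty state update */

/* Map the ID_AA64MMFR1_EL1.HAFDBS field (bits [3:0]) to TCR bits. */
static uint64_t
hafdbs_to_tcr(uint64_t mmfr1)
{
	switch (mmfr1 & 0xf) {
	case 1:
		return (TCR_HA);		/* Access flag only */
	case 2:
		return (TCR_HA | TCR_HD);	/* Access flag + dirty state */
	default:
		return (0);			/* no hardware updates */
	}
}

int
main(void)
{
	printf("%#llx\n", (unsigned long long)hafdbs_to_tcr(2));
	return (0);
}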
+ */ + ldr x2, sctlr_set + ldr x3, sctlr_clear + mrs x1, sctlr_el1 + bic x1, x1, x3 /* Clear the required bits */ + orr x1, x1, x2 /* Set the required bits */ + msr sctlr_el1, x1 + isb + + ret + + .align 3 +mair: + .quad MAIR_ATTR(MAIR_DEVICE_nGnRnE, VM_MEMATTR_DEVICE) | \ + MAIR_ATTR(MAIR_NORMAL_NC, VM_MEMATTR_UNCACHEABLE) | \ + MAIR_ATTR(MAIR_NORMAL_WB, VM_MEMATTR_WRITE_BACK) | \ + MAIR_ATTR(MAIR_NORMAL_WT, VM_MEMATTR_WRITE_THROUGH) +tcr: + .quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_TG1_4K | \ + TCR_CACHE_ATTRS | TCR_SMP_ATTRS) +sctlr_set: + /* Bits to set */ + .quad (SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_UCI | SCTLR_SPAN | \ + SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \ + SCTLR_I | SCTLR_SED | SCTLR_SA0 | SCTLR_SA | SCTLR_C | \ + SCTLR_M | SCTLR_CP15BEN) +sctlr_clear: + /* Bits to clear */ + .quad (SCTLR_EE | SCTLR_EOE | SCTLR_IESB | SCTLR_WXN | SCTLR_UMA | \ + SCTLR_ITD | SCTLR_A) + + .globl abort +abort: + b abort + + //.section .init_pagetable + .align 12 /* 4KiB aligned */ + /* + * 6 initial tables (in the following order): + * L2 for kernel (High addresses) + * L1 for kernel + * L0 for kernel + * L1 bootstrap for user (Low addresses) + * L0 bootstrap for user + * L0 for user + */ +pagetable: + .space PAGE_SIZE +pagetable_l1_ttbr1: + .space PAGE_SIZE +pagetable_l0_ttbr1: + .space PAGE_SIZE +pagetable_l1_ttbr0_bootstrap: + .space PAGE_SIZE +pagetable_l0_ttbr0_boostrap: + .space PAGE_SIZE +pagetable_l0_ttbr0: + .space PAGE_SIZE + + .globl pagetable_dmap +pagetable_dmap: + .space PAGE_SIZE * DMAP_TABLES +pagetable_end: + +el2_pagetable: + .space PAGE_SIZE + + .globl init_pt_va +init_pt_va: + .quad pagetable /* XXX: Keep page tables VA */ + + .align 4 +initstack: + .space (PAGE_SIZE * KSTACK_PAGES) +initstack_end: + + +ENTRY(sigcode) + mov x0, sp + add x0, x0, #SF_UC + +1: + mov x8, #SYS_sigreturn + svc 0 + + /* sigreturn failed, exit */ + mov x8, #SYS_exit + svc 0 + + b 1b +END(sigcode) + /* This may be copied to the stack, keep it 16-byte aligned */ + .align 3 +esigcode: + + .data + .align 3 + .global szsigcode +szsigcode: + .quad esigcode - sigcode + +ENTRY(aarch32_sigcode) + .word 0xe1a0000d // mov r0, sp + .word 0xe2800040 // add r0, r0, #SIGF_UC + .word 0xe59f700c // ldr r7, [pc, #12] + .word 0xef000000 // swi #0 + .word 0xe59f7008 // ldr r7, [pc, #8] + .word 0xef000000 // swi #0 + .word 0xeafffffa // b . - 16 +END(aarch32_sigcode) + .word SYS_sigreturn + .word SYS_exit + .align 3 +aarch32_esigcode: + .data + .global sz_aarch32_sigcode +sz_aarch32_sigcode: + .quad aarch32_esigcode - aarch32_sigcode diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c new file mode 100644 index 000000000000..cb8d33ff57d5 --- /dev/null +++ b/sys/arm64/arm64/machdep.c @@ -0,0 +1,1375 @@ +/*- + * Copyright (c) 2014 Andrew Turner + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include "opt_acpi.h" +#include "opt_platform.h" +#include "opt_ddb.h" + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/bus.h> +#include <sys/cons.h> +#include <sys/cpu.h> +#include <sys/csan.h> +#include <sys/devmap.h> +#include <sys/efi.h> +#include <sys/exec.h> +#include <sys/imgact.h> +#include <sys/kdb.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/limits.h> +#include <sys/linker.h> +#include <sys/msgbuf.h> +#include <sys/pcpu.h> +#include <sys/physmem.h> +#include <sys/proc.h> +#include <sys/ptrace.h> +#include <sys/reboot.h> +#include <sys/rwlock.h> +#include <sys/sched.h> +#include <sys/signalvar.h> +#include <sys/syscallsubr.h> +#include <sys/sysent.h> +#include <sys/sysproto.h> +#include <sys/ucontext.h> +#include <sys/vdso.h> +#include <sys/vmmeter.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_phys.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> +#include <vm/vm_pager.h> + +#include <machine/armreg.h> +#include <machine/cpu.h> +#include <machine/debug_monitor.h> +#include <machine/kdb.h> +#include <machine/machdep.h> +#include <machine/metadata.h> +#include <machine/md_var.h> +#include <machine/pcb.h> +#include <machine/reg.h> +#include <machine/undefined.h> +#include <machine/vmparam.h> + +#ifdef VFP +#include <machine/vfp.h> +#endif + +#ifdef DEV_ACPI +#include <contrib/dev/acpica/include/acpi.h> +#include <machine/acpica_machdep.h> +#endif + +#ifdef FDT +#include <dev/fdt/fdt_common.h> +#include <dev/ofw/openfirm.h> +#endif + +static void get_fpcontext(struct thread *td, mcontext_t *mcp); +static void set_fpcontext(struct thread *td, mcontext_t *mcp); + +enum arm64_bus arm64_bus_method = ARM64_BUS_NONE; + +struct pcpu __pcpu[MAXCPU]; + +static struct trapframe proc0_tf; + +int early_boot = 1; +int cold = 1; +static int boot_el; + +struct kva_md_info kmi; + +int64_t dczva_line_size; /* The size of cache line the dc zva zeroes */ +int has_pan; + +/* + * Physical address of the EFI System Table. Stashed from the metadata hints + * passed into the kernel and used by the EFI code to call runtime services. 
+ */ +vm_paddr_t efi_systbl_phys; +static struct efi_map_header *efihdr; + +/* pagezero_* implementations are provided in support.S */ +void pagezero_simple(void *); +void pagezero_cache(void *); + +/* pagezero_simple is default pagezero */ +void (*pagezero)(void *p) = pagezero_simple; + +int (*apei_nmi)(void); + +static void +pan_setup(void) +{ + uint64_t id_aa64mfr1; + + id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1); + if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE) + has_pan = 1; +} + +void +pan_enable(void) +{ + + /* + * The LLVM integrated assembler doesn't understand the PAN + * PSTATE field. Because of this we need to manually create + * the instruction in an asm block. This is equivalent to: + * msr pan, #1 + * + * This sets the PAN bit, stopping the kernel from accessing + * memory when userspace can also access it unless the kernel + * uses the userspace load/store instructions. + */ + if (has_pan) { + WRITE_SPECIALREG(sctlr_el1, + READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN); + __asm __volatile(".inst 0xd500409f | (0x1 << 8)"); + } +} + +bool +has_hyp(void) +{ + + return (boot_el == 2); +} + +static void +cpu_startup(void *dummy) +{ + vm_paddr_t size; + int i; + + printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem), + ptoa((uintmax_t)realmem) / 1024 / 1024); + + if (bootverbose) { + printf("Physical memory chunk(s):\n"); + for (i = 0; phys_avail[i + 1] != 0; i += 2) { + size = phys_avail[i + 1] - phys_avail[i]; + printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n", + (uintmax_t)phys_avail[i], + (uintmax_t)phys_avail[i + 1] - 1, + (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); + } + } + + printf("avail memory = %ju (%ju MB)\n", + ptoa((uintmax_t)vm_free_count()), + ptoa((uintmax_t)vm_free_count()) / 1024 / 1024); + + undef_init(); + install_cpu_errata(); + + vm_ksubmap_init(&kmi); + bufinit(); + vm_pager_bufferinit(); +} + +SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); + +static void +late_ifunc_resolve(void *dummy __unused) +{ + link_elf_late_ireloc(); +} +SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL); + +int +cpu_idle_wakeup(int cpu) +{ + + return (0); +} + +int +fill_regs(struct thread *td, struct reg *regs) +{ + struct trapframe *frame; + + frame = td->td_frame; + regs->sp = frame->tf_sp; + regs->lr = frame->tf_lr; + regs->elr = frame->tf_elr; + regs->spsr = frame->tf_spsr; + + memcpy(regs->x, frame->tf_x, sizeof(regs->x)); + +#ifdef COMPAT_FREEBSD32 + /* + * We may be called here for a 32bits process, if we're using a + * 64bits debugger. If so, put PC and SPSR where it expects it. + */ + if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { + regs->x[15] = frame->tf_elr; + regs->x[16] = frame->tf_spsr; + } +#endif + return (0); +} + +int +set_regs(struct thread *td, struct reg *regs) +{ + struct trapframe *frame; + + frame = td->td_frame; + frame->tf_sp = regs->sp; + frame->tf_lr = regs->lr; + frame->tf_elr = regs->elr; + frame->tf_spsr &= ~PSR_FLAGS; + frame->tf_spsr |= regs->spsr & PSR_FLAGS; + + memcpy(frame->tf_x, regs->x, sizeof(frame->tf_x)); + +#ifdef COMPAT_FREEBSD32 + if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { + /* + * We may be called for a 32bits process if we're using + * a 64bits debugger. If so, get PC and SPSR from where + * it put it. 
+ */ + frame->tf_elr = regs->x[15]; + frame->tf_spsr = regs->x[16] & PSR_FLAGS; + } +#endif + return (0); +} + +int +fill_fpregs(struct thread *td, struct fpreg *regs) +{ +#ifdef VFP + struct pcb *pcb; + + pcb = td->td_pcb; + if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) { + /* + * If we have just been running VFP instructions we will + * need to save the state to memcpy it below. + */ + if (td == curthread) + vfp_save_state(td, pcb); + + KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate, + ("Called fill_fpregs while the kernel is using the VFP")); + memcpy(regs->fp_q, pcb->pcb_fpustate.vfp_regs, + sizeof(regs->fp_q)); + regs->fp_cr = pcb->pcb_fpustate.vfp_fpcr; + regs->fp_sr = pcb->pcb_fpustate.vfp_fpsr; + } else +#endif + memset(regs, 0, sizeof(*regs)); + return (0); +} + +int +set_fpregs(struct thread *td, struct fpreg *regs) +{ +#ifdef VFP + struct pcb *pcb; + + pcb = td->td_pcb; + KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate, + ("Called set_fpregs while the kernel is using the VFP")); + memcpy(pcb->pcb_fpustate.vfp_regs, regs->fp_q, sizeof(regs->fp_q)); + pcb->pcb_fpustate.vfp_fpcr = regs->fp_cr; + pcb->pcb_fpustate.vfp_fpsr = regs->fp_sr; +#endif + return (0); +} + +int +fill_dbregs(struct thread *td, struct dbreg *regs) +{ + struct debug_monitor_state *monitor; + int count, i; + uint8_t debug_ver, nbkpts; + + memset(regs, 0, sizeof(*regs)); + + extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_DebugVer_SHIFT, + &debug_ver); + extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_BRPs_SHIFT, + &nbkpts); + + /* + * The BRPs field contains the number of breakpoints - 1. Armv8-A + * allows the hardware to provide 2-16 breakpoints so this won't + * overflow an 8 bit value. + */ + count = nbkpts + 1; + + regs->db_info = debug_ver; + regs->db_info <<= 8; + regs->db_info |= count; + + monitor = &td->td_pcb->pcb_dbg_regs; + if ((monitor->dbg_flags & DBGMON_ENABLED) != 0) { + for (i = 0; i < count; i++) { + regs->db_regs[i].dbr_addr = monitor->dbg_bvr[i]; + regs->db_regs[i].dbr_ctrl = monitor->dbg_bcr[i]; + } + } + + return (0); +} + +int +set_dbregs(struct thread *td, struct dbreg *regs) +{ + struct debug_monitor_state *monitor; + int count; + int i; + + monitor = &td->td_pcb->pcb_dbg_regs; + count = 0; + monitor->dbg_enable_count = 0; + for (i = 0; i < DBG_BRP_MAX; i++) { + /* TODO: Check these values */ + monitor->dbg_bvr[i] = regs->db_regs[i].dbr_addr; + monitor->dbg_bcr[i] = regs->db_regs[i].dbr_ctrl; + if ((monitor->dbg_bcr[i] & 1) != 0) + monitor->dbg_enable_count++; + } + if (monitor->dbg_enable_count > 0) + monitor->dbg_flags |= DBGMON_ENABLED; + + return (0); +} + +#ifdef COMPAT_FREEBSD32 +int +fill_regs32(struct thread *td, struct reg32 *regs) +{ + int i; + struct trapframe *tf; + + tf = td->td_frame; + for (i = 0; i < 13; i++) + regs->r[i] = tf->tf_x[i]; + /* For arm32, SP is r13 and LR is r14 */ + regs->r_sp = tf->tf_x[13]; + regs->r_lr = tf->tf_x[14]; + regs->r_pc = tf->tf_elr; + regs->r_cpsr = tf->tf_spsr; + + return (0); +} + +int +set_regs32(struct thread *td, struct reg32 *regs) +{ + int i; + struct trapframe *tf; + + tf = td->td_frame; + for (i = 0; i < 13; i++) + tf->tf_x[i] = regs->r[i]; + /* For arm 32, SP is r13 an LR is r14 */ + tf->tf_x[13] = regs->r_sp; + tf->tf_x[14] = regs->r_lr; + tf->tf_elr = regs->r_pc; + tf->tf_spsr = regs->r_cpsr; + + return (0); +} + +int +fill_fpregs32(struct thread *td, struct fpreg32 *regs) +{ + + printf("ARM64TODO: fill_fpregs32"); + return (EDOOFUS); +} + +int +set_fpregs32(struct thread *td, struct fpreg32 *regs) +{ + + 
printf("ARM64TODO: set_fpregs32"); + return (EDOOFUS); +} + +int +fill_dbregs32(struct thread *td, struct dbreg32 *regs) +{ + + printf("ARM64TODO: fill_dbregs32"); + return (EDOOFUS); +} + +int +set_dbregs32(struct thread *td, struct dbreg32 *regs) +{ + + printf("ARM64TODO: set_dbregs32"); + return (EDOOFUS); +} +#endif + +int +ptrace_set_pc(struct thread *td, u_long addr) +{ + + td->td_frame->tf_elr = addr; + return (0); +} + +int +ptrace_single_step(struct thread *td) +{ + + td->td_frame->tf_spsr |= PSR_SS; + td->td_pcb->pcb_flags |= PCB_SINGLE_STEP; + return (0); +} + +int +ptrace_clear_single_step(struct thread *td) +{ + + td->td_frame->tf_spsr &= ~PSR_SS; + td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; + return (0); +} + +void +exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) +{ + struct trapframe *tf = td->td_frame; + + memset(tf, 0, sizeof(struct trapframe)); + + tf->tf_x[0] = stack; + tf->tf_sp = STACKALIGN(stack); + tf->tf_lr = imgp->entry_addr; + tf->tf_elr = imgp->entry_addr; +} + +/* Sanity check these are the same size, they will be memcpy'd to and fro */ +CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == + sizeof((struct gpregs *)0)->gp_x); +CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == + sizeof((struct reg *)0)->x); + +int +get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) +{ + struct trapframe *tf = td->td_frame; + + if (clear_ret & GET_MC_CLEAR_RET) { + mcp->mc_gpregs.gp_x[0] = 0; + mcp->mc_gpregs.gp_spsr = tf->tf_spsr & ~PSR_C; + } else { + mcp->mc_gpregs.gp_x[0] = tf->tf_x[0]; + mcp->mc_gpregs.gp_spsr = tf->tf_spsr; + } + + memcpy(&mcp->mc_gpregs.gp_x[1], &tf->tf_x[1], + sizeof(mcp->mc_gpregs.gp_x[1]) * (nitems(mcp->mc_gpregs.gp_x) - 1)); + + mcp->mc_gpregs.gp_sp = tf->tf_sp; + mcp->mc_gpregs.gp_lr = tf->tf_lr; + mcp->mc_gpregs.gp_elr = tf->tf_elr; + get_fpcontext(td, mcp); + + return (0); +} + +int +set_mcontext(struct thread *td, mcontext_t *mcp) +{ + struct trapframe *tf = td->td_frame; + uint32_t spsr; + + spsr = mcp->mc_gpregs.gp_spsr; + if ((spsr & PSR_M_MASK) != PSR_M_EL0t || + (spsr & PSR_AARCH32) != 0 || + (spsr & PSR_DAIF) != (td->td_frame->tf_spsr & PSR_DAIF)) + return (EINVAL); + + memcpy(tf->tf_x, mcp->mc_gpregs.gp_x, sizeof(tf->tf_x)); + + tf->tf_sp = mcp->mc_gpregs.gp_sp; + tf->tf_lr = mcp->mc_gpregs.gp_lr; + tf->tf_elr = mcp->mc_gpregs.gp_elr; + tf->tf_spsr = mcp->mc_gpregs.gp_spsr; + set_fpcontext(td, mcp); + + return (0); +} + +static void +get_fpcontext(struct thread *td, mcontext_t *mcp) +{ +#ifdef VFP + struct pcb *curpcb; + + critical_enter(); + + curpcb = curthread->td_pcb; + + if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) { + /* + * If we have just been running VFP instructions we will + * need to save the state to memcpy it below. 
+ */ + vfp_save_state(td, curpcb); + + KASSERT(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate, + ("Called get_fpcontext while the kernel is using the VFP")); + KASSERT((curpcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0, + ("Non-userspace FPU flags set in get_fpcontext")); + memcpy(mcp->mc_fpregs.fp_q, curpcb->pcb_fpustate.vfp_regs, + sizeof(mcp->mc_fpregs)); + mcp->mc_fpregs.fp_cr = curpcb->pcb_fpustate.vfp_fpcr; + mcp->mc_fpregs.fp_sr = curpcb->pcb_fpustate.vfp_fpsr; + mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags; + mcp->mc_flags |= _MC_FP_VALID; + } + + critical_exit(); +#endif +} + +static void +set_fpcontext(struct thread *td, mcontext_t *mcp) +{ +#ifdef VFP + struct pcb *curpcb; + + critical_enter(); + + if ((mcp->mc_flags & _MC_FP_VALID) != 0) { + curpcb = curthread->td_pcb; + + /* + * Discard any vfp state for the current thread, we + * are about to override it. + */ + vfp_discard(td); + + KASSERT(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate, + ("Called set_fpcontext while the kernel is using the VFP")); + memcpy(curpcb->pcb_fpustate.vfp_regs, mcp->mc_fpregs.fp_q, + sizeof(mcp->mc_fpregs)); + curpcb->pcb_fpustate.vfp_fpcr = mcp->mc_fpregs.fp_cr; + curpcb->pcb_fpustate.vfp_fpsr = mcp->mc_fpregs.fp_sr; + curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags & PCB_FP_USERMASK; + } + + critical_exit(); +#endif +} + +void +cpu_idle(int busy) +{ + + spinlock_enter(); + if (!busy) + cpu_idleclock(); + if (!sched_runnable()) + __asm __volatile( + "dsb sy \n" + "wfi \n"); + if (!busy) + cpu_activeclock(); + spinlock_exit(); +} + +void +cpu_halt(void) +{ + + /* We should have shutdown by now, if not enter a low power sleep */ + intr_disable(); + while (1) { + __asm __volatile("wfi"); + } +} + +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + + /* ARM64TODO TBD */ +} + +/* Get current clock frequency for the given CPU ID. */ +int +cpu_est_clockrate(int cpu_id, uint64_t *rate) +{ + struct pcpu *pc; + + pc = pcpu_find(cpu_id); + if (pc == NULL || rate == NULL) + return (EINVAL); + + if (pc->pc_clock == 0) + return (EOPNOTSUPP); + + *rate = pc->pc_clock; + return (0); +} + +void +cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) +{ + + pcpu->pc_acpi_id = 0xffffffff; +} + +void +spinlock_enter(void) +{ + struct thread *td; + register_t daif; + + td = curthread; + if (td->td_md.md_spinlock_count == 0) { + daif = intr_disable(); + td->td_md.md_spinlock_count = 1; + td->td_md.md_saved_daif = daif; + critical_enter(); + } else + td->td_md.md_spinlock_count++; +} + +void +spinlock_exit(void) +{ + struct thread *td; + register_t daif; + + td = curthread; + daif = td->td_md.md_saved_daif; + td->td_md.md_spinlock_count--; + if (td->td_md.md_spinlock_count == 0) { + critical_exit(); + intr_restore(daif); + } +} + +#ifndef _SYS_SYSPROTO_H_ +struct sigreturn_args { + ucontext_t *ucp; +}; +#endif + +int +sys_sigreturn(struct thread *td, struct sigreturn_args *uap) +{ + ucontext_t uc; + int error; + + if (copyin(uap->sigcntxp, &uc, sizeof(uc))) + return (EFAULT); + + error = set_mcontext(td, &uc.uc_mcontext); + if (error != 0) + return (error); + + /* Restore signal mask. */ + kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); + + return (EJUSTRETURN); +} + +/* + * Construct a PCB from a trapframe. This is called from kdb_trap() where + * we want to start a backtrace from the function that caused us to enter + * the debugger. We have the context in the trapframe, but base the trace + * on the PCB. 
The PCB doesn't have to be perfect, as long as it contains + * enough for a backtrace. + */ +void +makectx(struct trapframe *tf, struct pcb *pcb) +{ + int i; + + for (i = 0; i < PCB_LR; i++) + pcb->pcb_x[i] = tf->tf_x[i]; + + pcb->pcb_x[PCB_LR] = tf->tf_lr; + pcb->pcb_pc = tf->tf_elr; + pcb->pcb_sp = tf->tf_sp; +} + +void +sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) +{ + struct thread *td; + struct proc *p; + struct trapframe *tf; + struct sigframe *fp, frame; + struct sigacts *psp; + struct sysentvec *sysent; + int onstack, sig; + + td = curthread; + p = td->td_proc; + PROC_LOCK_ASSERT(p, MA_OWNED); + + sig = ksi->ksi_signo; + psp = p->p_sigacts; + mtx_assert(&psp->ps_mtx, MA_OWNED); + + tf = td->td_frame; + onstack = sigonstack(tf->tf_sp); + + CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, + catcher, sig); + + /* Allocate and validate space for the signal handler context. */ + if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && + SIGISMEMBER(psp->ps_sigonstack, sig)) { + fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + + td->td_sigstk.ss_size); +#if defined(COMPAT_43) + td->td_sigstk.ss_flags |= SS_ONSTACK; +#endif + } else { + fp = (struct sigframe *)td->td_frame->tf_sp; + } + + /* Make room, keeping the stack aligned */ + fp--; + fp = (struct sigframe *)STACKALIGN(fp); + + /* Fill in the frame to copy out */ + bzero(&frame, sizeof(frame)); + get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); + frame.sf_si = ksi->ksi_info; + frame.sf_uc.uc_sigmask = *mask; + frame.sf_uc.uc_stack = td->td_sigstk; + frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ? + (onstack ? SS_ONSTACK : 0) : SS_DISABLE; + mtx_unlock(&psp->ps_mtx); + PROC_UNLOCK(td->td_proc); + + /* Copy the sigframe out to the user's stack. */ + if (copyout(&frame, fp, sizeof(*fp)) != 0) { + /* Process has trashed its stack. Kill it. */ + CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); + PROC_LOCK(p); + sigexit(td, SIGILL); + } + + tf->tf_x[0]= sig; + tf->tf_x[1] = (register_t)&fp->sf_si; + tf->tf_x[2] = (register_t)&fp->sf_uc; + + tf->tf_elr = (register_t)catcher; + tf->tf_sp = (register_t)fp; + sysent = p->p_sysent; + if (sysent->sv_sigcode_base != 0) + tf->tf_lr = (register_t)sysent->sv_sigcode_base; + else + tf->tf_lr = (register_t)(sysent->sv_psstrings - + *(sysent->sv_szsigcode)); + + CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr, + tf->tf_sp); + + PROC_LOCK(p); + mtx_lock(&psp->ps_mtx); +} + +static void +init_proc0(vm_offset_t kstack) +{ + struct pcpu *pcpup = &__pcpu[0]; + + proc_linkup0(&proc0, &thread0); + thread0.td_kstack = kstack; + thread0.td_kstack_pages = KSTACK_PAGES; + thread0.td_pcb = (struct pcb *)(thread0.td_kstack + + thread0.td_kstack_pages * PAGE_SIZE) - 1; + thread0.td_pcb->pcb_fpflags = 0; + thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate; + thread0.td_pcb->pcb_vfpcpu = UINT_MAX; + thread0.td_frame = &proc0_tf; + pcpup->pc_curpcb = thread0.td_pcb; +} + +typedef struct { + uint32_t type; + uint64_t phys_start; + uint64_t virt_start; + uint64_t num_pages; + uint64_t attr; +} EFI_MEMORY_DESCRIPTOR; + +typedef void (*efi_map_entry_cb)(struct efi_md *); + +static void +foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb) +{ + struct efi_md *map, *p; + size_t efisz; + int ndesc, i; + + /* + * Memory map data provided by UEFI via the GetMemoryMap + * Boot Services API. 
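One detail worth noting in the walker that follows: it advances by the firmware-reported descriptor_size rather than sizeof(struct efi_md), because UEFI is allowed to grow its descriptor beyond the fields a given kernel knows about. A minimal sketch of that stride; the descriptor layout here is illustrative, not the kernel's definition:

#include <stddef.h>
#include <stdint.h>

struct efi_md_sketch {		/* illustrative descriptor layout */
	uint32_t	md_type;
	uint64_t	md_phys;
	uint64_t	md_virt;
	uint64_t	md_pages;
	uint64_t	md_attr;
};

/* Advance by the firmware-reported size, not sizeof(struct). */
static inline struct efi_md_sketch *
next_descriptor(struct efi_md_sketch *p, size_t desc_size)
{
	return ((struct efi_md_sketch *)((uint8_t *)p + desc_size));
}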
+ */ + efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; + map = (struct efi_md *)((uint8_t *)efihdr + efisz); + + if (efihdr->descriptor_size == 0) + return; + ndesc = efihdr->memory_size / efihdr->descriptor_size; + + for (i = 0, p = map; i < ndesc; i++, + p = efi_next_descriptor(p, efihdr->descriptor_size)) { + cb(p); + } +} + +static void +exclude_efi_map_entry(struct efi_md *p) +{ + + switch (p->md_type) { + case EFI_MD_TYPE_CODE: + case EFI_MD_TYPE_DATA: + case EFI_MD_TYPE_BS_CODE: + case EFI_MD_TYPE_BS_DATA: + case EFI_MD_TYPE_FREE: + /* + * We're allowed to use any entry with these types. + */ + break; + default: + physmem_exclude_region(p->md_phys, p->md_pages * PAGE_SIZE, + EXFLAG_NOALLOC); + } +} + +static void +exclude_efi_map_entries(struct efi_map_header *efihdr) +{ + + foreach_efi_map_entry(efihdr, exclude_efi_map_entry); +} + +static void +add_efi_map_entry(struct efi_md *p) +{ + + switch (p->md_type) { + case EFI_MD_TYPE_RT_DATA: + /* + * Runtime data will be excluded after the DMAP + * region is created to stop it from being added + * to phys_avail. + */ + case EFI_MD_TYPE_CODE: + case EFI_MD_TYPE_DATA: + case EFI_MD_TYPE_BS_CODE: + case EFI_MD_TYPE_BS_DATA: + case EFI_MD_TYPE_FREE: + /* + * We're allowed to use any entry with these types. + */ + physmem_hardware_region(p->md_phys, + p->md_pages * PAGE_SIZE); + break; + } +} + +static void +add_efi_map_entries(struct efi_map_header *efihdr) +{ + + foreach_efi_map_entry(efihdr, add_efi_map_entry); +} + +static void +print_efi_map_entry(struct efi_md *p) +{ + const char *type; + static const char *types[] = { + "Reserved", + "LoaderCode", + "LoaderData", + "BootServicesCode", + "BootServicesData", + "RuntimeServicesCode", + "RuntimeServicesData", + "ConventionalMemory", + "UnusableMemory", + "ACPIReclaimMemory", + "ACPIMemoryNVS", + "MemoryMappedIO", + "MemoryMappedIOPortSpace", + "PalCode", + "PersistentMemory" + }; + + if (p->md_type < nitems(types)) + type = types[p->md_type]; + else + type = "<INVALID>"; + printf("%23s %012lx %12p %08lx ", type, p->md_phys, + p->md_virt, p->md_pages); + if (p->md_attr & EFI_MD_ATTR_UC) + printf("UC "); + if (p->md_attr & EFI_MD_ATTR_WC) + printf("WC "); + if (p->md_attr & EFI_MD_ATTR_WT) + printf("WT "); + if (p->md_attr & EFI_MD_ATTR_WB) + printf("WB "); + if (p->md_attr & EFI_MD_ATTR_UCE) + printf("UCE "); + if (p->md_attr & EFI_MD_ATTR_WP) + printf("WP "); + if (p->md_attr & EFI_MD_ATTR_RP) + printf("RP "); + if (p->md_attr & EFI_MD_ATTR_XP) + printf("XP "); + if (p->md_attr & EFI_MD_ATTR_NV) + printf("NV "); + if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE) + printf("MORE_RELIABLE "); + if (p->md_attr & EFI_MD_ATTR_RO) + printf("RO "); + if (p->md_attr & EFI_MD_ATTR_RT) + printf("RUNTIME"); + printf("\n"); +} + +static void +print_efi_map_entries(struct efi_map_header *efihdr) +{ + + printf("%23s %12s %12s %8s %4s\n", + "Type", "Physical", "Virtual", "#Pages", "Attr"); + foreach_efi_map_entry(efihdr, print_efi_map_entry); +} + +#ifdef FDT +static void +try_load_dtb(caddr_t kmdp) +{ + vm_offset_t dtbp; + + dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); +#if defined(FDT_DTB_STATIC) + /* + * In case the device tree blob was not retrieved (from metadata) try + * to use the statically embedded one. 
+ */
+	if (dtbp == 0)
+		dtbp = (vm_offset_t)&fdt_static_dtb;
+#endif
+
+	if (dtbp == (vm_offset_t)NULL) {
+		printf("ERROR loading DTB\n");
+		return;
+	}
+
+	if (OF_install(OFW_FDT, 0) == FALSE)
+		panic("Cannot install FDT");
+
+	if (OF_init((void *)dtbp) != 0)
+		panic("OF_init failed with the found device tree");
+
+	parse_fdt_bootargs();
+}
+#endif
+
+static bool
+bus_probe(void)
+{
+	bool has_acpi, has_fdt;
+	char *order, *env;
+
+	has_acpi = has_fdt = false;
+
+#ifdef FDT
+	has_fdt = (OF_peer(0) != 0);
+#endif
+#ifdef DEV_ACPI
+	has_acpi = (acpi_find_table(ACPI_SIG_SPCR) != 0);
+#endif
+
+	env = kern_getenv("kern.cfg.order");
+	if (env != NULL) {
+		order = env;
+		while (order != NULL) {
+			if (has_acpi &&
+			    strncmp(order, "acpi", 4) == 0 &&
+			    (order[4] == ',' || order[4] == '\0')) {
+				arm64_bus_method = ARM64_BUS_ACPI;
+				break;
+			}
+			if (has_fdt &&
+			    strncmp(order, "fdt", 3) == 0 &&
+			    (order[3] == ',' || order[3] == '\0')) {
+				arm64_bus_method = ARM64_BUS_FDT;
+				break;
+			}
+			order = strchr(order, ',');
+			/* Step past the comma so the scan makes progress */
+			if (order != NULL)
+				order++;
+		}
+		freeenv(env);
+
+		/* If we set the bus method it is valid */
+		if (arm64_bus_method != ARM64_BUS_NONE)
+			return (true);
+	}
+	/* If no order or an invalid order was set use the default */
+	if (arm64_bus_method == ARM64_BUS_NONE) {
+		if (has_fdt)
+			arm64_bus_method = ARM64_BUS_FDT;
+		else if (has_acpi)
+			arm64_bus_method = ARM64_BUS_ACPI;
+	}
+
+	/*
+	 * If no option was set the default is valid, otherwise we are
+	 * setting one to get cninit() working, then calling panic to tell
+	 * the user about the invalid bus setup.
+	 */
+	return (env == NULL);
+}
+
+static void
+cache_setup(void)
+{
+	int dczva_line_shift;
+	uint32_t dczid_el0;
+
+	identify_cache(READ_SPECIALREG(ctr_el0));
+
+	dczid_el0 = READ_SPECIALREG(dczid_el0);
+
+	/* Check if dc zva is not prohibited */
+	if (dczid_el0 & DCZID_DZP)
+		dczva_line_size = 0;
+	else {
+		/* Same as with above calculations */
+		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
+		dczva_line_size = sizeof(int) << dczva_line_shift;
+
+		/* Change pagezero function */
+		pagezero = pagezero_cache;
+	}
+}
+
+int
+memory_mapping_mode(vm_paddr_t pa)
+{
+	struct efi_md *map, *p;
+	size_t efisz;
+	int ndesc, i;
+
+	if (efihdr == NULL)
+		return (VM_MEMATTR_WRITE_BACK);
+
+	/*
+	 * Memory map data provided by UEFI via the GetMemoryMap
+	 * Boot Services API.
+	 */
+	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
+	map = (struct efi_md *)((uint8_t *)efihdr + efisz);
+
+	if (efihdr->descriptor_size == 0)
+		return (VM_MEMATTR_WRITE_BACK);
+	ndesc = efihdr->memory_size / efihdr->descriptor_size;
+
+	for (i = 0, p = map; i < ndesc; i++,
+	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
+		if (pa < p->md_phys ||
+		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
+			continue;
+		if (p->md_type == EFI_MD_TYPE_IOMEM ||
+		    p->md_type == EFI_MD_TYPE_IOPORT)
+			return (VM_MEMATTR_DEVICE);
+		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
+		    p->md_type == EFI_MD_TYPE_RECLAIM)
+			return (VM_MEMATTR_WRITE_BACK);
+		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
+			return (VM_MEMATTR_WRITE_THROUGH);
+		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
+			return (VM_MEMATTR_WRITE_COMBINING);
+		break;
+	}
+
+	return (VM_MEMATTR_DEVICE);
+}
+
+void
+initarm(struct arm64_bootparams *abp)
+{
+	struct efi_fb *efifb;
+	struct pcpu *pcpup;
+	char *env;
+#ifdef FDT
+	struct mem_region mem_regions[FDT_MEM_REGIONS];
+	int mem_regions_sz;
+#endif
+	vm_offset_t lastaddr;
+	caddr_t kmdp;
+	bool valid;
+
+	boot_el = abp->boot_el;
+
+	/* Parse loader or FDT boot parameters. Determine last used address.
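bus_probe above accepts kern.cfg.order as a comma-separated preference list such as "acpi,fdt"; the scan has to step past each comma, otherwise an unrecognized leading token would make strchr return the same position forever. A userspace rendering of the same token walk, handy for experimenting with inputs:

#include <stdio.h>
#include <string.h>

/* Return the first recognized token from a comma-separated order list. */
static const char *
first_known(const char *order)
{
	while (order != NULL) {
		if (strncmp(order, "acpi", 4) == 0 &&
		    (order[4] == ',' || order[4] == '\0'))
			return ("acpi");
		if (strncmp(order, "fdt", 3) == 0 &&
		    (order[3] == ',' || order[3] == '\0'))
			return ("fdt");
		order = strchr(order, ',');
		if (order != NULL)
			order++;	/* step past the comma */
	}
	return (NULL);
}

int
main(void)
{
	const char *t = first_known("foo,fdt,acpi");

	printf("%s\n", t != NULL ? t : "none");	/* prints "fdt" */
	return (0);
}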
+
+int
+memory_mapping_mode(vm_paddr_t pa)
+{
+	struct efi_md *map, *p;
+	size_t efisz;
+	int ndesc, i;
+
+	if (efihdr == NULL)
+		return (VM_MEMATTR_WRITE_BACK);
+
+	/*
+	 * Memory map data provided by UEFI via the GetMemoryMap
+	 * Boot Services API.
+	 */
+	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
+	map = (struct efi_md *)((uint8_t *)efihdr + efisz);
+
+	if (efihdr->descriptor_size == 0)
+		return (VM_MEMATTR_WRITE_BACK);
+	ndesc = efihdr->memory_size / efihdr->descriptor_size;
+
+	for (i = 0, p = map; i < ndesc; i++,
+	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
+		if (pa < p->md_phys ||
+		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
+			continue;
+		if (p->md_type == EFI_MD_TYPE_IOMEM ||
+		    p->md_type == EFI_MD_TYPE_IOPORT)
+			return (VM_MEMATTR_DEVICE);
+		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
+		    p->md_type == EFI_MD_TYPE_RECLAIM)
+			return (VM_MEMATTR_WRITE_BACK);
+		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
+			return (VM_MEMATTR_WRITE_THROUGH);
+		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
+			return (VM_MEMATTR_WRITE_COMBINING);
+		break;
+	}
+
+	return (VM_MEMATTR_DEVICE);
+}
+
+void
+initarm(struct arm64_bootparams *abp)
+{
+	struct efi_fb *efifb;
+	struct pcpu *pcpup;
+	char *env;
+#ifdef FDT
+	struct mem_region mem_regions[FDT_MEM_REGIONS];
+	int mem_regions_sz;
+#endif
+	vm_offset_t lastaddr;
+	caddr_t kmdp;
+	bool valid;
+
+	boot_el = abp->boot_el;
+
+	/* Parse loader or FDT boot parameters. Determine last used address. */
+	lastaddr = parse_boot_param(abp);
+
+	/* Find the kernel address */
+	kmdp = preload_search_by_type("elf kernel");
+	if (kmdp == NULL)
+		kmdp = preload_search_by_type("elf64 kernel");
+
+	identify_cpu(0);
+	update_special_regs(0);
+
+	link_elf_ireloc(kmdp);
+	try_load_dtb(kmdp);
+
+	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);
+
+	/* Load the physical memory ranges */
+	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
+	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
+	if (efihdr != NULL)
+		add_efi_map_entries(efihdr);
+#ifdef FDT
+	else {
+		/* Grab physical memory regions information from device tree. */
+		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
+		    NULL) != 0)
+			panic("Cannot get physical memory regions");
+		physmem_hardware_regions(mem_regions, mem_regions_sz);
+	}
+	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
+		physmem_exclude_regions(mem_regions, mem_regions_sz,
+		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
+#endif
+
+	/* Exclude the EFI framebuffer from our view of physical memory. */
+	efifb = (struct efi_fb *)preload_search_info(kmdp,
+	    MODINFO_METADATA | MODINFOMD_EFI_FB);
+	if (efifb != NULL)
+		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
+		    EXFLAG_NOALLOC);
+
+	/* Set the pcpu data, this is needed by pmap_bootstrap */
+	pcpup = &__pcpu[0];
+	pcpu_init(pcpup, 0, sizeof(struct pcpu));
+
+	/*
+	 * Set the pcpu pointer with a backup in tpidr_el1 to be
+	 * loaded when entering the kernel from userland.
+	 */
+	__asm __volatile(
+	    "mov x18, %0 \n"
+	    "msr tpidr_el1, %0" :: "r"(pcpup));
+
+	PCPU_SET(curthread, &thread0);
+	PCPU_SET(midr, get_midr());
+
+	/* Do basic tuning, hz etc */
+	init_param1();
+
+	cache_setup();
+	pan_setup();
+
+	/* Bootstrap enough of pmap to enter the kernel proper */
+	pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt,
+	    KERNBASE - abp->kern_delta, lastaddr - KERNBASE);
+	/* Exclude entries needed in the DMAP region, but not phys_avail */
+	if (efihdr != NULL)
+		exclude_efi_map_entries(efihdr);
+	physmem_init_kernel_globals();
+
+	devmap_bootstrap(0, NULL);
+
+	valid = bus_probe();
+
+	cninit();
+	set_ttbr0(abp->kern_ttbr0);
+	cpu_tlb_flushID();
+
+	if (!valid)
+		panic("Invalid bus configuration: %s",
+		    kern_getenv("kern.cfg.order"));
+
+	init_proc0(abp->kern_stack);
+	msgbufinit(msgbufp, msgbufsize);
+	mutex_init();
+	init_param2(physmem);
+
+	dbg_init();
+	kdb_init();
+	pan_enable();
+
+	kcsan_cpu_init(0);
+
+	env = kern_getenv("kernelname");
+	if (env != NULL)
+		strlcpy(kernelname, env, sizeof(kernelname));
+
+	if (boothowto & RB_VERBOSE) {
+		print_efi_map_entries(efihdr);
+		physmem_print_tables();
+	}
+
+	early_boot = 0;
+}
+
+void
+dbg_init(void)
+{
+
+	/* Clear OS lock */
+	WRITE_SPECIALREG(oslar_el1, 0);
+
+	/* This permits DDB to use debug registers for watchpoints. */
+	dbg_monitor_init();
+
+	/* TODO: Eventually will need to initialize debug registers here.
*/ +} + +#ifdef DDB +#include <ddb/ddb.h> + +DB_SHOW_COMMAND(specialregs, db_show_spregs) +{ +#define PRINT_REG(reg) \ + db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg)) + + PRINT_REG(actlr_el1); + PRINT_REG(afsr0_el1); + PRINT_REG(afsr1_el1); + PRINT_REG(aidr_el1); + PRINT_REG(amair_el1); + PRINT_REG(ccsidr_el1); + PRINT_REG(clidr_el1); + PRINT_REG(contextidr_el1); + PRINT_REG(cpacr_el1); + PRINT_REG(csselr_el1); + PRINT_REG(ctr_el0); + PRINT_REG(currentel); + PRINT_REG(daif); + PRINT_REG(dczid_el0); + PRINT_REG(elr_el1); + PRINT_REG(esr_el1); + PRINT_REG(far_el1); +#if 0 + /* ARM64TODO: Enable VFP before reading floating-point registers */ + PRINT_REG(fpcr); + PRINT_REG(fpsr); +#endif + PRINT_REG(id_aa64afr0_el1); + PRINT_REG(id_aa64afr1_el1); + PRINT_REG(id_aa64dfr0_el1); + PRINT_REG(id_aa64dfr1_el1); + PRINT_REG(id_aa64isar0_el1); + PRINT_REG(id_aa64isar1_el1); + PRINT_REG(id_aa64pfr0_el1); + PRINT_REG(id_aa64pfr1_el1); + PRINT_REG(id_afr0_el1); + PRINT_REG(id_dfr0_el1); + PRINT_REG(id_isar0_el1); + PRINT_REG(id_isar1_el1); + PRINT_REG(id_isar2_el1); + PRINT_REG(id_isar3_el1); + PRINT_REG(id_isar4_el1); + PRINT_REG(id_isar5_el1); + PRINT_REG(id_mmfr0_el1); + PRINT_REG(id_mmfr1_el1); + PRINT_REG(id_mmfr2_el1); + PRINT_REG(id_mmfr3_el1); +#if 0 + /* Missing from llvm */ + PRINT_REG(id_mmfr4_el1); +#endif + PRINT_REG(id_pfr0_el1); + PRINT_REG(id_pfr1_el1); + PRINT_REG(isr_el1); + PRINT_REG(mair_el1); + PRINT_REG(midr_el1); + PRINT_REG(mpidr_el1); + PRINT_REG(mvfr0_el1); + PRINT_REG(mvfr1_el1); + PRINT_REG(mvfr2_el1); + PRINT_REG(revidr_el1); + PRINT_REG(sctlr_el1); + PRINT_REG(sp_el0); + PRINT_REG(spsel); + PRINT_REG(spsr_el1); + PRINT_REG(tcr_el1); + PRINT_REG(tpidr_el0); + PRINT_REG(tpidr_el1); + PRINT_REG(tpidrro_el0); + PRINT_REG(ttbr0_el1); + PRINT_REG(ttbr1_el1); + PRINT_REG(vbar_el1); +#undef PRINT_REG +} + +DB_SHOW_COMMAND(vtop, db_show_vtop) +{ + uint64_t phys; + + if (have_addr) { + phys = arm64_address_translate_s1e1r(addr); + db_printf("EL1 physical address reg (read): 0x%016lx\n", phys); + phys = arm64_address_translate_s1e1w(addr); + db_printf("EL1 physical address reg (write): 0x%016lx\n", phys); + phys = arm64_address_translate_s1e0r(addr); + db_printf("EL0 physical address reg (read): 0x%016lx\n", phys); + phys = arm64_address_translate_s1e0w(addr); + db_printf("EL0 physical address reg (write): 0x%016lx\n", phys); + } else + db_printf("show vtop <virt_addr>\n"); +} +#endif diff --git a/sys/arm64/arm64/machdep_boot.c b/sys/arm64/arm64/machdep_boot.c new file mode 100644 index 000000000000..9ab4edf616e2 --- /dev/null +++ b/sys/arm64/arm64/machdep_boot.c @@ -0,0 +1,232 @@ +/*- + * Copyright (c) 2004 Olivier Houchard + * Copyright (c) 1994-1998 Mark Brinicombe. + * Copyright (c) 1994 Brini. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
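The vtop command above leans on arm64_address_translate_s1e1r() and its siblings. As background (inferred from the architecture's AT instruction, not copied from the kernel's implementation), such a helper issues an address-translation operation and reads the result back from PAR_EL1, roughly:

#include <stdint.h>

/* AArch64, kernel context only; PAR_EL1 bit 0 set means the walk aborted. */
static inline uint64_t
translate_s1e1r_sketch(uint64_t va)
{
	uint64_t par;

	__asm__ __volatile__(
	    "at   s1e1r, %1\n\t"	/* stage-1, EL1, read permission check */
	    "isb\n\t"			/* PAR_EL1 is valid after an ISB */
	    "mrs  %0, par_el1"
	    : "=r" (par) : "r" (va));
	return (par);
}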
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_platform.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ctype.h>
+#include <sys/linker.h>
+#include <sys/reboot.h>
+#include <sys/sysctl.h>
+#ifdef FDT
+#include <sys/boot.h>
+#endif
+
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/metadata.h>
+#include <machine/vmparam.h>
+
+#ifdef FDT
+#include <contrib/libfdt/libfdt.h>
+#include <dev/fdt/fdt_common.h>
+#endif
+
+extern int *end;
+static char *loader_envp;
+static char static_kenv[4096];
+
+#ifdef FDT
+#define CMDLINE_GUARD "FreeBSD:"
+#define LBABI_MAX_COMMAND_LINE 512
+static char linux_command_line[LBABI_MAX_COMMAND_LINE + 1];
+#endif
+
+/*
+ * Fake up a boot descriptor table
+ */
+#define PRELOAD_PUSH_VALUE(type, value) do {			\
+	*(type *)(preload_ptr + size) = (value);		\
+	size += sizeof(type);					\
+} while (0)
+
+#define PRELOAD_PUSH_STRING(str) do {				\
+	uint32_t ssize;						\
+	ssize = strlen(str) + 1;				\
+	PRELOAD_PUSH_VALUE(uint32_t, ssize);			\
+	strcpy((char*)(preload_ptr + size), str);		\
+	size += ssize;						\
+	size = roundup(size, sizeof(u_long));			\
+} while (0)
+
+/* Build minimal set of metadata. */
+static vm_offset_t
+fake_preload_metadata(void *dtb_ptr, size_t dtb_size)
+{
+	vm_offset_t lastaddr;
+	static char fake_preload[256];
+	caddr_t preload_ptr;
+	size_t size;
+
+	lastaddr = (vm_offset_t)&end;
+	preload_ptr = (caddr_t)&fake_preload[0];
+	size = 0;
+
+	PRELOAD_PUSH_VALUE(uint32_t, MODINFO_NAME);
+	PRELOAD_PUSH_STRING("kernel");
+
+	PRELOAD_PUSH_VALUE(uint32_t, MODINFO_TYPE);
+	PRELOAD_PUSH_STRING("elf kernel");
+
+	PRELOAD_PUSH_VALUE(uint32_t, MODINFO_ADDR);
+	PRELOAD_PUSH_VALUE(uint32_t, sizeof(vm_offset_t));
+	PRELOAD_PUSH_VALUE(uint64_t, VM_MIN_KERNEL_ADDRESS);
+
+	PRELOAD_PUSH_VALUE(uint32_t, MODINFO_SIZE);
+	PRELOAD_PUSH_VALUE(uint32_t, sizeof(size_t));
+	PRELOAD_PUSH_VALUE(uint64_t, (size_t)(&end - VM_MIN_KERNEL_ADDRESS));
+
+	if (dtb_ptr != NULL) {
+		/* Copy DTB to KVA space and insert it into module chain. */
+		lastaddr = roundup(lastaddr, sizeof(int));
+		PRELOAD_PUSH_VALUE(uint32_t, MODINFO_METADATA | MODINFOMD_DTBP);
+		PRELOAD_PUSH_VALUE(uint32_t, sizeof(uint64_t));
+		PRELOAD_PUSH_VALUE(uint64_t, (uint64_t)lastaddr);
+		memmove((void *)lastaddr, dtb_ptr, dtb_size);
+		lastaddr += dtb_size;
+		lastaddr = roundup(lastaddr, sizeof(int));
+	}
+	/* End marker */
+	PRELOAD_PUSH_VALUE(uint32_t, 0);
+	PRELOAD_PUSH_VALUE(uint32_t, 0);
+
+	preload_metadata = (caddr_t)(uintptr_t)fake_preload;
+
+	init_static_kenv(NULL, 0);
+
+	return (lastaddr);
+}
+
+#ifdef FDT
+
+/* Convert the U-Boot command line into FreeBSD kenv and boot options.
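For readers unfamiliar with the preload format built above: each record is a 32-bit tag, a 32-bit payload size, then the payload rounded up to an 8-byte (u_long) boundary, terminated by two zero words. A sketch of a matching consumer (illustrative; the kernel's real readers are preload_search_by_type() and friends):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void *
find_record_sketch(char *md, uint32_t want, uint32_t *sizep)
{
	uint32_t tag, len;
	size_t off = 0;

	for (;;) {
		memcpy(&tag, md + off, sizeof(tag));
		memcpy(&len, md + off + 4, sizeof(len));
		if (tag == 0 && len == 0)
			return (NULL);			/* end marker */
		off += 8;				/* skip the header */
		if (tag == want) {
			*sizep = len;
			return (md + off);
		}
		off += (len + 7) & ~(size_t)7;		/* roundup(len, 8) */
	}
}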
*/ +static void +cmdline_set_env(char *cmdline, const char *guard) +{ + size_t guard_len; + + /* Skip leading spaces. */ + while (isspace(*cmdline)) + cmdline++; + + /* Test and remove guard. */ + if (guard != NULL && guard[0] != '\0') { + guard_len = strlen(guard); + if (strncasecmp(cmdline, guard, guard_len) != 0) + return; + cmdline += guard_len; + } + + boothowto |= boot_parse_cmdline(cmdline); +} + +void +parse_fdt_bootargs(void) +{ + + if (loader_envp == NULL && fdt_get_chosen_bootargs(linux_command_line, + LBABI_MAX_COMMAND_LINE) == 0) { + init_static_kenv(static_kenv, sizeof(static_kenv)); + cmdline_set_env(linux_command_line, CMDLINE_GUARD); + } +} + +#endif + +#if defined(LINUX_BOOT_ABI) && defined(FDT) +static vm_offset_t +linux_parse_boot_param(struct arm64_bootparams *abp) +{ + struct fdt_header *dtb_ptr; + size_t dtb_size; + + if (abp->modulep == 0) + return (0); + /* Test if modulep point to valid DTB. */ + dtb_ptr = (struct fdt_header *)abp->modulep; + if (fdt_check_header(dtb_ptr) != 0) + return (0); + dtb_size = fdt_totalsize(dtb_ptr); + return (fake_preload_metadata(dtb_ptr, dtb_size)); +} + +#endif + +static vm_offset_t +freebsd_parse_boot_param(struct arm64_bootparams *abp) +{ + vm_offset_t lastaddr = 0; + void *kmdp; +#ifdef DDB + vm_offset_t ksym_start; + vm_offset_t ksym_end; +#endif + + if (abp->modulep == 0) + return (0); + + preload_metadata = (caddr_t)(uintptr_t)(abp->modulep); + kmdp = preload_search_by_type("elf kernel"); + if (kmdp == NULL) + return (0); + + boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); + loader_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); + init_static_kenv(loader_envp, 0); + lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); +#ifdef DDB + ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); + ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); + db_fetch_ksymtab(ksym_start, ksym_end, 0); +#endif + return (lastaddr); +} + +vm_offset_t +parse_boot_param(struct arm64_bootparams *abp) +{ + vm_offset_t lastaddr; + +#if defined(LINUX_BOOT_ABI) && defined(FDT) + lastaddr = linux_parse_boot_param(abp); + if (lastaddr != 0) + return (lastaddr); +#endif + lastaddr = freebsd_parse_boot_param(abp); + if (lastaddr != 0) + return (lastaddr); + + /* Fall back to hardcoded metadata. */ + lastaddr = fake_preload_metadata(NULL, 0); + + return (lastaddr); +} diff --git a/sys/arm64/arm64/mem.c b/sys/arm64/arm64/mem.c new file mode 100644 index 000000000000..d51744c6fbe3 --- /dev/null +++ b/sys/arm64/arm64/mem.c @@ -0,0 +1,138 @@ +/*- + * Copyright (c) 2014 Andrew Turner + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
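A worked example of the guard handling above: only command lines beginning with the "FreeBSD:" guard are honoured, so a stale Linux-style bootargs string is ignored rather than misparsed. Sketch (the flag parsing done by boot_parse_cmdline() is elided):

#include <stdio.h>
#include <string.h>
#include <strings.h>

int
main(void)
{
	const char *guard = "FreeBSD:";
	const char *cmdline = "FreeBSD:-v -s";

	if (strncasecmp(cmdline, guard, strlen(guard)) == 0)
		printf("parse \"%s\"\n", cmdline + strlen(guard));
	else
		printf("ignored\n");	/* e.g. "console=ttyAMA0" */
	return (0);
}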
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/malloc.h> +#include <sys/memrange.h> +#include <sys/uio.h> + +#include <machine/memdev.h> +#include <machine/vmparam.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_extern.h> +#include <vm/vm_page.h> + +struct mem_range_softc mem_range_softc; + +int +memrw(struct cdev *dev, struct uio *uio, int flags) +{ + struct iovec *iov; + struct vm_page m; + vm_page_t marr; + vm_offset_t off, v; + u_int cnt; + int error; + + error = 0; + + while (uio->uio_resid > 0 && error == 0) { + iov = uio->uio_iov; + if (iov->iov_len == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + if (uio->uio_iovcnt < 0) + panic("memrw"); + continue; + } + + v = uio->uio_offset; + off = v & PAGE_MASK; + cnt = ulmin(iov->iov_len, PAGE_SIZE - (u_int)off); + if (cnt == 0) + continue; + + switch(dev2unit(dev)) { + case CDEV_MINOR_KMEM: + /* If the address is in the DMAP just copy it */ + if (VIRT_IN_DMAP(v)) { + error = uiomove((void *)v, cnt, uio); + break; + } + + if (!kernacc((void *)v, cnt, uio->uio_rw == UIO_READ ? + VM_PROT_READ : VM_PROT_WRITE)) { + error = EFAULT; + break; + } + + /* Get the physical address to read */ + v = pmap_extract(kernel_pmap, v); + if (v == 0) { + error = EFAULT; + break; + } + + /* FALLTHROUGH */ + case CDEV_MINOR_MEM: + /* If within the DMAP use this to copy from */ + if (PHYS_IN_DMAP(v)) { + v = PHYS_TO_DMAP(v); + error = uiomove((void *)v, cnt, uio); + break; + } + + /* Have uiomove_fromphys handle the data */ + m.phys_addr = trunc_page(v); + marr = &m; + uiomove_fromphys(&marr, off, cnt, uio); + break; + } + } + + return (error); +} + +/* + * allow user processes to MMAP some memory sections + * instead of going through read/write + */ +/* ARGSUSED */ +int +memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, + int prot __unused, vm_memattr_t *memattr __unused) +{ + if (dev2unit(dev) == CDEV_MINOR_MEM) { + *paddr = offset; + return (0); + } + return (-1); +} + +int +memioctl_md(struct cdev *dev __unused, u_long cmd __unused, + caddr_t data __unused, int flags __unused, struct thread *td __unused) +{ + return (ENOTTY); +} diff --git a/sys/arm64/arm64/memcpy.S b/sys/arm64/arm64/memcpy.S new file mode 100644 index 000000000000..f98c2513fa58 --- /dev/null +++ b/sys/arm64/arm64/memcpy.S @@ -0,0 +1,219 @@ +/* Copyright (c) 2012, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Linaro nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +/* + * Copyright (c) 2015 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <machine/asm.h> +__FBSDID("$FreeBSD$"); + +/* Assumptions: + * + * ARMv8-a, AArch64, unaligned accesses. + * + */ + +#define dstin x0 +#define src x1 +#define count x2 +#define dst x3 +#define srcend x4 +#define dstend x5 +#define A_l x6 +#define A_lw w6 +#define A_h x7 +#define A_hw w7 +#define B_l x8 +#define B_lw w8 +#define B_h x9 +#define C_l x10 +#define C_h x11 +#define D_l x12 +#define D_h x13 +#define E_l src +#define E_h count +#define F_l srcend +#define F_h dst +#define tmp1 x9 + +#define L(l) .L ## l + +/* Copies are split into 3 main cases: small copies of up to 16 bytes, + medium copies of 17..96 bytes which are fully unrolled. Large copies + of more than 96 bytes align the destination and use an unrolled loop + processing 64 bytes per iteration. + Small and medium copies read all data before writing, allowing any + kind of overlap, and memmove tailcalls memcpy for these cases as + well as non-overlapping copies. 
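The small-copy cases described here all rely on one idiom: load a head chunk and a tail chunk that may overlap, and only then store both, so any source/destination overlap is harmless. The same trick in C for the 8..16 byte class (a sketch; memcpy() stands in for the unaligned ldr/str pairs):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Valid for 8 <= n <= 16, mirroring the copy16 path in the assembly. */
static void
copy_8_to_16_sketch(uint8_t *dst, const uint8_t *src, size_t n)
{
	uint64_t head, tail;

	memcpy(&head, src, 8);		/* first 8 bytes */
	memcpy(&tail, src + n - 8, 8);	/* last 8 bytes, may overlap head */
	memcpy(dst, &head, 8);
	memcpy(dst + n - 8, &tail, 8);
}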
+*/ + +ENTRY(memcpy) + prfm PLDL1KEEP, [src] + add srcend, src, count + add dstend, dstin, count + cmp count, 16 + b.ls L(copy16) + cmp count, 96 + b.hi L(copy_long) + + /* Medium copies: 17..96 bytes. */ + sub tmp1, count, 1 + ldp A_l, A_h, [src] + tbnz tmp1, 6, L(copy96) + ldp D_l, D_h, [srcend, -16] + tbz tmp1, 5, 1f + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [srcend, -32] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstend, -32] +1: + stp A_l, A_h, [dstin] + stp D_l, D_h, [dstend, -16] + ret + + .p2align 4 + /* Small copies: 0..16 bytes. */ +L(copy16): + cmp count, 8 + b.lo 1f + ldr A_l, [src] + ldr A_h, [srcend, -8] + str A_l, [dstin] + str A_h, [dstend, -8] + ret + .p2align 4 +1: + tbz count, 2, 1f + ldr A_lw, [src] + ldr A_hw, [srcend, -4] + str A_lw, [dstin] + str A_hw, [dstend, -4] + ret + + /* Copy 0..3 bytes. Use a branchless sequence that copies the same + byte 3 times if count==1, or the 2nd byte twice if count==2. */ +1: + cbz count, 2f + lsr tmp1, count, 1 + ldrb A_lw, [src] + ldrb A_hw, [srcend, -1] + ldrb B_lw, [src, tmp1] + strb A_lw, [dstin] + strb B_lw, [dstin, tmp1] + strb A_hw, [dstend, -1] +2: ret + + .p2align 4 + /* Copy 64..96 bytes. Copy 64 bytes from the start and + 32 bytes from the end. */ +L(copy96): + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [src, 32] + ldp D_l, D_h, [src, 48] + ldp E_l, E_h, [srcend, -32] + ldp F_l, F_h, [srcend, -16] + stp A_l, A_h, [dstin] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstin, 32] + stp D_l, D_h, [dstin, 48] + stp E_l, E_h, [dstend, -32] + stp F_l, F_h, [dstend, -16] + ret + + /* Align DST to 16 byte alignment so that we don't cross cache line + boundaries on both loads and stores. There are at least 96 bytes + to copy, so copy 16 bytes unaligned and then align. The loop + copies 64 bytes per iteration and prefetches one iteration ahead. */ + + .p2align 4 +L(copy_long): + and tmp1, dstin, 15 + bic dst, dstin, 15 + ldp D_l, D_h, [src] + sub src, src, tmp1 + add count, count, tmp1 /* Count is now 16 too large. */ + ldp A_l, A_h, [src, 16] + stp D_l, D_h, [dstin] + ldp B_l, B_h, [src, 32] + ldp C_l, C_h, [src, 48] + ldp D_l, D_h, [src, 64]! + subs count, count, 128 + 16 /* Test and readjust count. */ + b.ls 2f +1: + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [src, 16] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [src, 32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [src, 48] + stp D_l, D_h, [dst, 64]! + ldp D_l, D_h, [src, 64]! + subs count, count, 64 + b.hi 1b + + /* Write the last full set of 64 bytes. The remainder is at most 64 + bytes, so it is safe to always copy 64 bytes from the end even if + there is just 1 byte left. */ +2: + ldp E_l, E_h, [srcend, -64] + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [srcend, -48] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [srcend, -32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [srcend, -16] + stp D_l, D_h, [dst, 64] + stp E_l, E_h, [dstend, -64] + stp A_l, A_h, [dstend, -48] + stp B_l, B_h, [dstend, -32] + stp C_l, C_h, [dstend, -16] + ret +END(memcpy) diff --git a/sys/arm64/arm64/memmove.S b/sys/arm64/arm64/memmove.S new file mode 100644 index 000000000000..4b99dccc536e --- /dev/null +++ b/sys/arm64/arm64/memmove.S @@ -0,0 +1,150 @@ +/* Copyright (c) 2013, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Linaro nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +/* + * Copyright (c) 2015 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <machine/asm.h> +__FBSDID("$FreeBSD$"); + +/* Assumptions: + * + * ARMv8-a, AArch64, unaligned accesses + */ + +/* Parameters and result. */ +#define dstin x0 +#define src x1 +#define count x2 +#define srcend x3 +#define dstend x4 +#define tmp1 x5 +#define A_l x6 +#define A_h x7 +#define B_l x8 +#define B_h x9 +#define C_l x10 +#define C_h x11 +#define D_l x12 +#define D_h x13 +#define E_l count +#define E_h tmp1 + +/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps. + Larger backwards copies are also handled by memcpy. The only remaining + case is forward large copies. The destination is aligned, and an + unrolled loop processes 64 bytes per iteration. 
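The entry sequence that implements this dispatch ("sub tmp1, dstin, src; cmp count, 96; ccmp tmp1, count, 2, hi; b.hs memcpy") packs it into two compares: branch to memcpy when the copy is short, or when the unsigned distance dst - src is at least count, in which case a forward copy cannot clobber unread source bytes. Restated in C (sketch):

#include <stddef.h>
#include <stdint.h>

static int
use_memcpy_sketch(const void *dst, const void *src, size_t n)
{
	/* ccmp's immediate forces "hs" when n <= 96 ("hi" is false). */
	return (n <= 96 || (uintptr_t)dst - (uintptr_t)src >= n);
}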
+*/ + +ENTRY(bcopy) + /* Switch the input pointers when called as bcopy */ + mov x3, x1 + mov x1, x0 + mov x0, x3 +EENTRY(memmove) + sub tmp1, dstin, src + cmp count, 96 + ccmp tmp1, count, 2, hi + b.hs memcpy + + cbz tmp1, 3f + add dstend, dstin, count + add srcend, src, count + + /* Align dstend to 16 byte alignment so that we don't cross cache line + boundaries on both loads and stores. There are at least 96 bytes + to copy, so copy 16 bytes unaligned and then align. The loop + copies 64 bytes per iteration and prefetches one iteration ahead. */ + + and tmp1, dstend, 15 + ldp D_l, D_h, [srcend, -16] + sub srcend, srcend, tmp1 + sub count, count, tmp1 + ldp A_l, A_h, [srcend, -16] + stp D_l, D_h, [dstend, -16] + ldp B_l, B_h, [srcend, -32] + ldp C_l, C_h, [srcend, -48] + ldp D_l, D_h, [srcend, -64]! + sub dstend, dstend, tmp1 + subs count, count, 128 + b.ls 2f + nop +1: + stp A_l, A_h, [dstend, -16] + ldp A_l, A_h, [srcend, -16] + stp B_l, B_h, [dstend, -32] + ldp B_l, B_h, [srcend, -32] + stp C_l, C_h, [dstend, -48] + ldp C_l, C_h, [srcend, -48] + stp D_l, D_h, [dstend, -64]! + ldp D_l, D_h, [srcend, -64]! + subs count, count, 64 + b.hi 1b + + /* Write the last full set of 64 bytes. The remainder is at most 64 + bytes, so it is safe to always copy 64 bytes from the start even if + there is just 1 byte left. */ +2: + ldp E_l, E_h, [src, 48] + stp A_l, A_h, [dstend, -16] + ldp A_l, A_h, [src, 32] + stp B_l, B_h, [dstend, -32] + ldp B_l, B_h, [src, 16] + stp C_l, C_h, [dstend, -48] + ldp C_l, C_h, [src] + stp D_l, D_h, [dstend, -64] + stp E_l, E_h, [dstin, 48] + stp A_l, A_h, [dstin, 32] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstin] +3: ret +EEND(memmove) +END(bcopy) diff --git a/sys/arm64/arm64/minidump_machdep.c b/sys/arm64/arm64/minidump_machdep.c new file mode 100644 index 000000000000..ba22f7dfc16f --- /dev/null +++ b/sys/arm64/arm64/minidump_machdep.c @@ -0,0 +1,448 @@ +/*- + * Copyright (c) 2006 Peter Wemm + * Copyright (c) 2015 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_watchdog.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/cons.h> +#include <sys/kernel.h> +#include <sys/kerneldump.h> +#include <sys/msgbuf.h> +#include <sys/watchdog.h> +#include <sys/vmmeter.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_page.h> +#include <vm/vm_phys.h> +#include <vm/pmap.h> + +#include <machine/md_var.h> +#include <machine/pte.h> +#include <machine/minidump.h> + +CTASSERT(sizeof(struct kerneldumpheader) == 512); + +uint64_t *vm_page_dump; +int vm_page_dump_size; + +static struct kerneldumpheader kdh; + +/* Handle chunked writes. */ +static size_t fragsz; +static void *dump_va; +static size_t counter, progress, dumpsize; + +static uint64_t tmpbuffer[Ln_ENTRIES]; + +CTASSERT(sizeof(*vm_page_dump) == 8); + +static int +is_dumpable(vm_paddr_t pa) +{ + vm_page_t m; + int i; + + if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL) + return ((m->flags & PG_NODUMP) == 0); + for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { + if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) + return (1); + } + return (0); +} + +static int +blk_flush(struct dumperinfo *di) +{ + int error; + + if (fragsz == 0) + return (0); + + error = dump_append(di, dump_va, 0, fragsz); + fragsz = 0; + return (error); +} + +static struct { + int min_per; + int max_per; + int visited; +} progress_track[10] = { + { 0, 10, 0}, + { 10, 20, 0}, + { 20, 30, 0}, + { 30, 40, 0}, + { 40, 50, 0}, + { 50, 60, 0}, + { 60, 70, 0}, + { 70, 80, 0}, + { 80, 90, 0}, + { 90, 100, 0} +}; + +static void +report_progress(size_t progress, size_t dumpsize) +{ + int sofar, i; + + sofar = 100 - ((progress * 100) / dumpsize); + for (i = 0; i < nitems(progress_track); i++) { + if (sofar < progress_track[i].min_per || + sofar > progress_track[i].max_per) + continue; + if (progress_track[i].visited) + return; + progress_track[i].visited = 1; + printf("..%d%%", sofar); + return; + } +} + +static int +blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) +{ + size_t len; + int error, c; + u_int maxdumpsz; + + maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); + if (maxdumpsz == 0) /* seatbelt */ + maxdumpsz = PAGE_SIZE; + error = 0; + if ((sz % PAGE_SIZE) != 0) { + printf("size not page aligned\n"); + return (EINVAL); + } + if (ptr != NULL && pa != 0) { + printf("cant have both va and pa!\n"); + return (EINVAL); + } + if ((((uintptr_t)pa) % PAGE_SIZE) != 0) { + printf("address not page aligned %p\n", ptr); + return (EINVAL); + } + if (ptr != NULL) { + /* + * If we're doing a virtual dump, flush any + * pre-existing pa pages. + */ + error = blk_flush(di); + if (error) + return (error); + } + while (sz) { + len = maxdumpsz - fragsz; + if (len > sz) + len = sz; + counter += len; + progress -= len; + if (counter >> 22) { + report_progress(progress, dumpsize); + counter &= (1 << 22) - 1; + } + + wdog_kern_pat(WD_LASTVAL); + + if (ptr) { + error = dump_append(di, ptr, 0, len); + if (error) + return (error); + ptr += len; + sz -= len; + } else { + dump_va = (void *)PHYS_TO_DMAP(pa); + fragsz += len; + pa += len; + sz -= len; + error = blk_flush(di); + if (error) + return (error); + } + + /* Check for user abort. 
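One detail of report_progress() above that is easy to misread: progress counts down from dumpsize to zero as blocks are written, so the percent-complete figure has to be inverted. A worked example:

#include <stdio.h>

int
main(void)
{
	unsigned long dumpsize = 1000, progress = 250;	/* 750 written */

	/* 100 - remaining% == done%; prints "75%". */
	printf("%lu%%\n", 100 - (progress * 100) / dumpsize);
	return (0);
}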
*/ + c = cncheckc(); + if (c == 0x03) + return (ECANCELED); + if (c != -1) + printf(" (CTRL-C to abort) "); + } + + return (0); +} + +int +minidumpsys(struct dumperinfo *di) +{ + struct minidumphdr mdhdr; + pd_entry_t *l0, *l1, *l2; + pt_entry_t *l3; + vm_offset_t va; + vm_paddr_t pa; + uint64_t bits; + uint32_t pmapsize; + int bit, error, i, j, retry_count; + + retry_count = 0; + retry: + retry_count++; + error = 0; + pmapsize = 0; + for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) { + pmapsize += PAGE_SIZE; + if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) + continue; + + if ((*l1 & ATTR_DESCR_MASK) == L1_BLOCK) { + pa = *l1 & ~ATTR_MASK; + for (i = 0; i < Ln_ENTRIES * Ln_ENTRIES; + i++, pa += PAGE_SIZE) + if (is_dumpable(pa)) + dump_add_page(pa); + pmapsize += (Ln_ENTRIES - 1) * PAGE_SIZE; + va += L1_SIZE - L2_SIZE; + } else if ((*l2 & ATTR_DESCR_MASK) == L2_BLOCK) { + pa = *l2 & ~ATTR_MASK; + for (i = 0; i < Ln_ENTRIES; i++, pa += PAGE_SIZE) { + if (is_dumpable(pa)) + dump_add_page(pa); + } + } else if ((*l2 & ATTR_DESCR_MASK) == L2_TABLE) { + for (i = 0; i < Ln_ENTRIES; i++) { + if ((l3[i] & ATTR_DESCR_MASK) != L3_PAGE) + continue; + pa = l3[i] & ~ATTR_MASK; + if (is_dumpable(pa)) + dump_add_page(pa); + } + } + } + + /* Calculate dump size. */ + dumpsize = pmapsize; + dumpsize += round_page(msgbufp->msg_size); + dumpsize += round_page(vm_page_dump_size); + for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { + bits = vm_page_dump[i]; + while (bits) { + bit = ffsl(bits) - 1; + pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + + bit) * PAGE_SIZE; + /* Clear out undumpable pages now if needed */ + if (is_dumpable(pa)) + dumpsize += PAGE_SIZE; + else + dump_drop_page(pa); + bits &= ~(1ul << bit); + } + } + dumpsize += PAGE_SIZE; + + progress = dumpsize; + + /* Initialize mdhdr */ + bzero(&mdhdr, sizeof(mdhdr)); + strcpy(mdhdr.magic, MINIDUMP_MAGIC); + mdhdr.version = MINIDUMP_VERSION; + mdhdr.msgbufsize = msgbufp->msg_size; + mdhdr.bitmapsize = vm_page_dump_size; + mdhdr.pmapsize = pmapsize; + mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; + mdhdr.dmapphys = DMAP_MIN_PHYSADDR; + mdhdr.dmapbase = DMAP_MIN_ADDRESS; + mdhdr.dmapend = DMAP_MAX_ADDRESS; + + dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AARCH64_VERSION, + dumpsize); + + error = dump_start(di, &kdh); + if (error != 0) + goto fail; + + printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, + ptoa((uintmax_t)physmem) / 1048576); + + /* Dump my header */ + bzero(&tmpbuffer, sizeof(tmpbuffer)); + bcopy(&mdhdr, &tmpbuffer, sizeof(mdhdr)); + error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); + if (error) + goto fail; + + /* Dump msgbuf up front */ + error = blk_write(di, (char *)msgbufp->msg_ptr, 0, + round_page(msgbufp->msg_size)); + if (error) + goto fail; + + /* Dump bitmap */ + error = blk_write(di, (char *)vm_page_dump, 0, + round_page(vm_page_dump_size)); + if (error) + goto fail; + + /* Dump kernel page directory pages */ + bzero(&tmpbuffer, sizeof(tmpbuffer)); + for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) { + if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) { + /* We always write a page, even if it is zero */ + error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); + if (error) + goto fail; + /* flush, in case we reuse tmpbuffer in the same block*/ + error = blk_flush(di); + if (error) + goto fail; + } else if ((*l1 & ATTR_DESCR_MASK) == L1_BLOCK) { + /* + * Handle a 1GB block mapping: write out 512 fake L2 + * pages. 
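What those fake pages contain: for each 1 GiB (L1) or 2 MiB (L2) block mapping, the dump synthesizes page-granular L3 tables so the minidump reader sees one uniform 4 KiB view. A sketch of filling one such table; L3_PAGE_SKETCH and ATTR_SKETCH are assumed stand-ins for the kernel's L3_PAGE and ATTR_DEFAULT bits:

#include <stdint.h>

#define	PAGE_SIZE_SKETCH	4096
#define	LN_ENTRIES_SKETCH	512
#define	L3_PAGE_SKETCH		0x3ULL		/* descriptor type (assumed) */
#define	ATTR_SKETCH		(1ULL << 10)	/* e.g. the AF bit (assumed) */

static void
fake_l3_table_sketch(uint64_t *pte, uint64_t block_pa)
{
	int i;

	/* One 4 KiB page entry per slot, walking across the block. */
	for (i = 0; i < LN_ENTRIES_SKETCH; i++)
		pte[i] = (block_pa + (uint64_t)i * PAGE_SIZE_SKETCH) |
		    ATTR_SKETCH | L3_PAGE_SKETCH;
}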
+ */ + pa = (*l1 & ~ATTR_MASK) | (va & L1_OFFSET); + + for (i = 0; i < Ln_ENTRIES; i++) { + for (j = 0; j < Ln_ENTRIES; j++) { + tmpbuffer[j] = pa + i * L2_SIZE + + j * PAGE_SIZE | ATTR_DEFAULT | + L3_PAGE; + } + error = blk_write(di, (char *)&tmpbuffer, 0, + PAGE_SIZE); + if (error) + goto fail; + } + /* flush, in case we reuse tmpbuffer in the same block*/ + error = blk_flush(di); + if (error) + goto fail; + bzero(&tmpbuffer, sizeof(tmpbuffer)); + va += L1_SIZE - L2_SIZE; + } else if ((*l2 & ATTR_DESCR_MASK) == L2_BLOCK) { + pa = (*l2 & ~ATTR_MASK) | (va & L2_OFFSET); + + /* Generate fake l3 entries based upon the l1 entry */ + for (i = 0; i < Ln_ENTRIES; i++) { + tmpbuffer[i] = pa + (i * PAGE_SIZE) | + ATTR_DEFAULT | L3_PAGE; + } + error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); + if (error) + goto fail; + /* flush, in case we reuse fakepd in the same block */ + error = blk_flush(di); + if (error) + goto fail; + bzero(&tmpbuffer, sizeof(tmpbuffer)); + continue; + } else { + pa = *l2 & ~ATTR_MASK; + + error = blk_write(di, NULL, pa, PAGE_SIZE); + if (error) + goto fail; + } + } + + /* Dump memory chunks */ + for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { + bits = vm_page_dump[i]; + while (bits) { + bit = ffsl(bits) - 1; + pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + + bit) * PAGE_SIZE; + error = blk_write(di, 0, pa, PAGE_SIZE); + if (error) + goto fail; + bits &= ~(1ul << bit); + } + } + + error = blk_flush(di); + if (error) + goto fail; + + error = dump_finish(di, &kdh); + if (error != 0) + goto fail; + + printf("\nDump complete\n"); + return (0); + +fail: + if (error < 0) + error = -error; + + printf("\n"); + if (error == ENOSPC) { + printf("Dump map grown while dumping. "); + if (retry_count < 5) { + printf("Retrying...\n"); + goto retry; + } + printf("Dump failed.\n"); + } + else if (error == ECANCELED) + printf("Dump aborted\n"); + else if (error == E2BIG) { + printf("Dump failed. Partition too small (about %lluMB were " + "needed this time).\n", (long long)dumpsize >> 20); + } else + printf("** DUMP FAILED (ERROR %d) **\n", error); + return (error); +} + +void +dump_add_page(vm_paddr_t pa) +{ + int idx, bit; + + pa >>= PAGE_SHIFT; + idx = pa >> 6; /* 2^6 = 64 */ + bit = pa & 63; + atomic_set_long(&vm_page_dump[idx], 1ul << bit); +} + +void +dump_drop_page(vm_paddr_t pa) +{ + int idx, bit; + + pa >>= PAGE_SHIFT; + idx = pa >> 6; /* 2^6 = 64 */ + bit = pa & 63; + atomic_clear_long(&vm_page_dump[idx], 1ul << bit); +} diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c new file mode 100644 index 000000000000..8c8ceafe18e9 --- /dev/null +++ b/sys/arm64/arm64/mp_machdep.c @@ -0,0 +1,896 @@ +/*- + * Copyright (c) 2015-2016 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
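dump_add_page() and dump_drop_page() above index the bitmap by page frame number: word = pfn >> 6, bit = pfn & 63, and the dump loops recover the physical address from (word, bit). A self-contained sketch of both directions, where __builtin_ctzll() plays the role of ffsl() - 1:

#include <stdint.h>
#include <stdio.h>

#define	PAGE_SHIFT_SKETCH	12

static uint64_t bitmap_sketch[4];		/* covers 256 pages */

static void
mark_page(uint64_t pa)
{
	uint64_t pfn = pa >> PAGE_SHIFT_SKETCH;

	bitmap_sketch[pfn >> 6] |= UINT64_C(1) << (pfn & 63);
}

int
main(void)
{
	uint64_t bits, pa;
	int i, bit;

	mark_page(0x2000);			/* pfn 2 */
	mark_page(0x40000);			/* pfn 64 */
	for (i = 0; i < 4; i++) {
		for (bits = bitmap_sketch[i]; bits != 0; bits &= bits - 1) {
			bit = __builtin_ctzll(bits);
			pa = (((uint64_t)i << 6) + bit) << PAGE_SHIFT_SKETCH;
			printf("dump page at %#llx\n", (unsigned long long)pa);
		}
	}
	return (0);
}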
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include "opt_acpi.h" +#include "opt_ddb.h" +#include "opt_kstack_pages.h" +#include "opt_platform.h" + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/cpu.h> +#include <sys/csan.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/pcpu.h> +#include <sys/proc.h> +#include <sys/sched.h> +#include <sys/smp.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> + +#include <machine/machdep.h> +#include <machine/debug_monitor.h> +#include <machine/intr.h> +#include <machine/smp.h> +#ifdef VFP +#include <machine/vfp.h> +#endif + +#ifdef DEV_ACPI +#include <contrib/dev/acpica/include/acpi.h> +#include <dev/acpica/acpivar.h> +#endif + +#ifdef FDT +#include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> +#include <dev/ofw/ofw_cpu.h> +#endif + +#include <dev/psci/psci.h> + +#include "pic_if.h" + +#define MP_QUIRK_CPULIST 0x01 /* The list of cpus may be wrong, */ + /* don't panic if one fails to start */ +static uint32_t mp_quirks; + +#ifdef FDT +static struct { + const char *compat; + uint32_t quirks; +} fdt_quirks[] = { + { "arm,foundation-aarch64", MP_QUIRK_CPULIST }, + { "arm,fvp-base", MP_QUIRK_CPULIST }, + /* This is incorrect in some DTS files */ + { "arm,vfp-base", MP_QUIRK_CPULIST }, + { NULL, 0 }, +}; +#endif + +typedef void intr_ipi_send_t(void *, cpuset_t, u_int); +typedef void intr_ipi_handler_t(void *); + +#define INTR_IPI_NAMELEN (MAXCOMLEN + 1) +struct intr_ipi { + intr_ipi_handler_t * ii_handler; + void * ii_handler_arg; + intr_ipi_send_t * ii_send; + void * ii_send_arg; + char ii_name[INTR_IPI_NAMELEN]; + u_long * ii_count; +}; + +static struct intr_ipi ipi_sources[INTR_IPI_COUNT]; + +static struct intr_ipi *intr_ipi_lookup(u_int); +static void intr_pic_ipi_setup(u_int, const char *, intr_ipi_handler_t *, + void *); + +static void ipi_ast(void *); +static void ipi_hardclock(void *); +static void ipi_preempt(void *); +static void ipi_rendezvous(void *); +static void ipi_stop(void *); + +struct pcb stoppcbs[MAXCPU]; + +/* + * Not all systems boot from the first CPU in the device tree. To work around + * this we need to find which CPU we have booted from so when we later + * enable the secondary CPUs we skip this one. + */ +static int cpu0 = -1; + +void mpentry(unsigned long cpuid); +void init_secondary(uint64_t); + +/* Synchronize AP startup. */ +static struct mtx ap_boot_mtx; + +/* Stacks for AP initialization, discarded once idle threads are started. 
*/ +void *bootstack; +static void *bootstacks[MAXCPU]; + +/* Count of started APs, used to synchronize access to bootstack. */ +static volatile int aps_started; + +/* Set to 1 once we're ready to let the APs out of the pen. */ +static volatile int aps_ready; + +/* Temporary variables for init_secondary() */ +void *dpcpu[MAXCPU - 1]; + +static void +release_aps(void *dummy __unused) +{ + int i, started; + + /* Only release CPUs if they exist */ + if (mp_ncpus == 1) + return; + + intr_pic_ipi_setup(IPI_AST, "ast", ipi_ast, NULL); + intr_pic_ipi_setup(IPI_PREEMPT, "preempt", ipi_preempt, NULL); + intr_pic_ipi_setup(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL); + intr_pic_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL); + intr_pic_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL); + intr_pic_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL); + + atomic_store_rel_int(&aps_ready, 1); + /* Wake up the other CPUs */ + __asm __volatile( + "dsb ishst \n" + "sev \n" + ::: "memory"); + + printf("Release APs..."); + + started = 0; + for (i = 0; i < 2000; i++) { + if (smp_started) { + printf("done\n"); + return; + } + /* + * Don't time out while we are making progress. Some large + * systems can take a while to start all CPUs. + */ + if (smp_cpus > started) { + i = 0; + started = smp_cpus; + } + DELAY(1000); + } + + printf("APs not started\n"); +} +SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); + +void +init_secondary(uint64_t cpu) +{ + struct pcpu *pcpup; + pmap_t pmap0; + + pcpup = &__pcpu[cpu]; + /* + * Set the pcpu pointer with a backup in tpidr_el1 to be + * loaded when entering the kernel from userland. + */ + __asm __volatile( + "mov x18, %0 \n" + "msr tpidr_el1, %0" :: "r"(pcpup)); + + /* + * Identify current CPU. This is necessary to setup + * affinity registers and to provide support for + * runtime chip identification. + * + * We need this before signalling the CPU is ready to + * let the boot CPU use the results. + */ + identify_cpu(cpu); + + /* Ensure the stores in identify_cpu have completed */ + atomic_thread_fence_acq_rel(); + + /* Signal the BSP and spin until it has released all APs. */ + atomic_add_int(&aps_started, 1); + while (!atomic_load_int(&aps_ready)) + __asm __volatile("wfe"); + + pcpup->pc_midr = get_midr(); + + /* Initialize curthread */ + KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); + pcpup->pc_curthread = pcpup->pc_idlethread; + + /* Initialize curpmap to match TTBR0's current setting. */ + pmap0 = vmspace_pmap(&vmspace0); + KASSERT(pmap_to_ttbr0(pmap0) == READ_SPECIALREG(ttbr0_el1), + ("pmap0 doesn't match cpu %ld's ttbr0", cpu)); + pcpup->pc_curpmap = pmap0; + + install_cpu_errata(); + + intr_pic_init_secondary(); + + /* Start per-CPU event timers. */ + cpu_initclocks_ap(); + +#ifdef VFP + vfp_init(); +#endif + + dbg_init(); + pan_enable(); + + mtx_lock_spin(&ap_boot_mtx); + atomic_add_rel_32(&smp_cpus, 1); + if (smp_cpus == mp_ncpus) { + /* enable IPI's, tlb shootdown, freezes etc */ + atomic_store_rel_int(&smp_started, 1); + } + mtx_unlock_spin(&ap_boot_mtx); + + kcsan_cpu_init(cpu); + + /* + * Assert that smp_after_idle_runnable condition is reasonable. 
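The release in release_aps() below pairs a release-store of aps_ready with "dsb ishst; sev", while each AP spins in WFE until it observes the flag; SEV wakes the WFE without any interrupt machinery. A miniature of the same handshake (AArch64 only; C11 atomics stand in for the kernel's atomic_store_rel_int()/atomic_load_int()):

#include <stdatomic.h>

static _Atomic int ready_sketch;

static void
bsp_release_sketch(void)
{
	atomic_store_explicit(&ready_sketch, 1, memory_order_release);
	/* Make the store visible, then wake every core parked in WFE. */
	__asm__ __volatile__("dsb ishst\n\tsev" ::: "memory");
}

static void
ap_wait_sketch(void)
{
	while (!atomic_load_explicit(&ready_sketch, memory_order_acquire))
		__asm__ __volatile__("wfe");
}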
+ */ + MPASS(PCPU_GET(curpcb) == NULL); + + /* Enter the scheduler */ + sched_throw(NULL); + + panic("scheduler returned us to init_secondary"); + /* NOTREACHED */ +} + +static void +smp_after_idle_runnable(void *arg __unused) +{ + struct pcpu *pc; + int cpu; + + for (cpu = 1; cpu < mp_ncpus; cpu++) { + if (bootstacks[cpu] != NULL) { + pc = pcpu_find(cpu); + while (atomic_load_ptr(&pc->pc_curpcb) == NULL) + cpu_spinwait(); + kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE); + } + } +} +SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY, + smp_after_idle_runnable, NULL); + +/* + * Send IPI thru interrupt controller. + */ +static void +pic_ipi_send(void *arg, cpuset_t cpus, u_int ipi) +{ + + KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); + + /* + * Ensure that this CPU's stores will be visible to IPI + * recipients before starting to send the interrupts. + */ + dsb(ishst); + + PIC_IPI_SEND(intr_irq_root_dev, arg, cpus, ipi); +} + +/* + * Setup IPI handler on interrupt controller. + * + * Not SMP coherent. + */ +static void +intr_pic_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand, + void *arg) +{ + struct intr_irqsrc *isrc; + struct intr_ipi *ii; + int error; + + KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); + KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi)); + + error = PIC_IPI_SETUP(intr_irq_root_dev, ipi, &isrc); + if (error != 0) + return; + + isrc->isrc_handlers++; + + ii = intr_ipi_lookup(ipi); + KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi)); + + ii->ii_handler = hand; + ii->ii_handler_arg = arg; + ii->ii_send = pic_ipi_send; + ii->ii_send_arg = isrc; + strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN); + ii->ii_count = intr_ipi_setup_counters(name); +} + +static void +intr_ipi_send(cpuset_t cpus, u_int ipi) +{ + struct intr_ipi *ii; + + ii = intr_ipi_lookup(ipi); + if (ii->ii_count == NULL) + panic("%s: not setup IPI %u", __func__, ipi); + + ii->ii_send(ii->ii_send_arg, cpus, ipi); +} + +static void +ipi_ast(void *dummy __unused) +{ + + CTR0(KTR_SMP, "IPI_AST"); +} + +static void +ipi_hardclock(void *dummy __unused) +{ + + CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); + hardclockintr(); +} + +static void +ipi_preempt(void *dummy __unused) +{ + CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__); + sched_preempt(curthread); +} + +static void +ipi_rendezvous(void *dummy __unused) +{ + + CTR0(KTR_SMP, "IPI_RENDEZVOUS"); + smp_rendezvous_action(); +} + +static void +ipi_stop(void *dummy __unused) +{ + u_int cpu; + + CTR0(KTR_SMP, "IPI_STOP"); + + cpu = PCPU_GET(cpuid); + savectx(&stoppcbs[cpu]); + + /* Indicate we are stopped */ + CPU_SET_ATOMIC(cpu, &stopped_cpus); + + /* Wait for restart */ + while (!CPU_ISSET(cpu, &started_cpus)) + cpu_spinwait(); + +#ifdef DDB + dbg_register_sync(NULL); +#endif + + CPU_CLR_ATOMIC(cpu, &started_cpus); + CPU_CLR_ATOMIC(cpu, &stopped_cpus); + CTR0(KTR_SMP, "IPI_STOP (restart)"); +} + +struct cpu_group * +cpu_topo(void) +{ + + return (smp_topo_none()); +} + +/* Determine if we running MP machine */ +int +cpu_mp_probe(void) +{ + + /* ARM64TODO: Read the u bit of mpidr_el1 to determine this */ + return (1); +} + +static bool +start_cpu(u_int id, uint64_t target_cpu) +{ + struct pcpu *pcpup; + vm_paddr_t pa; + u_int cpuid; + int err, naps; + + /* Check we are able to start this cpu */ + if (id > mp_maxid) + return (false); + + KASSERT(id < MAXCPU, ("Too many CPUs")); + + /* We are already running on cpu 0 */ + if (id == cpu0) + return (true); + + /* + * Rotate 
the CPU IDs to put the boot CPU as CPU 0. We keep the other + * CPUs ordered as they are likely grouped into clusters so it can be + * useful to keep that property, e.g. for the GICv3 driver to send + * an IPI to all CPUs in the cluster. + */ + cpuid = id; + if (cpuid < cpu0) + cpuid += mp_maxid + 1; + cpuid -= cpu0; + + pcpup = &__pcpu[cpuid]; + pcpu_init(pcpup, cpuid, sizeof(struct pcpu)); + + dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); + dpcpu_init(dpcpu[cpuid - 1], cpuid); + + bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO); + + naps = atomic_load_int(&aps_started); + bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE; + + printf("Starting CPU %u (%lx)\n", cpuid, target_cpu); + pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry); + err = psci_cpu_on(target_cpu, pa, cpuid); + if (err != PSCI_RETVAL_SUCCESS) { + /* + * Panic here if INVARIANTS are enabled and PSCI failed to + * start the requested CPU. psci_cpu_on() returns PSCI_MISSING + * to indicate we are unable to use it to start the given CPU. + */ + KASSERT(err == PSCI_MISSING || + (mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST, + ("Failed to start CPU %u (%lx), error %d\n", + id, target_cpu, err)); + + pcpu_destroy(pcpup); + kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE); + dpcpu[cpuid - 1] = NULL; + kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE); + bootstacks[cpuid] = NULL; + mp_ncpus--; + + /* Notify the user that the CPU failed to start */ + printf("Failed to start CPU %u (%lx), error %d\n", + id, target_cpu, err); + } else { + /* Wait for the AP to switch to its boot stack. */ + while (atomic_load_int(&aps_started) < naps + 1) + cpu_spinwait(); + CPU_SET(cpuid, &all_cpus); + } + + return (true); +} + +#ifdef DEV_ACPI +static void +madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) +{ + ACPI_MADT_GENERIC_INTERRUPT *intr; + u_int *cpuid; + u_int id; + + switch(entry->Type) { + case ACPI_MADT_TYPE_GENERIC_INTERRUPT: + intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry; + cpuid = arg; + id = *cpuid; + start_cpu(id, intr->ArmMpidr); + __pcpu[id].pc_acpi_id = intr->Uid; + (*cpuid)++; + break; + default: + break; + } +} + +static void +cpu_init_acpi(void) +{ + ACPI_TABLE_MADT *madt; + vm_paddr_t physaddr; + u_int cpuid; + + physaddr = acpi_find_table(ACPI_SIG_MADT); + if (physaddr == 0) + return; + + madt = acpi_map_table(physaddr, ACPI_SIG_MADT); + if (madt == NULL) { + printf("Unable to map the MADT, not starting APs\n"); + return; + } + + cpuid = 0; + acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, + madt_handler, &cpuid); + + acpi_unmap_table(madt); + +#if MAXMEMDOM > 1 + acpi_pxm_set_cpu_locality(); +#endif +} +#endif + +#ifdef FDT +static boolean_t +cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) +{ + uint64_t target_cpu; + int domain; + + target_cpu = reg[0]; + if (addr_size == 2) { + target_cpu <<= 32; + target_cpu |= reg[1]; + } + + if (!start_cpu(id, target_cpu)) + return (FALSE); + + /* Try to read the numa node of this cpu */ + if (vm_ndomains == 1 || + OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) <= 0) + domain = 0; + __pcpu[id].pc_domain = domain; + if (domain < MAXMEMDOM) + CPU_SET(id, &cpuset_domain[domain]); + + return (TRUE); +} +#endif + +/* Initialize and fire up non-boot processors */ +void +cpu_mp_start(void) +{ +#ifdef FDT + phandle_t node; + int i; +#endif + + mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); + + CPU_SET(0, &all_cpus); + + switch(arm64_bus_method) { +#ifdef DEV_ACPI + case 
ARM64_BUS_ACPI: + mp_quirks = MP_QUIRK_CPULIST; + KASSERT(cpu0 >= 0, ("Current CPU was not found")); + cpu_init_acpi(); + break; +#endif +#ifdef FDT + case ARM64_BUS_FDT: + node = OF_peer(0); + for (i = 0; fdt_quirks[i].compat != NULL; i++) { + if (ofw_bus_node_is_compatible(node, + fdt_quirks[i].compat) != 0) { + mp_quirks = fdt_quirks[i].quirks; + } + } + KASSERT(cpu0 >= 0, ("Current CPU was not found")); + ofw_cpu_early_foreach(cpu_init_fdt, true); + break; +#endif + default: + break; + } +} + +/* Introduce rest of cores to the world */ +void +cpu_mp_announce(void) +{ +} + +#ifdef DEV_ACPI +static void +cpu_count_acpi_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) +{ + ACPI_MADT_GENERIC_INTERRUPT *intr; + u_int *cores = arg; + uint64_t mpidr_reg; + + switch(entry->Type) { + case ACPI_MADT_TYPE_GENERIC_INTERRUPT: + intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry; + if (cpu0 < 0) { + mpidr_reg = READ_SPECIALREG(mpidr_el1); + if ((mpidr_reg & 0xff00fffffful) == intr->ArmMpidr) + cpu0 = *cores; + } + (*cores)++; + break; + default: + break; + } +} + +static u_int +cpu_count_acpi(void) +{ + ACPI_TABLE_MADT *madt; + vm_paddr_t physaddr; + u_int cores; + + physaddr = acpi_find_table(ACPI_SIG_MADT); + if (physaddr == 0) + return (0); + + madt = acpi_map_table(physaddr, ACPI_SIG_MADT); + if (madt == NULL) { + printf("Unable to map the MADT, not starting APs\n"); + return (0); + } + + cores = 0; + acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, + cpu_count_acpi_handler, &cores); + + acpi_unmap_table(madt); + + return (cores); +} +#endif + +#ifdef FDT +static boolean_t +cpu_find_cpu0_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) +{ + uint64_t mpidr_fdt, mpidr_reg; + + if (cpu0 < 0) { + mpidr_fdt = reg[0]; + if (addr_size == 2) { + mpidr_fdt <<= 32; + mpidr_fdt |= reg[1]; + } + + mpidr_reg = READ_SPECIALREG(mpidr_el1); + + if ((mpidr_reg & 0xff00fffffful) == mpidr_fdt) + cpu0 = id; + } + + return (TRUE); +} +#endif + +void +cpu_mp_setmaxid(void) +{ + int cores; + + mp_ncpus = 1; + mp_maxid = 0; + + switch(arm64_bus_method) { +#ifdef DEV_ACPI + case ARM64_BUS_ACPI: + cores = cpu_count_acpi(); + if (cores > 0) { + cores = MIN(cores, MAXCPU); + if (bootverbose) + printf("Found %d CPUs in the ACPI tables\n", + cores); + mp_ncpus = cores; + mp_maxid = cores - 1; + } + break; +#endif +#ifdef FDT + case ARM64_BUS_FDT: + cores = ofw_cpu_early_foreach(cpu_find_cpu0_fdt, false); + if (cores > 0) { + cores = MIN(cores, MAXCPU); + if (bootverbose) + printf("Found %d CPUs in the device tree\n", + cores); + mp_ncpus = cores; + mp_maxid = cores - 1; + } + break; +#endif + default: + if (bootverbose) + printf("No CPU data, limiting to 1 core\n"); + break; + } + + if (TUNABLE_INT_FETCH("hw.ncpu", &cores)) { + if (cores > 0 && cores < mp_ncpus) { + mp_ncpus = cores; + mp_maxid = cores - 1; + } + } +} + +/* + * Lookup IPI source. + */ +static struct intr_ipi * +intr_ipi_lookup(u_int ipi) +{ + + if (ipi >= INTR_IPI_COUNT) + panic("%s: no such IPI %u", __func__, ipi); + + return (&ipi_sources[ipi]); +} + +/* + * interrupt controller dispatch function for IPIs. It should + * be called straight from the interrupt controller, when associated + * interrupt source is learned. Or from anybody who has an interrupt + * source mapped. 
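Both the ACPI and FDT paths above identify the boot CPU by comparing MPIDR affinity fields only: the mask 0xff00fffffful keeps Aff3 (bits 39..32) and Aff2..Aff0 (bits 23..0) while discarding the U, MT, and reserved bits, which the MADT and device-tree encodings do not carry. The comparison in isolation:

#include <stdint.h>

static int
same_cpu_sketch(uint64_t mpidr_el1, uint64_t table_mpidr)
{
	const uint64_t aff_mask = 0xff00fffffful;

	return ((mpidr_el1 & aff_mask) == (table_mpidr & aff_mask));
}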
+ */ +void +intr_ipi_dispatch(u_int ipi, struct trapframe *tf) +{ + void *arg; + struct intr_ipi *ii; + + ii = intr_ipi_lookup(ipi); + if (ii->ii_count == NULL) + panic("%s: not setup IPI %u", __func__, ipi); + + intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid)); + + /* + * Supply ipi filter with trapframe argument + * if none is registered. + */ + arg = ii->ii_handler_arg != NULL ? ii->ii_handler_arg : tf; + ii->ii_handler(arg); +} + +#ifdef notyet +/* + * Map IPI into interrupt controller. + * + * Not SMP coherent. + */ +static int +ipi_map(struct intr_irqsrc *isrc, u_int ipi) +{ + boolean_t is_percpu; + int error; + + if (ipi >= INTR_IPI_COUNT) + panic("%s: no such IPI %u", __func__, ipi); + + KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); + + isrc->isrc_type = INTR_ISRCT_NAMESPACE; + isrc->isrc_nspc_type = INTR_IRQ_NSPC_IPI; + isrc->isrc_nspc_num = ipi_next_num; + + error = PIC_REGISTER(intr_irq_root_dev, isrc, &is_percpu); + if (error == 0) { + isrc->isrc_dev = intr_irq_root_dev; + ipi_next_num++; + } + return (error); +} + +/* + * Setup IPI handler to interrupt source. + * + * Note that there could be more ways how to send and receive IPIs + * on a platform like fast interrupts for example. In that case, + * one can call this function with ASIF_NOALLOC flag set and then + * call intr_ipi_dispatch() when appropriate. + * + * Not SMP coherent. + */ +int +intr_ipi_set_handler(u_int ipi, const char *name, intr_ipi_filter_t *filter, + void *arg, u_int flags) +{ + struct intr_irqsrc *isrc; + int error; + + if (filter == NULL) + return(EINVAL); + + isrc = intr_ipi_lookup(ipi); + if (isrc->isrc_ipifilter != NULL) + return (EEXIST); + + if ((flags & AISHF_NOALLOC) == 0) { + error = ipi_map(isrc, ipi); + if (error != 0) + return (error); + } + + isrc->isrc_ipifilter = filter; + isrc->isrc_arg = arg; + isrc->isrc_handlers = 1; + isrc->isrc_count = intr_ipi_setup_counters(name); + isrc->isrc_index = 0; /* it should not be used in IPI case */ + + if (isrc->isrc_dev != NULL) { + PIC_ENABLE_INTR(isrc->isrc_dev, isrc); + PIC_ENABLE_SOURCE(isrc->isrc_dev, isrc); + } + return (0); +} +#endif + +/* Sending IPI */ +void +ipi_all_but_self(u_int ipi) +{ + cpuset_t cpus; + + cpus = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &cpus); + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); + intr_ipi_send(cpus, ipi); +} + +void +ipi_cpu(int cpu, u_int ipi) +{ + cpuset_t cpus; + + CPU_ZERO(&cpus); + CPU_SET(cpu, &cpus); + + CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi); + intr_ipi_send(cpus, ipi); +} + +void +ipi_selected(cpuset_t cpus, u_int ipi) +{ + + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); + intr_ipi_send(cpus, ipi); +} diff --git a/sys/arm64/arm64/nexus.c b/sys/arm64/arm64/nexus.c new file mode 100644 index 000000000000..924496ec7f52 --- /dev/null +++ b/sys/arm64/arm64/nexus.c @@ -0,0 +1,549 @@ +/*- + * Copyright 1998 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. 
It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * This code implements a `root nexus' for Arm Architecture + * machines. The function of the root nexus is to serve as an + * attachment point for both processors and buses, and to manage + * resources which are common to all of them. In particular, + * this code implements the core resource managers for interrupt + * requests, DMA requests (which rightfully should be a part of the + * ISA code but it's easier to do it here for now), I/O port addresses, + * and I/O memory address space. + */ + +#include "opt_acpi.h" +#include "opt_platform.h" + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <machine/bus.h> +#include <sys/rman.h> +#include <sys/interrupt.h> + +#include <machine/machdep.h> +#include <machine/vmparam.h> +#include <machine/pcb.h> +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/resource.h> +#include <machine/intr.h> + +#ifdef FDT +#include <dev/ofw/ofw_bus_subr.h> +#include <dev/ofw/openfirm.h> +#include "ofw_bus_if.h" +#endif +#ifdef DEV_ACPI +#include <contrib/dev/acpica/include/acpi.h> +#include <dev/acpica/acpivar.h> +#include "acpi_bus_if.h" +#include "pcib_if.h" +#endif + +extern struct bus_space memmap_bus; + +static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device"); + +struct nexus_device { + struct resource_list nx_resources; +}; + +#define DEVTONX(dev) ((struct nexus_device *)device_get_ivars(dev)) + +static struct rman mem_rman; +static struct rman irq_rman; + +static int nexus_attach(device_t); + +#ifdef FDT +static device_probe_t nexus_fdt_probe; +static device_attach_t nexus_fdt_attach; +#endif +#ifdef DEV_ACPI +static device_probe_t nexus_acpi_probe; +static device_attach_t nexus_acpi_attach; +#endif + +static int nexus_print_child(device_t, device_t); +static device_t nexus_add_child(device_t, u_int, const char *, int); +static struct resource *nexus_alloc_resource(device_t, device_t, int, int *, + rman_res_t, rman_res_t, rman_res_t, u_int); +static int nexus_activate_resource(device_t, device_t, int, int, + struct resource *); +static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, + enum intr_polarity pol); +static struct resource_list *nexus_get_reslist(device_t, device_t); +static int nexus_set_resource(device_t, device_t, int, int, + rman_res_t, rman_res_t); +static int nexus_deactivate_resource(device_t, device_t, int, int, + struct resource *); +static int nexus_release_resource(device_t, device_t, int, int, + struct resource *); + +static int nexus_setup_intr(device_t dev, device_t child, struct resource *res, + int 
flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep); +static int nexus_teardown_intr(device_t, device_t, struct resource *, void *); +static bus_space_tag_t nexus_get_bus_tag(device_t, device_t); +#ifdef SMP +static int nexus_bind_intr(device_t, device_t, struct resource *, int); +#endif + +#ifdef FDT +static int nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, + int icells, pcell_t *intr); +#endif + +static device_method_t nexus_methods[] = { + /* Bus interface */ + DEVMETHOD(bus_print_child, nexus_print_child), + DEVMETHOD(bus_add_child, nexus_add_child), + DEVMETHOD(bus_alloc_resource, nexus_alloc_resource), + DEVMETHOD(bus_activate_resource, nexus_activate_resource), + DEVMETHOD(bus_config_intr, nexus_config_intr), + DEVMETHOD(bus_get_resource_list, nexus_get_reslist), + DEVMETHOD(bus_set_resource, nexus_set_resource), + DEVMETHOD(bus_deactivate_resource, nexus_deactivate_resource), + DEVMETHOD(bus_release_resource, nexus_release_resource), + DEVMETHOD(bus_setup_intr, nexus_setup_intr), + DEVMETHOD(bus_teardown_intr, nexus_teardown_intr), + DEVMETHOD(bus_get_bus_tag, nexus_get_bus_tag), +#ifdef SMP + DEVMETHOD(bus_bind_intr, nexus_bind_intr), +#endif + { 0, 0 } +}; + +static driver_t nexus_driver = { + "nexus", + nexus_methods, + 1 /* no softc */ +}; + +static int +nexus_attach(device_t dev) +{ + + mem_rman.rm_start = 0; + mem_rman.rm_end = BUS_SPACE_MAXADDR; + mem_rman.rm_type = RMAN_ARRAY; + mem_rman.rm_descr = "I/O memory addresses"; + if (rman_init(&mem_rman) || + rman_manage_region(&mem_rman, 0, BUS_SPACE_MAXADDR)) + panic("nexus_attach mem_rman"); + irq_rman.rm_start = 0; + irq_rman.rm_end = ~0; + irq_rman.rm_type = RMAN_ARRAY; + irq_rman.rm_descr = "Interrupts"; + if (rman_init(&irq_rman) || rman_manage_region(&irq_rman, 0, ~0)) + panic("nexus_attach irq_rman"); + + bus_generic_probe(dev); + bus_generic_attach(dev); + + return (0); +} + +static int +nexus_print_child(device_t bus, device_t child) +{ + int retval = 0; + + retval += bus_print_child_header(bus, child); + retval += printf("\n"); + + return (retval); +} + +static device_t +nexus_add_child(device_t bus, u_int order, const char *name, int unit) +{ + device_t child; + struct nexus_device *ndev; + + ndev = malloc(sizeof(struct nexus_device), M_NEXUSDEV, M_NOWAIT|M_ZERO); + if (!ndev) + return (0); + resource_list_init(&ndev->nx_resources); + + child = device_add_child_ordered(bus, order, name, unit); + + /* should we free this in nexus_child_detached? */ + device_set_ivars(child, ndev); + + return (child); +} + +/* + * Allocate a resource on behalf of child. NB: child is usually going to be a + * child of one of our descendants, not a direct child of nexus0. + * (Exceptions include footbridge.) + */ +static struct resource * +nexus_alloc_resource(device_t bus, device_t child, int type, int *rid, + rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) +{ + struct nexus_device *ndev = DEVTONX(child); + struct resource *rv; + struct resource_list_entry *rle; + struct rman *rm; + int needactivate = flags & RF_ACTIVE; + + /* + * If this is an allocation of the "default" range for a given + * RID, and we know what the resources for this device are + * (ie. they aren't maintained by a child bus), then work out + * the start/end values. 
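+ *
+ * Illustrative example (not in the original comment): a child that
+ * earlier recorded its register window with
+ *
+ *	bus_set_resource(dev, SYS_RES_MEMORY, 0, 0x09000000, 0x1000);
+ *
+ * can later allocate it without repeating the addresses:
+ *
+ *	rid = 0;
+ *	res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
+ *
+ * bus_alloc_resource_any() passes the rman default range with a count
+ * of one, so the resource_list_find() lookup below supplies the real
+ * start/end/count. (0x09000000 is an arbitrary address chosen for the
+ * example.)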
+ */ + if (RMAN_IS_DEFAULT_RANGE(start, end) && (count == 1)) { + if (device_get_parent(child) != bus || ndev == NULL) + return(NULL); + rle = resource_list_find(&ndev->nx_resources, type, *rid); + if (rle == NULL) + return(NULL); + start = rle->start; + end = rle->end; + count = rle->count; + } + + switch (type) { + case SYS_RES_IRQ: + rm = &irq_rman; + break; + + case SYS_RES_MEMORY: + case SYS_RES_IOPORT: + rm = &mem_rman; + break; + + default: + return (NULL); + } + + rv = rman_reserve_resource(rm, start, end, count, flags, child); + if (rv == NULL) + return (NULL); + + rman_set_rid(rv, *rid); + rman_set_bushandle(rv, rman_get_start(rv)); + + if (needactivate) { + if (bus_activate_resource(child, type, *rid, rv)) { + rman_release_resource(rv); + return (NULL); + } + } + + return (rv); +} + +static int +nexus_release_resource(device_t bus, device_t child, int type, int rid, + struct resource *res) +{ + int error; + + if (rman_get_flags(res) & RF_ACTIVE) { + error = bus_deactivate_resource(child, type, rid, res); + if (error) + return (error); + } + return (rman_release_resource(res)); +} + +static int +nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, + enum intr_polarity pol) +{ + + /* + * On arm64 (due to INTRNG), ACPI interrupt configuration is + * done in nexus_acpi_map_intr(). + */ + return (0); +} + +static int +nexus_setup_intr(device_t dev, device_t child, struct resource *res, int flags, + driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep) +{ + int error; + + if ((rman_get_flags(res) & RF_SHAREABLE) == 0) + flags |= INTR_EXCL; + + /* We depend here on rman_activate_resource() being idempotent. */ + error = rman_activate_resource(res); + if (error) + return (error); + + error = intr_setup_irq(child, res, filt, intr, arg, flags, cookiep); + + return (error); +} + +static int +nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih) +{ + + return (intr_teardown_irq(child, r, ih)); +} + +#ifdef SMP +static int +nexus_bind_intr(device_t dev, device_t child, struct resource *irq, int cpu) +{ + + return (intr_bind_irq(child, irq, cpu)); +} +#endif + +static bus_space_tag_t +nexus_get_bus_tag(device_t bus __unused, device_t child __unused) +{ + + return(&memmap_bus); +} + +static int +nexus_activate_resource(device_t bus, device_t child, int type, int rid, + struct resource *r) +{ + int err; + bus_addr_t paddr; + bus_size_t psize; + bus_space_handle_t vaddr; + + if ((err = rman_activate_resource(r)) != 0) + return (err); + + /* + * If this is a memory resource, map it into the kernel. 
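+ * Memory and I/O port ranges are both mapped through memmap_bus, while
+ * an IRQ resource is instead handed to INTRNG via intr_activate_irq().
+ * For example, the RF_ACTIVE allocation sketched earlier arrives here
+ * with type == SYS_RES_MEMORY and paddr == rman_get_start(r), and
+ * rman_get_virtual(r) afterwards returns the kernel VA established by
+ * bus_space_map().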
+ */ + if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) { + paddr = (bus_addr_t)rman_get_start(r); + psize = (bus_size_t)rman_get_size(r); + err = bus_space_map(&memmap_bus, paddr, psize, 0, &vaddr); + if (err != 0) { + rman_deactivate_resource(r); + return (err); + } + rman_set_bustag(r, &memmap_bus); + rman_set_virtual(r, (void *)vaddr); + rman_set_bushandle(r, vaddr); + } else if (type == SYS_RES_IRQ) { + err = intr_activate_irq(child, r); + if (err != 0) { + rman_deactivate_resource(r); + return (err); + } + } + return (0); +} + +static struct resource_list * +nexus_get_reslist(device_t dev, device_t child) +{ + struct nexus_device *ndev = DEVTONX(child); + + return (&ndev->nx_resources); +} + +static int +nexus_set_resource(device_t dev, device_t child, int type, int rid, + rman_res_t start, rman_res_t count) +{ + struct nexus_device *ndev = DEVTONX(child); + struct resource_list *rl = &ndev->nx_resources; + + /* XXX this should return a success/failure indicator */ + resource_list_add(rl, type, rid, start, start + count - 1, count); + + return(0); +} + +static int +nexus_deactivate_resource(device_t bus, device_t child, int type, int rid, + struct resource *r) +{ + bus_size_t psize; + bus_space_handle_t vaddr; + + if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) { + psize = (bus_size_t)rman_get_size(r); + vaddr = rman_get_bushandle(r); + + if (vaddr != 0) { + bus_space_unmap(&memmap_bus, vaddr, psize); + rman_set_virtual(r, NULL); + rman_set_bushandle(r, 0); + } + } else if (type == SYS_RES_IRQ) { + intr_deactivate_irq(child, r); + } + + return (rman_deactivate_resource(r)); +} + +#ifdef FDT +static device_method_t nexus_fdt_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, nexus_fdt_probe), + DEVMETHOD(device_attach, nexus_fdt_attach), + + /* OFW interface */ + DEVMETHOD(ofw_bus_map_intr, nexus_ofw_map_intr), + + DEVMETHOD_END, +}; + +#define nexus_baseclasses nexus_fdt_baseclasses +DEFINE_CLASS_1(nexus, nexus_fdt_driver, nexus_fdt_methods, 1, nexus_driver); +#undef nexus_baseclasses +static devclass_t nexus_fdt_devclass; + +EARLY_DRIVER_MODULE(nexus_fdt, root, nexus_fdt_driver, nexus_fdt_devclass, + 0, 0, BUS_PASS_BUS + BUS_PASS_ORDER_FIRST); + +static int +nexus_fdt_probe(device_t dev) +{ + + if (arm64_bus_method != ARM64_BUS_FDT) + return (ENXIO); + + device_quiet(dev); + return (BUS_PROBE_DEFAULT); +} + +static int +nexus_fdt_attach(device_t dev) +{ + + nexus_add_child(dev, 10, "ofwbus", 0); + return (nexus_attach(dev)); +} + +static int +nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells, + pcell_t *intr) +{ + u_int irq; + struct intr_map_data_fdt *fdt_data; + size_t len; + + len = sizeof(*fdt_data) + icells * sizeof(pcell_t); + fdt_data = (struct intr_map_data_fdt *)intr_alloc_map_data( + INTR_MAP_DATA_FDT, len, M_WAITOK | M_ZERO); + fdt_data->iparent = iparent; + fdt_data->ncells = icells; + memcpy(fdt_data->cells, intr, icells * sizeof(pcell_t)); + irq = intr_map_irq(NULL, iparent, (struct intr_map_data *)fdt_data); + return (irq); +} +#endif + +#ifdef DEV_ACPI +static int nexus_acpi_map_intr(device_t dev, device_t child, u_int irq, int trig, int pol); + +static device_method_t nexus_acpi_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, nexus_acpi_probe), + DEVMETHOD(device_attach, nexus_acpi_attach), + + /* ACPI interface */ + DEVMETHOD(acpi_bus_map_intr, nexus_acpi_map_intr), + + DEVMETHOD_END, +}; + +#define nexus_baseclasses nexus_acpi_baseclasses +DEFINE_CLASS_1(nexus, nexus_acpi_driver, 
nexus_acpi_methods, 1, + nexus_driver); +#undef nexus_baseclasses +static devclass_t nexus_acpi_devclass; + +EARLY_DRIVER_MODULE(nexus_acpi, root, nexus_acpi_driver, nexus_acpi_devclass, + 0, 0, BUS_PASS_BUS + BUS_PASS_ORDER_FIRST); + +static int +nexus_acpi_probe(device_t dev) +{ + + if (arm64_bus_method != ARM64_BUS_ACPI || acpi_identify() != 0) + return (ENXIO); + + device_quiet(dev); + return (BUS_PROBE_LOW_PRIORITY); +} + +static int +nexus_acpi_attach(device_t dev) +{ + + nexus_add_child(dev, 10, "acpi", 0); + return (nexus_attach(dev)); +} + +static int +nexus_acpi_map_intr(device_t dev, device_t child, u_int irq, int trig, int pol) +{ + struct intr_map_data_acpi *acpi_data; + size_t len; + + len = sizeof(*acpi_data); + acpi_data = (struct intr_map_data_acpi *)intr_alloc_map_data( + INTR_MAP_DATA_ACPI, len, M_WAITOK | M_ZERO); + acpi_data->irq = irq; + acpi_data->pol = pol; + acpi_data->trig = trig; + + /* + * TODO: This will only handle a single interrupt controller. + * ACPI will map multiple controllers into a single virtual IRQ + * space. Each controller has a System Vector Base to hold the + * first irq it handles in this space. As such the correct way + * to handle interrupts with ACPI is to search through the + * controllers for the largest base value that is no larger than + * the IRQ value. + */ + irq = intr_map_irq(NULL, ACPI_INTR_XREF, + (struct intr_map_data *)acpi_data); + return (irq); +} +#endif diff --git a/sys/arm64/arm64/ofw_machdep.c b/sys/arm64/arm64/ofw_machdep.c new file mode 100644 index 000000000000..3941c1d35617 --- /dev/null +++ b/sys/arm64/arm64/ofw_machdep.c @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2015 Ian Lepore <ian@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bus.h> + +#include <machine/bus.h> + +#include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_subr.h> + +extern struct bus_space memmap_bus; + +int +OF_decode_addr(phandle_t dev, int regno, bus_space_tag_t *tag, + bus_space_handle_t *handle, bus_size_t *sz) +{ + bus_addr_t addr; + bus_size_t size; + int err; + + err = ofw_reg_to_paddr(dev, regno, &addr, &size, NULL); + if (err != 0) + return (err); + + *tag = &memmap_bus; + + if (sz != NULL) + *sz = size; + + return (bus_space_map(*tag, addr, size, 0, handle)); +} diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c new file mode 100644 index 000000000000..df160cc05012 --- /dev/null +++ b/sys/arm64/arm64/pmap.c @@ -0,0 +1,6710 @@ +/*- + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * Copyright (c) 1994 John S. Dyson + * All rights reserved. + * Copyright (c) 1994 David Greenman + * All rights reserved. + * Copyright (c) 2003 Peter Wemm + * All rights reserved. + * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu> + * All rights reserved. + * Copyright (c) 2014 Andrew Turner + * All rights reserved. + * Copyright (c) 2014-2016 The FreeBSD Foundation + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * This software was developed by Andrew Turner under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 + */ +/*- + * Copyright (c) 2003 Networks Associates Technology, Inc. + * All rights reserved. 
+ * + * This software was developed for the FreeBSD Project by Jake Burkholder, + * Safeport Network Services, and Network Associates Laboratories, the + * Security Research Division of Network Associates, Inc. under + * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA + * CHATS research program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * Manages physical address maps. + * + * Since the information managed by this module is + * also stored by the logical address mapping module, + * this module may throw away valid virtual-to-physical + * mappings at almost any time. However, invalidations + * of virtual-to-physical mappings must be done as + * requested. + * + * In order to cope with hardware architectures which + * make virtual-to-physical map invalidates expensive, + * this module may delay invalidate or reduced protection + * operations until such time as they are actually + * necessary. This module is given full information as + * to which processors are currently using which maps, + * and to when physical maps must be made correct. 
+ */ + +#include "opt_vm.h" + +#include <sys/param.h> +#include <sys/bitstring.h> +#include <sys/bus.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/limits.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mman.h> +#include <sys/msgbuf.h> +#include <sys/mutex.h> +#include <sys/physmem.h> +#include <sys/proc.h> +#include <sys/rwlock.h> +#include <sys/sbuf.h> +#include <sys/sx.h> +#include <sys/vmem.h> +#include <sys/vmmeter.h> +#include <sys/sched.h> +#include <sys/sysctl.h> +#include <sys/_unrhdr.h> +#include <sys/smp.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_extern.h> +#include <vm/vm_pageout.h> +#include <vm/vm_pager.h> +#include <vm/vm_phys.h> +#include <vm/vm_radix.h> +#include <vm/vm_reserv.h> +#include <vm/uma.h> + +#include <machine/machdep.h> +#include <machine/md_var.h> +#include <machine/pcb.h> + +#define PMAP_ASSERT_STAGE1(pmap) MPASS((pmap)->pm_stage == PM_STAGE1) +#define PMAP_ASSERT_STAGE2(pmap) MPASS((pmap)->pm_stage == PM_STAGE2) + +#define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NL2PG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NL3PG (PAGE_SIZE/(sizeof (pt_entry_t))) + +#define NUL0E L0_ENTRIES +#define NUL1E (NUL0E * NL1PG) +#define NUL2E (NUL1E * NL2PG) + +#if !defined(DIAGNOSTIC) +#ifdef __GNUC_GNU_INLINE__ +#define PMAP_INLINE __attribute__((__gnu_inline__)) inline +#else +#define PMAP_INLINE extern inline +#endif +#else +#define PMAP_INLINE +#endif + +#ifdef PV_STATS +#define PV_STAT(x) do { x ; } while (0) +#else +#define PV_STAT(x) do { } while (0) +#endif + +#define pmap_l2_pindex(v) ((v) >> L2_SHIFT) +#define pa_to_pvh(pa) (&pv_table[pmap_l2_pindex(pa)]) + +#define NPV_LIST_LOCKS MAXCPU + +#define PHYS_TO_PV_LIST_LOCK(pa) \ + (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) + +#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ + struct rwlock **_lockp = (lockp); \ + struct rwlock *_new_lock; \ + \ + _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ + if (_new_lock != *_lockp) { \ + if (*_lockp != NULL) \ + rw_wunlock(*_lockp); \ + *_lockp = _new_lock; \ + rw_wlock(*_lockp); \ + } \ +} while (0) + +#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) + +#define RELEASE_PV_LIST_LOCK(lockp) do { \ + struct rwlock **_lockp = (lockp); \ + \ + if (*_lockp != NULL) { \ + rw_wunlock(*_lockp); \ + *_lockp = NULL; \ + } \ +} while (0) + +#define VM_PAGE_TO_PV_LIST_LOCK(m) \ + PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) + +/* + * The presence of this flag indicates that the mapping is writeable. + * If the ATTR_S1_AP_RO bit is also set, then the mapping is clean, otherwise + * it is dirty. This flag may only be set on managed mappings. + * + * The DBM bit is reserved on ARMv8.0 but it seems we can safely treat it + * as a software managed bit. + */ +#define ATTR_SW_DBM ATTR_DBM + +struct pmap kernel_pmap_store; + +/* Used for mapping ACPI memory before VM is initialized */ +#define PMAP_PREINIT_MAPPING_COUNT 32 +#define PMAP_PREINIT_MAPPING_SIZE (PMAP_PREINIT_MAPPING_COUNT * L2_SIZE) +static vm_offset_t preinit_map_va; /* Start VA of pre-init mapping space */ +static int vm_initialized = 0; /* No need to use pre-init maps when set */ + +/* + * Reserve a few L2 blocks starting from 'preinit_map_va' pointer. + * Always map entire L2 block for simplicity. 
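+ * With the constants above this reserves PMAP_PREINIT_MAPPING_COUNT
+ * (32) slots of L2_SIZE (2 MiB with 4 KiB base pages) each, i.e.
+ * 64 MiB of pre-init VA space in total.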
+ * VA of L2 block = preinit_map_va + i * L2_SIZE + */ +static struct pmap_preinit_mapping { + vm_paddr_t pa; + vm_offset_t va; + vm_size_t size; +} pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT]; + +vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ +vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ +vm_offset_t kernel_vm_end = 0; + +/* + * Data for the pv entry allocation mechanism. + */ +static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); +static struct mtx pv_chunks_mutex; +static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; +static struct md_page *pv_table; +static struct md_page pv_dummy; + +vm_paddr_t dmap_phys_base; /* The start of the dmap region */ +vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ +vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ + +/* This code assumes all L1 DMAP entries will be used */ +CTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS); +CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS); + +#define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) +extern pt_entry_t pagetable_dmap[]; + +#define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) +static vm_paddr_t physmap[PHYSMAP_SIZE]; +static u_int physmap_idx; + +static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "VM/pmap parameters"); + +/* + * This ASID allocator uses a bit vector ("asid_set") to remember which ASIDs + * that it has currently allocated to a pmap, a cursor ("asid_next") to + * optimize its search for a free ASID in the bit vector, and an epoch number + * ("asid_epoch") to indicate when it has reclaimed all previously allocated + * ASIDs that are not currently active on a processor. + * + * The current epoch number is always in the range [0, INT_MAX). Negative + * numbers and INT_MAX are reserved for special cases that are described + * below. + */ +struct asid_set { + int asid_bits; + bitstr_t *asid_set; + int asid_set_size; + int asid_next; + int asid_epoch; + struct mtx asid_set_mutex; +}; + +static struct asid_set asids; +static struct asid_set vmids; + +static SYSCTL_NODE(_vm_pmap, OID_AUTO, asid, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "ASID allocator"); +SYSCTL_INT(_vm_pmap_asid, OID_AUTO, bits, CTLFLAG_RD, &asids.asid_bits, 0, + "The number of bits in an ASID"); +SYSCTL_INT(_vm_pmap_asid, OID_AUTO, next, CTLFLAG_RD, &asids.asid_next, 0, + "The last allocated ASID plus one"); +SYSCTL_INT(_vm_pmap_asid, OID_AUTO, epoch, CTLFLAG_RD, &asids.asid_epoch, 0, + "The current epoch number"); + +static SYSCTL_NODE(_vm_pmap, OID_AUTO, vmid, CTLFLAG_RD, 0, "VMID allocator"); +SYSCTL_INT(_vm_pmap_vmid, OID_AUTO, bits, CTLFLAG_RD, &vmids.asid_bits, 0, + "The number of bits in an VMID"); +SYSCTL_INT(_vm_pmap_vmid, OID_AUTO, next, CTLFLAG_RD, &vmids.asid_next, 0, + "The last allocated VMID plus one"); +SYSCTL_INT(_vm_pmap_vmid, OID_AUTO, epoch, CTLFLAG_RD, &vmids.asid_epoch, 0, + "The current epoch number"); + +void (*pmap_clean_stage2_tlbi)(void); +void (*pmap_invalidate_vpipt_icache)(void); + +/* + * A pmap's cookie encodes an ASID and epoch number. Cookies for reserved + * ASIDs have a negative epoch number, specifically, INT_MIN. Cookies for + * dynamically allocated ASIDs have a non-negative epoch number. + * + * An invalid ASID is represented by -1. 
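+ * As a worked example, COOKIE_FROM(5, 7) below packs ASID 5 into the
+ * low 32 bits and epoch 7 into the high 32 bits, yielding
+ * 0x0000000700000005; COOKIE_TO_ASID() and COOKIE_TO_EPOCH() recover
+ * 5 and 7 by truncation and by shifting, respectively.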
+ * + * There are two special-case cookie values: (1) COOKIE_FROM(-1, INT_MIN), + * which indicates that an ASID should never be allocated to the pmap, and + * (2) COOKIE_FROM(-1, INT_MAX), which indicates that an ASID should be + * allocated when the pmap is next activated. + */ +#define COOKIE_FROM(asid, epoch) ((long)((u_int)(asid) | \ + ((u_long)(epoch) << 32))) +#define COOKIE_TO_ASID(cookie) ((int)(cookie)) +#define COOKIE_TO_EPOCH(cookie) ((int)((u_long)(cookie) >> 32)) + +static int superpages_enabled = 1; +SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, + CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0, + "Are large page mappings enabled?"); + +/* + * Internal flags for pmap_enter()'s helper functions. + */ +#define PMAP_ENTER_NORECLAIM 0x1000000 /* Don't reclaim PV entries. */ +#define PMAP_ENTER_NOREPLACE 0x2000000 /* Don't replace mappings. */ + +static void free_pv_chunk(struct pv_chunk *pc); +static void free_pv_entry(pmap_t pmap, pv_entry_t pv); +static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); +static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); +static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); +static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, + vm_offset_t va); + +static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte); +static bool pmap_activate_int(pmap_t pmap); +static void pmap_alloc_asid(pmap_t pmap); +static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode); +static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va); +static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, + vm_offset_t va, struct rwlock **lockp); +static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va); +static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, + vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); +static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, + u_int flags, vm_page_t m, struct rwlock **lockp); +static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, + pd_entry_t l1e, struct spglist *free, struct rwlock **lockp); +static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, + pd_entry_t l2e, struct spglist *free, struct rwlock **lockp); +static void pmap_reset_asid_set(pmap_t pmap); +static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, + vm_page_t m, struct rwlock **lockp); + +static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, + struct rwlock **lockp); + +static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, + struct spglist *free); +static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); +static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); + +/* + * These load the old table data and store the new value. + * They need to be atomic as the System MMU may write to the table at + * the same time as the CPU. 
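+ *
+ * This is why pmap_load_clear() and pmap_load_store() are built on
+ * atomic_swap_64(): a separate read followed by a write could lose a
+ * concurrent hardware update (e.g. the DBM-managed dirty state noted
+ * above) that lands between the two steps, whereas the swap returns
+ * the old entry and installs the new one atomically.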
+ */ +#define pmap_clear(table) atomic_store_64(table, 0) +#define pmap_clear_bits(table, bits) atomic_clear_64(table, bits) +#define pmap_load(table) (*table) +#define pmap_load_clear(table) atomic_swap_64(table, 0) +#define pmap_load_store(table, entry) atomic_swap_64(table, entry) +#define pmap_set_bits(table, bits) atomic_set_64(table, bits) +#define pmap_store(table, entry) atomic_store_64(table, entry) + +/********************/ +/* Inline functions */ +/********************/ + +static __inline void +pagecopy(void *s, void *d) +{ + + memcpy(d, s, PAGE_SIZE); +} + +static __inline pd_entry_t * +pmap_l0(pmap_t pmap, vm_offset_t va) +{ + + return (&pmap->pm_l0[pmap_l0_index(va)]); +} + +static __inline pd_entry_t * +pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va) +{ + pd_entry_t *l1; + + l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); + return (&l1[pmap_l1_index(va)]); +} + +static __inline pd_entry_t * +pmap_l1(pmap_t pmap, vm_offset_t va) +{ + pd_entry_t *l0; + + l0 = pmap_l0(pmap, va); + if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE) + return (NULL); + + return (pmap_l0_to_l1(l0, va)); +} + +static __inline pd_entry_t * +pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) +{ + pd_entry_t *l2; + + l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); + return (&l2[pmap_l2_index(va)]); +} + +static __inline pd_entry_t * +pmap_l2(pmap_t pmap, vm_offset_t va) +{ + pd_entry_t *l1; + + l1 = pmap_l1(pmap, va); + if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE) + return (NULL); + + return (pmap_l1_to_l2(l1, va)); +} + +static __inline pt_entry_t * +pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) +{ + pt_entry_t *l3; + + l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK); + return (&l3[pmap_l3_index(va)]); +} + +/* + * Returns the lowest valid pde for a given virtual address. + * The next level may or may not point to a valid page or block. + */ +static __inline pd_entry_t * +pmap_pde(pmap_t pmap, vm_offset_t va, int *level) +{ + pd_entry_t *l0, *l1, *l2, desc; + + l0 = pmap_l0(pmap, va); + desc = pmap_load(l0) & ATTR_DESCR_MASK; + if (desc != L0_TABLE) { + *level = -1; + return (NULL); + } + + l1 = pmap_l0_to_l1(l0, va); + desc = pmap_load(l1) & ATTR_DESCR_MASK; + if (desc != L1_TABLE) { + *level = 0; + return (l0); + } + + l2 = pmap_l1_to_l2(l1, va); + desc = pmap_load(l2) & ATTR_DESCR_MASK; + if (desc != L2_TABLE) { + *level = 1; + return (l1); + } + + *level = 2; + return (l2); +} + +/* + * Returns the lowest valid pte block or table entry for a given virtual + * address. If there are no valid entries return NULL and set the level to + * the first invalid level. 
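+ *
+ * For example, for a VA covered by a 2 MiB L2 block this returns the
+ * L2 entry with *level set to 2, while for a VA whose L1 entry is a
+ * table but whose L2 entry is invalid it returns NULL with *level set
+ * to 2, the first level at which the walk failed.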
+ */ +static __inline pt_entry_t * +pmap_pte(pmap_t pmap, vm_offset_t va, int *level) +{ + pd_entry_t *l1, *l2, desc; + pt_entry_t *l3; + + l1 = pmap_l1(pmap, va); + if (l1 == NULL) { + *level = 0; + return (NULL); + } + desc = pmap_load(l1) & ATTR_DESCR_MASK; + if (desc == L1_BLOCK) { + *level = 1; + return (l1); + } + + if (desc != L1_TABLE) { + *level = 1; + return (NULL); + } + + l2 = pmap_l1_to_l2(l1, va); + desc = pmap_load(l2) & ATTR_DESCR_MASK; + if (desc == L2_BLOCK) { + *level = 2; + return (l2); + } + + if (desc != L2_TABLE) { + *level = 2; + return (NULL); + } + + *level = 3; + l3 = pmap_l2_to_l3(l2, va); + if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE) + return (NULL); + + return (l3); +} + +bool +pmap_ps_enabled(pmap_t pmap __unused) +{ + + return (superpages_enabled != 0); +} + +bool +pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1, + pd_entry_t **l2, pt_entry_t **l3) +{ + pd_entry_t *l0p, *l1p, *l2p; + + if (pmap->pm_l0 == NULL) + return (false); + + l0p = pmap_l0(pmap, va); + *l0 = l0p; + + if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE) + return (false); + + l1p = pmap_l0_to_l1(l0p, va); + *l1 = l1p; + + if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) { + *l2 = NULL; + *l3 = NULL; + return (true); + } + + if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE) + return (false); + + l2p = pmap_l1_to_l2(l1p, va); + *l2 = l2p; + + if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) { + *l3 = NULL; + return (true); + } + + if ((pmap_load(l2p) & ATTR_DESCR_MASK) != L2_TABLE) + return (false); + + *l3 = pmap_l2_to_l3(l2p, va); + + return (true); +} + +static __inline int +pmap_l3_valid(pt_entry_t l3) +{ + + return ((l3 & ATTR_DESCR_MASK) == L3_PAGE); +} + +CTASSERT(L1_BLOCK == L2_BLOCK); + +static pt_entry_t +pmap_pte_memattr(pmap_t pmap, vm_memattr_t memattr) +{ + pt_entry_t val; + + if (pmap->pm_stage == PM_STAGE1) { + val = ATTR_S1_IDX(memattr); + if (memattr == VM_MEMATTR_DEVICE) + val |= ATTR_S1_XN; + return (val); + } + + val = 0; + + switch (memattr) { + case VM_MEMATTR_DEVICE: + return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_DEVICE_nGnRnE) | + ATTR_S2_XN(ATTR_S2_XN_ALL)); + case VM_MEMATTR_UNCACHEABLE: + return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_NC)); + case VM_MEMATTR_WRITE_BACK: + return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_WB)); + case VM_MEMATTR_WRITE_THROUGH: + return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_WT)); + default: + panic("%s: invalid memory attribute %x", __func__, memattr); + } +} + +static pt_entry_t +pmap_pte_prot(pmap_t pmap, vm_prot_t prot) +{ + pt_entry_t val; + + val = 0; + if (pmap->pm_stage == PM_STAGE1) { + if ((prot & VM_PROT_EXECUTE) == 0) + val |= ATTR_S1_XN; + if ((prot & VM_PROT_WRITE) == 0) + val |= ATTR_S1_AP(ATTR_S1_AP_RO); + } else { + if ((prot & VM_PROT_WRITE) != 0) + val |= ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE); + if ((prot & VM_PROT_READ) != 0) + val |= ATTR_S2_S2AP(ATTR_S2_S2AP_READ); + if ((prot & VM_PROT_EXECUTE) == 0) + val |= ATTR_S2_XN(ATTR_S2_XN_ALL); + } + + return (val); +} + +/* + * Checks if the PTE is dirty. 
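+ * For stage 1 this means the mapping is writeable (ATTR_SW_DBM set)
+ * and not currently write-protected: a PTE carrying
+ * ATTR_S1_AP(ATTR_S1_AP_RW) together with ATTR_SW_DBM is dirty, while
+ * the same PTE with ATTR_S1_AP_RO set is clean. For stage 2 the write
+ * permission bit (ATTR_S2_S2AP_WRITE) itself is the indicator.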
+ */ +static inline int +pmap_pte_dirty(pmap_t pmap, pt_entry_t pte) +{ + + KASSERT((pte & ATTR_SW_MANAGED) != 0, ("pte %#lx is unmanaged", pte)); + + if (pmap->pm_stage == PM_STAGE1) { + KASSERT((pte & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) != 0, + ("pte %#lx is writeable and missing ATTR_SW_DBM", pte)); + + return ((pte & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) == + (ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_SW_DBM)); + } + + return ((pte & ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE)) == + ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE)); +} + +static __inline void +pmap_resident_count_inc(pmap_t pmap, int count) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + pmap->pm_stats.resident_count += count; +} + +static __inline void +pmap_resident_count_dec(pmap_t pmap, int count) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT(pmap->pm_stats.resident_count >= count, + ("pmap %p resident count underflow %ld %d", pmap, + pmap->pm_stats.resident_count, count)); + pmap->pm_stats.resident_count -= count; +} + +static pt_entry_t * +pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, + u_int *l2_slot) +{ + pt_entry_t *l2; + pd_entry_t *l1; + + l1 = (pd_entry_t *)l1pt; + *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; + + /* Check locore has used a table L1 map */ + KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE, + ("Invalid bootstrap L1 table")); + /* Find the address of the L2 table */ + l2 = (pt_entry_t *)init_pt_va; + *l2_slot = pmap_l2_index(va); + + return (l2); +} + +static vm_paddr_t +pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) +{ + u_int l1_slot, l2_slot; + pt_entry_t *l2; + + l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); + + return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET)); +} + +static vm_offset_t +pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, + vm_offset_t freemempos) +{ + pt_entry_t *l2; + vm_offset_t va; + vm_paddr_t l2_pa, pa; + u_int l1_slot, l2_slot, prev_l1_slot; + int i; + + dmap_phys_base = min_pa & ~L1_OFFSET; + dmap_phys_max = 0; + dmap_max_addr = 0; + l2 = NULL; + prev_l1_slot = -1; + +#define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) + memset(pagetable_dmap, 0, PAGE_SIZE * DMAP_TABLES); + + for (i = 0; i < (physmap_idx * 2); i += 2) { + pa = physmap[i] & ~L2_OFFSET; + va = pa - dmap_phys_base + DMAP_MIN_ADDRESS; + + /* Create L2 mappings at the start of the region */ + if ((pa & L1_OFFSET) != 0) { + l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT); + if (l1_slot != prev_l1_slot) { + prev_l1_slot = l1_slot; + l2 = (pt_entry_t *)freemempos; + l2_pa = pmap_early_vtophys(kern_l1, + (vm_offset_t)l2); + freemempos += PAGE_SIZE; + + pmap_store(&pagetable_dmap[l1_slot], + (l2_pa & ~Ln_TABLE_MASK) | L1_TABLE); + + memset(l2, 0, PAGE_SIZE); + } + KASSERT(l2 != NULL, + ("pmap_bootstrap_dmap: NULL l2 map")); + for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1]; + pa += L2_SIZE, va += L2_SIZE) { + /* + * We are on a boundary, stop to + * create a level 1 block + */ + if ((pa & L1_OFFSET) == 0) + break; + + l2_slot = pmap_l2_index(va); + KASSERT(l2_slot != 0, ("...")); + pmap_store(&l2[l2_slot], + (pa & ~L2_OFFSET) | ATTR_DEFAULT | + ATTR_S1_XN | + ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | + L2_BLOCK); + } + KASSERT(va == (pa - dmap_phys_base + DMAP_MIN_ADDRESS), + ("...")); + } + + for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1] && + (physmap[i + 1] - pa) >= L1_SIZE; + pa += L1_SIZE, va += L1_SIZE) { + l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT); + pmap_store(&pagetable_dmap[l1_slot], + (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_S1_XN | + 
ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | L1_BLOCK); + } + + /* Create L2 mappings at the end of the region */ + if (pa < physmap[i + 1]) { + l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT); + if (l1_slot != prev_l1_slot) { + prev_l1_slot = l1_slot; + l2 = (pt_entry_t *)freemempos; + l2_pa = pmap_early_vtophys(kern_l1, + (vm_offset_t)l2); + freemempos += PAGE_SIZE; + + pmap_store(&pagetable_dmap[l1_slot], + (l2_pa & ~Ln_TABLE_MASK) | L1_TABLE); + + memset(l2, 0, PAGE_SIZE); + } + KASSERT(l2 != NULL, + ("pmap_bootstrap_dmap: NULL l2 map")); + for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1]; + pa += L2_SIZE, va += L2_SIZE) { + l2_slot = pmap_l2_index(va); + pmap_store(&l2[l2_slot], + (pa & ~L2_OFFSET) | ATTR_DEFAULT | + ATTR_S1_XN | + ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | + L2_BLOCK); + } + } + + if (pa > dmap_phys_max) { + dmap_phys_max = pa; + dmap_max_addr = va; + } + } + + cpu_tlb_flushID(); + + return (freemempos); +} + +static vm_offset_t +pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start) +{ + vm_offset_t l2pt; + vm_paddr_t pa; + pd_entry_t *l1; + u_int l1_slot; + + KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address")); + + l1 = (pd_entry_t *)l1pt; + l1_slot = pmap_l1_index(va); + l2pt = l2_start; + + for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) { + KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); + + pa = pmap_early_vtophys(l1pt, l2pt); + pmap_store(&l1[l1_slot], + (pa & ~Ln_TABLE_MASK) | L1_TABLE); + l2pt += PAGE_SIZE; + } + + /* Clean the L2 page table */ + memset((void *)l2_start, 0, l2pt - l2_start); + + return l2pt; +} + +static vm_offset_t +pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) +{ + vm_offset_t l3pt; + vm_paddr_t pa; + pd_entry_t *l2; + u_int l2_slot; + + KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); + + l2 = pmap_l2(kernel_pmap, va); + l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE); + l2_slot = pmap_l2_index(va); + l3pt = l3_start; + + for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { + KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index")); + + pa = pmap_early_vtophys(l1pt, l3pt); + pmap_store(&l2[l2_slot], + (pa & ~Ln_TABLE_MASK) | ATTR_S1_UXN | L2_TABLE); + l3pt += PAGE_SIZE; + } + + /* Clean the L2 page table */ + memset((void *)l3_start, 0, l3pt - l3_start); + + return l3pt; +} + +/* + * Bootstrap the system enough to run with virtual memory. + */ +void +pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart, + vm_size_t kernlen) +{ + vm_offset_t freemempos; + vm_offset_t dpcpu, msgbufpv; + vm_paddr_t start_pa, pa, min_pa; + uint64_t kern_delta; + int i; + + /* Verify that the ASID is set through TTBR0. */ + KASSERT((READ_SPECIALREG(tcr_el1) & TCR_A1) == 0, + ("pmap_bootstrap: TCR_EL1.A1 != 0")); + + kern_delta = KERNBASE - kernstart; + + printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); + printf("%lx\n", l1pt); + printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); + + /* Set this early so we can use the pagetable walking functions */ + kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt; + PMAP_LOCK_INIT(kernel_pmap); + kernel_pmap->pm_l0_paddr = l0pt - kern_delta; + kernel_pmap->pm_cookie = COOKIE_FROM(-1, INT_MIN); + kernel_pmap->pm_stage = PM_STAGE1; + kernel_pmap->pm_asid_set = &asids; + + /* Assume the address we were loaded to is a valid physical address */ + min_pa = KERNBASE - kern_delta; + + physmap_idx = physmem_avail(physmap, nitems(physmap)); + physmap_idx /= 2; + + /* + * Find the minimum physical address. 
physmap is sorted, + * but may contain empty ranges. + */ + for (i = 0; i < physmap_idx * 2; i += 2) { + if (physmap[i] == physmap[i + 1]) + continue; + if (physmap[i] <= min_pa) + min_pa = physmap[i]; + } + + freemempos = KERNBASE + kernlen; + freemempos = roundup2(freemempos, PAGE_SIZE); + + /* Create a direct map region early so we can use it for pa -> va */ + freemempos = pmap_bootstrap_dmap(l1pt, min_pa, freemempos); + + start_pa = pa = KERNBASE - kern_delta; + + /* + * Create the l2 tables up to VM_MAX_KERNEL_ADDRESS. We assume that the + * loader allocated the first and only l2 page table page used to map + * the kernel, preloaded files and module metadata. + */ + freemempos = pmap_bootstrap_l2(l1pt, KERNBASE + L1_SIZE, freemempos); + /* And the l3 tables for the early devmap */ + freemempos = pmap_bootstrap_l3(l1pt, + VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE), freemempos); + + cpu_tlb_flushID(); + +#define alloc_pages(var, np) \ + (var) = freemempos; \ + freemempos += (np * PAGE_SIZE); \ + memset((char *)(var), 0, ((np) * PAGE_SIZE)); + + /* Allocate dynamic per-cpu area. */ + alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); + dpcpu_init((void *)dpcpu, 0); + + /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ + alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); + msgbufp = (void *)msgbufpv; + + /* Reserve some VA space for early BIOS/ACPI mapping */ + preinit_map_va = roundup2(freemempos, L2_SIZE); + + virtual_avail = preinit_map_va + PMAP_PREINIT_MAPPING_SIZE; + virtual_avail = roundup2(virtual_avail, L1_SIZE); + virtual_end = VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE); + kernel_vm_end = virtual_avail; + + pa = pmap_early_vtophys(l1pt, freemempos); + + physmem_exclude_region(start_pa, pa - start_pa, EXFLAG_NOALLOC); + + cpu_tlb_flushID(); +} + +/* + * Initialize a vm_page's machine-dependent fields. + */ +void +pmap_page_init(vm_page_t m) +{ + + TAILQ_INIT(&m->md.pv_list); + m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; +} + +static void +pmap_init_asids(struct asid_set *set, int bits) +{ + int i; + + set->asid_bits = bits; + + /* + * We may be too early in the overall initialization process to use + * bit_alloc(). + */ + set->asid_set_size = 1 << set->asid_bits; + set->asid_set = (bitstr_t *)kmem_malloc(bitstr_size(set->asid_set_size), + M_WAITOK | M_ZERO); + for (i = 0; i < ASID_FIRST_AVAILABLE; i++) + bit_set(set->asid_set, i); + set->asid_next = ASID_FIRST_AVAILABLE; + mtx_init(&set->asid_set_mutex, "asid set", NULL, MTX_SPIN); +} + +/* + * Initialize the pmap module. + * Called by vm_init, to initialize any structures that the pmap + * system needs to map virtual memory. + */ +void +pmap_init(void) +{ + vm_size_t s; + uint64_t mmfr1; + int i, pv_npg, vmid_bits; + + /* + * Are large page mappings enabled? + */ + TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled); + if (superpages_enabled) { + KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, + ("pmap_init: can't assign to pagesizes[1]")); + pagesizes[1] = L2_SIZE; + } + + /* + * Initialize the ASID allocator. + */ + pmap_init_asids(&asids, + (READ_SPECIALREG(tcr_el1) & TCR_ASID_16) != 0 ? 16 : 8); + + if (has_hyp()) { + mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1); + vmid_bits = 8; + + if (ID_AA64MMFR1_VMIDBits_VAL(mmfr1) == + ID_AA64MMFR1_VMIDBits_16) + vmid_bits = 16; + pmap_init_asids(&vmids, vmid_bits); + } + + /* + * Initialize the pv chunk list mutex. + */ + mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); + + /* + * Initialize the pool of pv list locks. 
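+ * A page's lock is chosen by hashing its physical address:
+ * PHYS_TO_PV_LIST_LOCK() above takes pa_index(pa) modulo
+ * NPV_LIST_LOCKS (i.e. modulo MAXCPU), so unrelated pages usually
+ * contend on different locks while all mappings of a single page are
+ * serialized by one lock.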
+ */ + for (i = 0; i < NPV_LIST_LOCKS; i++) + rw_init(&pv_list_locks[i], "pmap pv list"); + + /* + * Calculate the size of the pv head table for superpages. + */ + pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE); + + /* + * Allocate memory for the pv head table for superpages. + */ + s = (vm_size_t)(pv_npg * sizeof(struct md_page)); + s = round_page(s); + pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO); + for (i = 0; i < pv_npg; i++) + TAILQ_INIT(&pv_table[i].pv_list); + TAILQ_INIT(&pv_dummy.pv_list); + + vm_initialized = 1; +} + +static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "2MB page mapping counters"); + +static u_long pmap_l2_demotions; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD, + &pmap_l2_demotions, 0, "2MB page demotions"); + +static u_long pmap_l2_mappings; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, mappings, CTLFLAG_RD, + &pmap_l2_mappings, 0, "2MB page mappings"); + +static u_long pmap_l2_p_failures; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD, + &pmap_l2_p_failures, 0, "2MB page promotion failures"); + +static u_long pmap_l2_promotions; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD, + &pmap_l2_promotions, 0, "2MB page promotions"); + +/* + * Invalidate a single TLB entry. + */ +static __inline void +pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +{ + uint64_t r; + + PMAP_ASSERT_STAGE1(pmap); + + dsb(ishst); + if (pmap == kernel_pmap) { + r = atop(va); + __asm __volatile("tlbi vaae1is, %0" : : "r" (r)); + } else { + r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)) | atop(va); + __asm __volatile("tlbi vae1is, %0" : : "r" (r)); + } + dsb(ish); + isb(); +} + +static __inline void +pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + uint64_t end, r, start; + + PMAP_ASSERT_STAGE1(pmap); + + dsb(ishst); + if (pmap == kernel_pmap) { + start = atop(sva); + end = atop(eva); + for (r = start; r < end; r++) + __asm __volatile("tlbi vaae1is, %0" : : "r" (r)); + } else { + start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)); + start |= atop(sva); + end |= atop(eva); + for (r = start; r < end; r++) + __asm __volatile("tlbi vae1is, %0" : : "r" (r)); + } + dsb(ish); + isb(); +} + +static __inline void +pmap_invalidate_all(pmap_t pmap) +{ + uint64_t r; + + PMAP_ASSERT_STAGE1(pmap); + + dsb(ishst); + if (pmap == kernel_pmap) { + __asm __volatile("tlbi vmalle1is"); + } else { + r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)); + __asm __volatile("tlbi aside1is, %0" : : "r" (r)); + } + dsb(ish); + isb(); +} + +/* + * Routine: pmap_extract + * Function: + * Extract the physical page address associated + * with the given map/virtual_address pair. + */ +vm_paddr_t +pmap_extract(pmap_t pmap, vm_offset_t va) +{ + pt_entry_t *pte, tpte; + vm_paddr_t pa; + int lvl; + + pa = 0; + PMAP_LOCK(pmap); + /* + * Find the block or page map for this virtual address. pmap_pte + * will return either a valid block/page entry, or NULL. 
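+ * The returned level tells us how much of the VA is an offset into
+ * the mapping; e.g. for a 2 MiB L2 block the result is
+ * (tpte & ~ATTR_MASK) | (va & L2_OFFSET), the block base plus the low
+ * 21 bits of the VA.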
+ */ + pte = pmap_pte(pmap, va, &lvl); + if (pte != NULL) { + tpte = pmap_load(pte); + pa = tpte & ~ATTR_MASK; + switch(lvl) { + case 1: + KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, + ("pmap_extract: Invalid L1 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L1_OFFSET); + break; + case 2: + KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, + ("pmap_extract: Invalid L2 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L2_OFFSET); + break; + case 3: + KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, + ("pmap_extract: Invalid L3 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L3_OFFSET); + break; + } + } + PMAP_UNLOCK(pmap); + return (pa); +} + +/* + * Routine: pmap_extract_and_hold + * Function: + * Atomically extract and hold the physical page + * with the given pmap and virtual address pair + * if that mapping permits the given protection. + */ +vm_page_t +pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) +{ + pt_entry_t *pte, tpte; + vm_offset_t off; + vm_page_t m; + int lvl; + bool use; + + m = NULL; + PMAP_LOCK(pmap); + pte = pmap_pte(pmap, va, &lvl); + if (pte != NULL) { + tpte = pmap_load(pte); + + KASSERT(lvl > 0 && lvl <= 3, + ("pmap_extract_and_hold: Invalid level %d", lvl)); + CTASSERT(L1_BLOCK == L2_BLOCK); + KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) || + (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK), + ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl, + tpte & ATTR_DESCR_MASK)); + + use = false; + if ((prot & VM_PROT_WRITE) == 0) + use = true; + else if (pmap->pm_stage == PM_STAGE1 && + (tpte & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP(ATTR_S1_AP_RW)) + use = true; + else if (pmap->pm_stage == PM_STAGE2 && + ((tpte & ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE)) == + ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE))) + use = true; + + if (use) { + switch(lvl) { + case 1: + off = va & L1_OFFSET; + break; + case 2: + off = va & L2_OFFSET; + break; + case 3: + default: + off = 0; + } + m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off); + if (!vm_page_wire_mapped(m)) + m = NULL; + } + } + PMAP_UNLOCK(pmap); + return (m); +} + +vm_paddr_t +pmap_kextract(vm_offset_t va) +{ + pt_entry_t *pte, tpte; + + if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) + return (DMAP_TO_PHYS(va)); + pte = pmap_l1(kernel_pmap, va); + if (pte == NULL) + return (0); + + /* + * A concurrent pmap_update_entry() will clear the entry's valid bit + * but leave the rest of the entry unchanged. Therefore, we treat a + * non-zero entry as being valid, and we ignore the valid bit when + * determining whether the entry maps a block, page, or table. + */ + tpte = pmap_load(pte); + if (tpte == 0) + return (0); + if ((tpte & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK) + return ((tpte & ~ATTR_MASK) | (va & L1_OFFSET)); + pte = pmap_l1_to_l2(&tpte, va); + tpte = pmap_load(pte); + if (tpte == 0) + return (0); + if ((tpte & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK) + return ((tpte & ~ATTR_MASK) | (va & L2_OFFSET)); + pte = pmap_l2_to_l3(&tpte, va); + tpte = pmap_load(pte); + if (tpte == 0) + return (0); + return ((tpte & ~ATTR_MASK) | (va & L3_OFFSET)); +} + +/*************************************************** + * Low level mapping routines..... 
+ ***************************************************/ + +void +pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode) +{ + pd_entry_t *pde; + pt_entry_t *pte, attr; + vm_offset_t va; + int lvl; + + KASSERT((pa & L3_OFFSET) == 0, + ("pmap_kenter: Invalid physical address")); + KASSERT((sva & L3_OFFSET) == 0, + ("pmap_kenter: Invalid virtual address")); + KASSERT((size & PAGE_MASK) == 0, + ("pmap_kenter: Mapping is not page-sized")); + + attr = ATTR_DEFAULT | ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_XN | + ATTR_S1_IDX(mode) | L3_PAGE; + va = sva; + while (size != 0) { + pde = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_kenter: Invalid page entry, va: 0x%lx", va)); + KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl)); + + pte = pmap_l2_to_l3(pde, va); + pmap_load_store(pte, (pa & ~L3_OFFSET) | attr); + + va += PAGE_SIZE; + pa += PAGE_SIZE; + size -= PAGE_SIZE; + } + pmap_invalidate_range(kernel_pmap, sva, va); +} + +void +pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) +{ + + pmap_kenter(sva, size, pa, VM_MEMATTR_DEVICE); +} + +/* + * Remove a page from the kernel pagetables. + */ +PMAP_INLINE void +pmap_kremove(vm_offset_t va) +{ + pt_entry_t *pte; + int lvl; + + pte = pmap_pte(kernel_pmap, va, &lvl); + KASSERT(pte != NULL, ("pmap_kremove: Invalid address")); + KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl)); + + pmap_clear(pte); + pmap_invalidate_page(kernel_pmap, va); +} + +void +pmap_kremove_device(vm_offset_t sva, vm_size_t size) +{ + pt_entry_t *pte; + vm_offset_t va; + int lvl; + + KASSERT((sva & L3_OFFSET) == 0, + ("pmap_kremove_device: Invalid virtual address")); + KASSERT((size & PAGE_MASK) == 0, + ("pmap_kremove_device: Mapping is not page-sized")); + + va = sva; + while (size != 0) { + pte = pmap_pte(kernel_pmap, va, &lvl); + KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va)); + KASSERT(lvl == 3, + ("Invalid device pagetable level: %d != 3", lvl)); + pmap_clear(pte); + + va += PAGE_SIZE; + size -= PAGE_SIZE; + } + pmap_invalidate_range(kernel_pmap, sva, va); +} + +/* + * Used to map a range of physical addresses into kernel + * virtual address space. + * + * The value passed in '*virt' is a suggested virtual address for + * the mapping. Architectures which can support a direct-mapped + * physical to virtual region can return the appropriate address + * within that region, leaving '*virt' unchanged. Other + * architectures should map the pages starting at '*virt' and + * update '*virt' with the first usable address after the mapped + * region. + */ +vm_offset_t +pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) +{ + return PHYS_TO_DMAP(start); +} + +/* + * Add a list of wired pages to the kva + * this routine is only used for temporary + * kernel mappings that do not need to have + * page modification or references recorded. + * Note that old mappings are simply written + * over. The page *must* be wired. + * Note: SMP coherent. Uses a ranged shootdown IPI. 
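+ * + * An illustrative usage sketch (the names here are hypothetical, not + * part of this change): + * + *	pmap_qenter(sva, ma, npages); + *	... access the wired pages through the KVA window at sva ... + *	pmap_qremove(sva, npages);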
+ */ +void +pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) +{ + pd_entry_t *pde; + pt_entry_t *pte, pa; + vm_offset_t va; + vm_page_t m; + int i, lvl; + + va = sva; + for (i = 0; i < count; i++) { + pde = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_qenter: Invalid page entry, va: 0x%lx", va)); + KASSERT(lvl == 2, + ("pmap_qenter: Invalid level %d", lvl)); + + m = ma[i]; + pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | + ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_XN | + ATTR_S1_IDX(m->md.pv_memattr) | L3_PAGE; + pte = pmap_l2_to_l3(pde, va); + pmap_load_store(pte, pa); + + va += L3_SIZE; + } + pmap_invalidate_range(kernel_pmap, sva, va); +} + +/* + * This routine tears out page mappings from the + * kernel -- it is meant only for temporary mappings. + */ +void +pmap_qremove(vm_offset_t sva, int count) +{ + pt_entry_t *pte; + vm_offset_t va; + int lvl; + + KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); + + va = sva; + while (count-- > 0) { + pte = pmap_pte(kernel_pmap, va, &lvl); + KASSERT(lvl == 3, + ("Invalid device pagetable level: %d != 3", lvl)); + if (pte != NULL) { + pmap_clear(pte); + } + + va += PAGE_SIZE; + } + pmap_invalidate_range(kernel_pmap, sva, va); +} + +/*************************************************** + * Page table page management routines..... + ***************************************************/ +/* + * Schedule the specified unused page table page to be freed. Specifically, + * add the page to the specified list of pages that will be released to the + * physical memory manager after the TLB has been updated. + */ +static __inline void +pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, + boolean_t set_PG_ZERO) +{ + + if (set_PG_ZERO) + m->flags |= PG_ZERO; + else + m->flags &= ~PG_ZERO; + SLIST_INSERT_HEAD(free, m, plinks.s.ss); +} + +/* + * Decrements a page table page's reference count, which is used to record the + * number of valid page table entries within the page. If the reference count + * drops to zero, then the page table page is unmapped. Returns TRUE if the + * page table page was unmapped and FALSE otherwise. 
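+ * + * Because the reference count tracks the number of valid entries in the + * page, unmapping the last entry of an L3 page must in turn drop the + * reference that page holds on its parent L2 page, and so on up the + * hierarchy; _pmap_unwire_l3() recurses for exactly that reason.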
+ */ +static inline boolean_t +pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) +{ + + --m->ref_count; + if (m->ref_count == 0) { + _pmap_unwire_l3(pmap, va, m, free); + return (TRUE); + } else + return (FALSE); +} + +static void +_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + /* + * unmap the page table page + */ + if (m->pindex >= (NUL2E + NUL1E)) { + /* l1 page */ + pd_entry_t *l0; + + l0 = pmap_l0(pmap, va); + pmap_clear(l0); + } else if (m->pindex >= NUL2E) { + /* l2 page */ + pd_entry_t *l1; + + l1 = pmap_l1(pmap, va); + pmap_clear(l1); + } else { + /* l3 page */ + pd_entry_t *l2; + + l2 = pmap_l2(pmap, va); + pmap_clear(l2); + } + pmap_resident_count_dec(pmap, 1); + if (m->pindex < NUL2E) { + /* We just released an l3, unhold the matching l2 */ + pd_entry_t *l1, tl1; + vm_page_t l2pg; + + l1 = pmap_l1(pmap, va); + tl1 = pmap_load(l1); + l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); + pmap_unwire_l3(pmap, va, l2pg, free); + } else if (m->pindex < (NUL2E + NUL1E)) { + /* We just released an l2, unhold the matching l1 */ + pd_entry_t *l0, tl0; + vm_page_t l1pg; + + l0 = pmap_l0(pmap, va); + tl0 = pmap_load(l0); + l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); + pmap_unwire_l3(pmap, va, l1pg, free); + } + pmap_invalidate_page(pmap, va); + + /* + * Put page on a list so that it is released after + * *ALL* TLB shootdown is done + */ + pmap_add_delayed_free_list(m, free, TRUE); +} + +/* + * After removing a page table entry, this routine is used to + * conditionally free the page, and manage the reference count. + */ +static int +pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, + struct spglist *free) +{ + vm_page_t mpte; + + if (va >= VM_MAXUSER_ADDRESS) + return (0); + KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); + mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK); + return (pmap_unwire_l3(pmap, va, mpte, free)); +} + +/* + * Release a page table page reference after a failed attempt to create a + * mapping. + */ +static void +pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte) +{ + struct spglist free; + + SLIST_INIT(&free); + if (pmap_unwire_l3(pmap, va, mpte, &free)) { + /* + * Although "va" was never mapped, the TLB could nonetheless + * have intermediate entries that refer to the freed page + * table pages. Invalidate those entries. + * + * XXX redundant invalidation (See _pmap_unwire_l3().) 
+ */ + pmap_invalidate_page(pmap, va); + vm_page_free_pages_toq(&free, true); + } +} + +void +pmap_pinit0(pmap_t pmap) +{ + + PMAP_LOCK_INIT(pmap); + bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); + pmap->pm_l0_paddr = READ_SPECIALREG(ttbr0_el1); + pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr); + pmap->pm_root.rt_root = 0; + pmap->pm_cookie = COOKIE_FROM(ASID_RESERVED_FOR_PID_0, INT_MIN); + pmap->pm_stage = PM_STAGE1; + pmap->pm_asid_set = &asids; + + PCPU_SET(curpmap, pmap); +} + +int +pmap_pinit_stage(pmap_t pmap, enum pmap_stage stage) +{ + vm_page_t l0pt; + + /* + * allocate the l0 page + */ + while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) + vm_wait(NULL); + + pmap->pm_l0_paddr = VM_PAGE_TO_PHYS(l0pt); + pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr); + + if ((l0pt->flags & PG_ZERO) == 0) + pagezero(pmap->pm_l0); + + pmap->pm_root.rt_root = 0; + bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); + pmap->pm_cookie = COOKIE_FROM(-1, INT_MAX); + + pmap->pm_stage = stage; + switch (stage) { + case PM_STAGE1: + pmap->pm_asid_set = &asids; + break; + case PM_STAGE2: + pmap->pm_asid_set = &vmids; + break; + default: + panic("%s: Invalid pmap type %d", __func__, stage); + break; + } + + /* XXX Temporarily disable deferred ASID allocation. */ + pmap_alloc_asid(pmap); + + return (1); +} + +int +pmap_pinit(pmap_t pmap) +{ + + return (pmap_pinit_stage(pmap, PM_STAGE1)); +} + +/* + * This routine is called if the desired page table page does not exist. + * + * If page table page allocation fails, this routine may sleep before + * returning NULL. It sleeps only if a lock pointer was given. + * + * Note: If a page allocation fails at page table level two or three, + * one or two pages may be held during the wait, only to be released + * afterwards. This conservative approach is easily argued to avoid + * race conditions. + */ +static vm_page_t +_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) +{ + vm_page_t m, l1pg, l2pg; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + /* + * Allocate a page table page. + */ + if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { + if (lockp != NULL) { + RELEASE_PV_LIST_LOCK(lockp); + PMAP_UNLOCK(pmap); + vm_wait(NULL); + PMAP_LOCK(pmap); + } + + /* + * Indicate the need to retry. While waiting, the page table + * page may have been allocated. + */ + return (NULL); + } + if ((m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + + /* + * Because of AArch64's weak memory consistency model, we must have a + * barrier here to ensure that the stores for zeroing "m", whether by + * pmap_zero_page() or an earlier function, are visible before adding + * "m" to the page table. Otherwise, a page table walk by another + * processor's MMU could see the mapping to "m" and a stale, non-zero + * PTE within "m". + */ + dmb(ishst); + + /* + * Map the pagetable page into the process address space, if + * it isn't already there. 
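+	 * The ptepindex encodes the level being allocated: indices below + * NUL2E name L3 pages, indices in [NUL2E, NUL2E + NUL1E) name L2 + * pages, and larger indices name L1 pages, mirroring the decode in + * _pmap_unwire_l3() above.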
+ */ + + if (ptepindex >= (NUL2E + NUL1E)) { + pd_entry_t *l0; + vm_pindex_t l0index; + + l0index = ptepindex - (NUL2E + NUL1E); + l0 = &pmap->pm_l0[l0index]; + pmap_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE); + } else if (ptepindex >= NUL2E) { + vm_pindex_t l0index, l1index; + pd_entry_t *l0, *l1; + pd_entry_t tl0; + + l1index = ptepindex - NUL2E; + l0index = l1index >> L0_ENTRIES_SHIFT; + + l0 = &pmap->pm_l0[l0index]; + tl0 = pmap_load(l0); + if (tl0 == 0) { + /* recurse for allocating page dir */ + if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, + lockp) == NULL) { + vm_page_unwire_noq(m); + vm_page_free_zero(m); + return (NULL); + } + } else { + l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); + l1pg->ref_count++; + } + + l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); + l1 = &l1[ptepindex & Ln_ADDR_MASK]; + pmap_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); + } else { + vm_pindex_t l0index, l1index; + pd_entry_t *l0, *l1, *l2; + pd_entry_t tl0, tl1; + + l1index = ptepindex >> Ln_ENTRIES_SHIFT; + l0index = l1index >> L0_ENTRIES_SHIFT; + + l0 = &pmap->pm_l0[l0index]; + tl0 = pmap_load(l0); + if (tl0 == 0) { + /* recurse for allocating page dir */ + if (_pmap_alloc_l3(pmap, NUL2E + l1index, + lockp) == NULL) { + vm_page_unwire_noq(m); + vm_page_free_zero(m); + return (NULL); + } + tl0 = pmap_load(l0); + l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); + l1 = &l1[l1index & Ln_ADDR_MASK]; + } else { + l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); + l1 = &l1[l1index & Ln_ADDR_MASK]; + tl1 = pmap_load(l1); + if (tl1 == 0) { + /* recurse for allocating page dir */ + if (_pmap_alloc_l3(pmap, NUL2E + l1index, + lockp) == NULL) { + vm_page_unwire_noq(m); + vm_page_free_zero(m); + return (NULL); + } + } else { + l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); + l2pg->ref_count++; + } + } + + l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); + l2 = &l2[ptepindex & Ln_ADDR_MASK]; + pmap_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE); + } + + pmap_resident_count_inc(pmap, 1); + + return (m); +} + +static pd_entry_t * +pmap_alloc_l2(pmap_t pmap, vm_offset_t va, vm_page_t *l2pgp, + struct rwlock **lockp) +{ + pd_entry_t *l1, *l2; + vm_page_t l2pg; + vm_pindex_t l2pindex; + +retry: + l1 = pmap_l1(pmap, va); + if (l1 != NULL && (pmap_load(l1) & ATTR_DESCR_MASK) == L1_TABLE) { + l2 = pmap_l1_to_l2(l1, va); + if (va < VM_MAXUSER_ADDRESS) { + /* Add a reference to the L2 page. */ + l2pg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK); + l2pg->ref_count++; + } else + l2pg = NULL; + } else if (va < VM_MAXUSER_ADDRESS) { + /* Allocate a L2 page. */ + l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT; + l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp); + if (l2pg == NULL) { + if (lockp != NULL) + goto retry; + else + return (NULL); + } + l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg)); + l2 = &l2[pmap_l2_index(va)]; + } else + panic("pmap_alloc_l2: missing page table page for va %#lx", + va); + *l2pgp = l2pg; + return (l2); +} + +static vm_page_t +pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) +{ + vm_pindex_t ptepindex; + pd_entry_t *pde, tpde; +#ifdef INVARIANTS + pt_entry_t *pte; +#endif + vm_page_t m; + int lvl; + + /* + * Calculate pagetable page index + */ + ptepindex = pmap_l2_pindex(va); +retry: + /* + * Get the page directory entry + */ + pde = pmap_pde(pmap, va, &lvl); + + /* + * If the page table page is mapped, we just increment the hold count, + * and activate it. If we get a level 2 pde it will point to a level 3 + * table. 
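+	 * A level of 0 or 1 with a non-zero next-level entry would mean an + * unsupported superpage sits here; the INVARIANTS assertions below + * catch that case.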
+ */ + switch (lvl) { + case -1: + break; + case 0: +#ifdef INVARIANTS + pte = pmap_l0_to_l1(pde, va); + KASSERT(pmap_load(pte) == 0, + ("pmap_alloc_l3: TODO: l0 superpages")); +#endif + break; + case 1: +#ifdef INVARIANTS + pte = pmap_l1_to_l2(pde, va); + KASSERT(pmap_load(pte) == 0, + ("pmap_alloc_l3: TODO: l1 superpages")); +#endif + break; + case 2: + tpde = pmap_load(pde); + if (tpde != 0) { + m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); + m->ref_count++; + return (m); + } + break; + default: + panic("pmap_alloc_l3: Invalid level %d", lvl); + } + + /* + * We get here if the pte page isn't mapped, or if it has been + * deallocated. + */ + m = _pmap_alloc_l3(pmap, ptepindex, lockp); + if (m == NULL && lockp != NULL) + goto retry; + + return (m); +} + +/*************************************************** + * Pmap allocation/deallocation routines. + ***************************************************/ + +/* + * Release any resources held by the given physical map. + * Called when a pmap initialized by pmap_pinit is being released. + * Should only be called if the map contains no valid mappings. + */ +void +pmap_release(pmap_t pmap) +{ + struct asid_set *set; + vm_page_t m; + int asid; + + KASSERT(pmap->pm_stats.resident_count == 0, + ("pmap_release: pmap resident count %ld != 0", + pmap->pm_stats.resident_count)); + KASSERT(vm_radix_is_empty(&pmap->pm_root), + ("pmap_release: pmap has reserved page table page(s)")); + + set = pmap->pm_asid_set; + KASSERT(set != NULL, ("%s: NULL asid set", __func__)); + + /* + * Allow the ASID to be reused. For stage 2 VMIDs we don't invalidate + * the TLB entries when removing them; we instead rely on a later TLB + * invalidation, which happens when the VMID generation is updated. + * Because of this we don't reuse VMIDs within a generation. + */ + if (pmap->pm_stage == PM_STAGE1) { + mtx_lock_spin(&set->asid_set_mutex); + if (COOKIE_TO_EPOCH(pmap->pm_cookie) == set->asid_epoch) { + asid = COOKIE_TO_ASID(pmap->pm_cookie); + KASSERT(asid >= ASID_FIRST_AVAILABLE && + asid < set->asid_set_size, + ("pmap_release: pmap cookie has out-of-range asid")); + bit_clear(set->asid_set, asid); + } + mtx_unlock_spin(&set->asid_set_mutex); + } + + m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr); + vm_page_unwire_noq(m); + vm_page_free_zero(m); +} + +static int +kvm_size(SYSCTL_HANDLER_ARGS) +{ + unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; + + return sysctl_handle_long(oidp, &ksize, 0, req); +} +SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE, + 0, 0, kvm_size, "LU", + "Size of KVM"); + +static int +kvm_free(SYSCTL_HANDLER_ARGS) +{ + unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; + + return sysctl_handle_long(oidp, &kfree, 0, req); +} +SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE, + 0, 0, kvm_free, "LU", + "Amount of KVM free"); + +/* + * Grow the number of kernel page table entries, if needed. + */ +void +pmap_growkernel(vm_offset_t addr) +{ + vm_paddr_t paddr; + vm_page_t nkpg; + pd_entry_t *l0, *l1, *l2; + + mtx_assert(&kernel_map->system_mtx, MA_OWNED); + + addr = roundup2(addr, L2_SIZE); + if (addr - 1 >= vm_map_max(kernel_map)) + addr = vm_map_max(kernel_map); + while (kernel_vm_end < addr) { + l0 = pmap_l0(kernel_pmap, kernel_vm_end); + KASSERT(pmap_load(l0) != 0, + ("pmap_growkernel: No level 0 kernel entry")); + + l1 = pmap_l0_to_l1(l0, kernel_vm_end); + if (pmap_load(l1) == 0) { + /* We need a new PDP entry */ + nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, + VM_ALLOC_INTERRUPT | 
VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED | VM_ALLOC_ZERO); + if (nkpg == NULL) + panic("pmap_growkernel: no memory to grow kernel"); + if ((nkpg->flags & PG_ZERO) == 0) + pmap_zero_page(nkpg); + /* See the dmb() in _pmap_alloc_l3(). */ + dmb(ishst); + paddr = VM_PAGE_TO_PHYS(nkpg); + pmap_store(l1, paddr | L1_TABLE); + continue; /* try again */ + } + l2 = pmap_l1_to_l2(l1, kernel_vm_end); + if (pmap_load(l2) != 0) { + kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); + break; + } + continue; + } + + nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, + VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | + VM_ALLOC_ZERO); + if (nkpg == NULL) + panic("pmap_growkernel: no memory to grow kernel"); + if ((nkpg->flags & PG_ZERO) == 0) + pmap_zero_page(nkpg); + /* See the dmb() in _pmap_alloc_l3(). */ + dmb(ishst); + paddr = VM_PAGE_TO_PHYS(nkpg); + pmap_store(l2, paddr | L2_TABLE); + + kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); + break; + } + } +} + +/*************************************************** + * page management routines. + ***************************************************/ + +CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); +CTASSERT(_NPCM == 3); +CTASSERT(_NPCPV == 168); + +static __inline struct pv_chunk * +pv_to_chunk(pv_entry_t pv) +{ + + return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); +} + +#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) + +#define PC_FREE0 0xfffffffffffffffful +#define PC_FREE1 0xfffffffffffffffful +#define PC_FREE2 0x000000fffffffffful + +static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; + +#if 0 +#ifdef PV_STATS +static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; + +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, + "Current number of pv entry chunks"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, + "Current number of pv entry chunks allocated"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, + "Current number of pv entry chunks frees"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, + "Number of times tried to get a chunk page but failed."); + +static long pv_entry_frees, pv_entry_allocs, pv_entry_count; +static int pv_entry_spare; + +SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, + "Current number of pv entry frees"); +SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, + "Current number of pv entry allocs"); +SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, + "Current number of pv entries"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, + "Current number of spare pv entries"); +#endif +#endif /* 0 */ + +/* + * We are in a serious low memory condition. Resort to + * drastic measures to free some pages so we can allocate + * another pv entry chunk. + * + * Returns NULL if PV entries were reclaimed from the specified pmap. + * + * We do not, however, unmap 2mpages because subsequent accesses will + * allocate per-page pv entries until repromotion occurs, thereby + * exacerbating the shortage of free pv entries. 
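+ * + * The scan below brackets its position in the global pv_chunks LRU list + * with a pair of marker chunks, so that the mutex can be dropped and the + * list rotated or scanned by concurrent reclaimers without losing our + * place.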
+ */ +static vm_page_t +reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) +{ + struct pv_chunk *pc, *pc_marker, *pc_marker_end; + struct pv_chunk_header pc_marker_b, pc_marker_end_b; + struct md_page *pvh; + pd_entry_t *pde; + pmap_t next_pmap, pmap; + pt_entry_t *pte, tpte; + pv_entry_t pv; + vm_offset_t va; + vm_page_t m, m_pc; + struct spglist free; + uint64_t inuse; + int bit, field, freed, lvl; + static int active_reclaims = 0; + + PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); + KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL")); + + pmap = NULL; + m_pc = NULL; + SLIST_INIT(&free); + bzero(&pc_marker_b, sizeof(pc_marker_b)); + bzero(&pc_marker_end_b, sizeof(pc_marker_end_b)); + pc_marker = (struct pv_chunk *)&pc_marker_b; + pc_marker_end = (struct pv_chunk *)&pc_marker_end_b; + + mtx_lock(&pv_chunks_mutex); + active_reclaims++; + TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru); + TAILQ_INSERT_TAIL(&pv_chunks, pc_marker_end, pc_lru); + while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end && + SLIST_EMPTY(&free)) { + next_pmap = pc->pc_pmap; + if (next_pmap == NULL) { + /* + * The next chunk is a marker. However, it is + * not our marker, so active_reclaims must be + * > 1. Consequently, the next_chunk code + * will not rotate the pv_chunks list. + */ + goto next_chunk; + } + mtx_unlock(&pv_chunks_mutex); + + /* + * A pv_chunk can only be removed from the pc_lru list + * when both pv_chunks_mutex is owned and the + * corresponding pmap is locked. + */ + if (pmap != next_pmap) { + if (pmap != NULL && pmap != locked_pmap) + PMAP_UNLOCK(pmap); + pmap = next_pmap; + /* Avoid deadlock and lock recursion. */ + if (pmap > locked_pmap) { + RELEASE_PV_LIST_LOCK(lockp); + PMAP_LOCK(pmap); + mtx_lock(&pv_chunks_mutex); + continue; + } else if (pmap != locked_pmap) { + if (PMAP_TRYLOCK(pmap)) { + mtx_lock(&pv_chunks_mutex); + continue; + } else { + pmap = NULL; /* pmap is not locked */ + mtx_lock(&pv_chunks_mutex); + pc = TAILQ_NEXT(pc_marker, pc_lru); + if (pc == NULL || + pc->pc_pmap != next_pmap) + continue; + goto next_chunk; + } + } + } + + /* + * Destroy every non-wired, 4 KB page mapping in the chunk. + */ + freed = 0; + for (field = 0; field < _NPCM; field++) { + for (inuse = ~pc->pc_map[field] & pc_freemask[field]; + inuse != 0; inuse &= ~(1UL << bit)) { + bit = ffsl(inuse) - 1; + pv = &pc->pc_pventry[field * 64 + bit]; + va = pv->pv_va; + pde = pmap_pde(pmap, va, &lvl); + if (lvl != 2) + continue; + pte = pmap_l2_to_l3(pde, va); + tpte = pmap_load(pte); + if ((tpte & ATTR_SW_WIRED) != 0) + continue; + tpte = pmap_load_clear(pte); + m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK); + if (pmap_pte_dirty(pmap, tpte)) + vm_page_dirty(m); + if ((tpte & ATTR_AF) != 0) { + pmap_invalidate_page(pmap, va); + vm_page_aflag_set(m, PGA_REFERENCED); + } + CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + if (TAILQ_EMPTY(&m->md.pv_list) && + (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + if (TAILQ_EMPTY(&pvh->pv_list)) { + vm_page_aflag_clear(m, + PGA_WRITEABLE); + } + } + pc->pc_map[field] |= 1UL << bit; + pmap_unuse_pt(pmap, va, pmap_load(pde), &free); + freed++; + } + } + if (freed == 0) { + mtx_lock(&pv_chunks_mutex); + goto next_chunk; + } + /* Every freed mapping is for a 4 KB page. 
*/ + pmap_resident_count_dec(pmap, freed); + PV_STAT(atomic_add_long(&pv_entry_frees, freed)); + PV_STAT(atomic_add_int(&pv_entry_spare, freed)); + PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 && + pc->pc_map[2] == PC_FREE2) { + PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); + PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); + PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); + /* Entire chunk is free; return it. */ + m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); + dump_drop_page(m_pc->phys_addr); + mtx_lock(&pv_chunks_mutex); + TAILQ_REMOVE(&pv_chunks, pc, pc_lru); + break; + } + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); + mtx_lock(&pv_chunks_mutex); + /* One freed pv entry in locked_pmap is sufficient. */ + if (pmap == locked_pmap) + break; + +next_chunk: + TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru); + TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru); + if (active_reclaims == 1 && pmap != NULL) { + /* + * Rotate the pv chunks list so that we do not + * scan the same pv chunks that could not be + * freed (because they contained a wired + * and/or superpage mapping) on every + * invocation of reclaim_pv_chunk(). + */ + while ((pc = TAILQ_FIRST(&pv_chunks)) != pc_marker) { + MPASS(pc->pc_pmap != NULL); + TAILQ_REMOVE(&pv_chunks, pc, pc_lru); + TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); + } + } + } + TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru); + TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru); + active_reclaims--; + mtx_unlock(&pv_chunks_mutex); + if (pmap != NULL && pmap != locked_pmap) + PMAP_UNLOCK(pmap); + if (m_pc == NULL && !SLIST_EMPTY(&free)) { + m_pc = SLIST_FIRST(&free); + SLIST_REMOVE_HEAD(&free, plinks.s.ss); + /* Recycle a freed page table page. */ + m_pc->ref_count = 1; + } + vm_page_free_pages_toq(&free, true); + return (m_pc); +} + +/* + * free the pv_entry back to the free list + */ +static void +free_pv_entry(pmap_t pmap, pv_entry_t pv) +{ + struct pv_chunk *pc; + int idx, field, bit; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + PV_STAT(atomic_add_long(&pv_entry_frees, 1)); + PV_STAT(atomic_add_int(&pv_entry_spare, 1)); + PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); + pc = pv_to_chunk(pv); + idx = pv - &pc->pc_pventry[0]; + field = idx / 64; + bit = idx % 64; + pc->pc_map[field] |= 1ul << bit; + if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || + pc->pc_map[2] != PC_FREE2) { + /* 98% of the time, pc is already at the head of the list. */ + if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); + } + return; + } + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + free_pv_chunk(pc); +} + +static void +free_pv_chunk(struct pv_chunk *pc) +{ + vm_page_t m; + + mtx_lock(&pv_chunks_mutex); + TAILQ_REMOVE(&pv_chunks, pc, pc_lru); + mtx_unlock(&pv_chunks_mutex); + PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); + PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); + PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); + /* entire chunk is free, return it */ + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); + dump_drop_page(m->phys_addr); + vm_page_unwire_noq(m); + vm_page_free(m); +} + +/* + * Returns a new PV entry, allocating a new PV chunk from the system when + * needed. If this PV chunk allocation fails and a PV list lock pointer was + * given, a PV chunk is reclaimed from an arbitrary pmap. 
Otherwise, NULL is + * returned. + * + * The given PV list lock may be released. + */ +static pv_entry_t +get_pv_entry(pmap_t pmap, struct rwlock **lockp) +{ + int bit, field; + pv_entry_t pv; + struct pv_chunk *pc; + vm_page_t m; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); +retry: + pc = TAILQ_FIRST(&pmap->pm_pvchunk); + if (pc != NULL) { + for (field = 0; field < _NPCM; field++) { + if (pc->pc_map[field]) { + bit = ffsl(pc->pc_map[field]) - 1; + break; + } + } + if (field < _NPCM) { + pv = &pc->pc_pventry[field * 64 + bit]; + pc->pc_map[field] &= ~(1ul << bit); + /* If this was the last item, move it to tail */ + if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && + pc->pc_map[2] == 0) { + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, + pc_list); + } + PV_STAT(atomic_add_long(&pv_entry_count, 1)); + PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); + return (pv); + } + } + /* No free items, allocate another chunk */ + m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED); + if (m == NULL) { + if (lockp == NULL) { + PV_STAT(pc_chunk_tryfail++); + return (NULL); + } + m = reclaim_pv_chunk(pmap, lockp); + if (m == NULL) + goto retry; + } + PV_STAT(atomic_add_int(&pc_chunk_count, 1)); + PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); + dump_add_page(m->phys_addr); + pc = (void *)PHYS_TO_DMAP(m->phys_addr); + pc->pc_pmap = pmap; + pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ + pc->pc_map[1] = PC_FREE1; + pc->pc_map[2] = PC_FREE2; + mtx_lock(&pv_chunks_mutex); + TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); + mtx_unlock(&pv_chunks_mutex); + pv = &pc->pc_pventry[0]; + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); + PV_STAT(atomic_add_long(&pv_entry_count, 1)); + PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); + return (pv); +} + +/* + * Ensure that the number of spare PV entries in the specified pmap meets or + * exceeds the given count, "needed". + * + * The given PV list lock may be released. + */ +static void +reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) +{ + struct pch new_tail; + struct pv_chunk *pc; + vm_page_t m; + int avail, free; + bool reclaimed; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); + + /* + * Newly allocated PV chunks must be stored in a private list until + * the required number of PV chunks have been allocated. Otherwise, + * reclaim_pv_chunk() could recycle one of these chunks. In + * contrast, these chunks must be added to the pmap upon allocation. 
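+	 * That is why each new chunk is inserted into pm_pvchunk immediately, + * while its pc_lru linkage is staged on the private "new_tail" list and + * only spliced onto pv_chunks once "needed" has been satisfied.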
+ */ + TAILQ_INIT(&new_tail); +retry: + avail = 0; + TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { + bit_count((bitstr_t *)pc->pc_map, 0, + sizeof(pc->pc_map) * NBBY, &free); + if (free == 0) + break; + avail += free; + if (avail >= needed) + break; + } + for (reclaimed = false; avail < needed; avail += _NPCPV) { + m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED); + if (m == NULL) { + m = reclaim_pv_chunk(pmap, lockp); + if (m == NULL) + goto retry; + reclaimed = true; + } + PV_STAT(atomic_add_int(&pc_chunk_count, 1)); + PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); + dump_add_page(m->phys_addr); + pc = (void *)PHYS_TO_DMAP(m->phys_addr); + pc->pc_pmap = pmap; + pc->pc_map[0] = PC_FREE0; + pc->pc_map[1] = PC_FREE1; + pc->pc_map[2] = PC_FREE2; + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); + PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV)); + + /* + * The reclaim might have freed a chunk from the current pmap. + * If that chunk contained available entries, we need to + * re-count the number of available entries. + */ + if (reclaimed) + goto retry; + } + if (!TAILQ_EMPTY(&new_tail)) { + mtx_lock(&pv_chunks_mutex); + TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); + mtx_unlock(&pv_chunks_mutex); + } +} + +/* + * First find and then remove the pv entry for the specified pmap and virtual + * address from the specified pv list. Returns the pv entry if found and NULL + * otherwise. This operation can be performed on pv lists for either 4KB or + * 2MB page mappings. + */ +static __inline pv_entry_t +pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) +{ + pv_entry_t pv; + + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + if (pmap == PV_PMAP(pv) && va == pv->pv_va) { + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + break; + } + } + return (pv); +} + +/* + * After demotion from a 2MB page mapping to 512 4KB page mappings, + * destroy the pv entry for the 2MB page mapping and reinstantiate the pv + * entries for each of the 4KB page mappings. + */ +static void +pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, + struct rwlock **lockp) +{ + struct md_page *pvh; + struct pv_chunk *pc; + pv_entry_t pv; + vm_offset_t va_last; + vm_page_t m; + int bit, field; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((va & L2_OFFSET) == 0, + ("pmap_pv_demote_l2: va is not 2mpage aligned")); + KASSERT((pa & L2_OFFSET) == 0, + ("pmap_pv_demote_l2: pa is not 2mpage aligned")); + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + + /* + * Transfer the 2mpage's pv entry for this mapping to the first + * page's pv list. Once this transfer begins, the pv list lock + * must not be released until the last pv entry is reinstantiated. + */ + pvh = pa_to_pvh(pa); + pv = pmap_pvh_remove(pvh, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found")); + m = PHYS_TO_VM_PAGE(pa); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + /* Instantiate the remaining Ln_ENTRIES - 1 pv entries. 
*/ + PV_STAT(atomic_add_long(&pv_entry_allocs, Ln_ENTRIES - 1)); + va_last = va + L2_SIZE - PAGE_SIZE; + for (;;) { + pc = TAILQ_FIRST(&pmap->pm_pvchunk); + KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || + pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare")); + for (field = 0; field < _NPCM; field++) { + while (pc->pc_map[field]) { + bit = ffsl(pc->pc_map[field]) - 1; + pc->pc_map[field] &= ~(1ul << bit); + pv = &pc->pc_pventry[field * 64 + bit]; + va += PAGE_SIZE; + pv->pv_va = va; + m++; + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_pv_demote_l2: page %p is not managed", m)); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + if (va == va_last) + goto out; + } + } + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); + } +out: + if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); + } + PV_STAT(atomic_add_long(&pv_entry_count, Ln_ENTRIES - 1)); + PV_STAT(atomic_subtract_int(&pv_entry_spare, Ln_ENTRIES - 1)); +} + +/* + * First find and then destroy the pv entry for the specified pmap and virtual + * address. This operation can be performed on pv lists for either 4KB or 2MB + * page mappings. + */ +static void +pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) +{ + pv_entry_t pv; + + pv = pmap_pvh_remove(pvh, pmap, va); + KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); + free_pv_entry(pmap, pv); +} + +/* + * Conditionally create the PV entry for a 4KB page mapping if the required + * memory can be allocated without resorting to reclamation. + */ +static boolean_t +pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, + struct rwlock **lockp) +{ + pv_entry_t pv; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + /* Pass NULL instead of the lock pointer to disable reclamation. */ + if ((pv = get_pv_entry(pmap, NULL)) != NULL) { + pv->pv_va = va; + CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + return (TRUE); + } else + return (FALSE); +} + +/* + * Create the PV entry for a 2MB page mapping. Always returns true unless the + * flag PMAP_ENTER_NORECLAIM is specified. If that flag is specified, returns + * false if the PV entry cannot be allocated without resorting to reclamation. + */ +static bool +pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t l2e, u_int flags, + struct rwlock **lockp) +{ + struct md_page *pvh; + pv_entry_t pv; + vm_paddr_t pa; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + /* Pass NULL instead of the lock pointer to disable reclamation. */ + if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ? 
+ NULL : lockp)) == NULL) + return (false); + pv->pv_va = va; + pa = l2e & ~ATTR_MASK; + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + pvh = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + return (true); +} + +static void +pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) +{ + pt_entry_t newl2, oldl2; + vm_page_t ml3; + vm_paddr_t ml3pa; + + KASSERT(!VIRT_IN_DMAP(va), ("removing direct mapping of %#lx", va)); + KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + ml3 = pmap_remove_pt_page(pmap, va); + if (ml3 == NULL) + panic("pmap_remove_kernel_l2: Missing pt page"); + + ml3pa = VM_PAGE_TO_PHYS(ml3); + newl2 = ml3pa | L2_TABLE; + + /* + * If this page table page was unmapped by a promotion, then it + * contains valid mappings. Zero it to invalidate those mappings. + */ + if (ml3->valid != 0) + pagezero((void *)PHYS_TO_DMAP(ml3pa)); + + /* + * Demote the mapping. The caller must have already invalidated the + * mapping (i.e., the "break" in break-before-make). + */ + oldl2 = pmap_load_store(l2, newl2); + KASSERT(oldl2 == 0, ("%s: found existing mapping at %p: %#lx", + __func__, l2, oldl2)); +} + +/* + * pmap_remove_l2: Do the things to unmap a level 2 superpage. + */ +static int +pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, + pd_entry_t l1e, struct spglist *free, struct rwlock **lockp) +{ + struct md_page *pvh; + pt_entry_t old_l2; + vm_offset_t eva, va; + vm_page_t m, ml3; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned")); + old_l2 = pmap_load_clear(l2); + KASSERT((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK, + ("pmap_remove_l2: L2e %lx is not a block mapping", old_l2)); + + /* + * Since a promotion must break the 4KB page mappings before making + * the 2MB page mapping, a pmap_invalidate_page() suffices. 
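+	 * In other words, there can be no stale 4KB TLB entries for other + * addresses within this 2MB region, so the single invalidation of + * "sva" below removes the only possible cached translation.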
+ */ + pmap_invalidate_page(pmap, sva); + + if (old_l2 & ATTR_SW_WIRED) + pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE; + pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE); + if (old_l2 & ATTR_SW_MANAGED) { + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, old_l2 & ~ATTR_MASK); + pvh = pa_to_pvh(old_l2 & ~ATTR_MASK); + pmap_pvh_free(pvh, pmap, sva); + eva = sva + L2_SIZE; + for (va = sva, m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK); + va < eva; va += PAGE_SIZE, m++) { + if (pmap_pte_dirty(pmap, old_l2)) + vm_page_dirty(m); + if (old_l2 & ATTR_AF) + vm_page_aflag_set(m, PGA_REFERENCED); + if (TAILQ_EMPTY(&m->md.pv_list) && + TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } + } + if (pmap == kernel_pmap) { + pmap_remove_kernel_l2(pmap, l2, sva); + } else { + ml3 = pmap_remove_pt_page(pmap, sva); + if (ml3 != NULL) { + KASSERT(ml3->valid == VM_PAGE_BITS_ALL, + ("pmap_remove_l2: l3 page not promoted")); + pmap_resident_count_dec(pmap, 1); + KASSERT(ml3->ref_count == NL3PG, + ("pmap_remove_l2: l3 page ref count error")); + ml3->ref_count = 0; + pmap_add_delayed_free_list(ml3, free, FALSE); + } + } + return (pmap_unuse_pt(pmap, sva, l1e, free)); +} + +/* + * pmap_remove_l3: do the things to unmap a page in a process + */ +static int +pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, + pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) +{ + struct md_page *pvh; + pt_entry_t old_l3; + vm_page_t m; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + old_l3 = pmap_load_clear(l3); + pmap_invalidate_page(pmap, va); + if (old_l3 & ATTR_SW_WIRED) + pmap->pm_stats.wired_count -= 1; + pmap_resident_count_dec(pmap, 1); + if (old_l3 & ATTR_SW_MANAGED) { + m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); + if (pmap_pte_dirty(pmap, old_l3)) + vm_page_dirty(m); + if (old_l3 & ATTR_AF) + vm_page_aflag_set(m, PGA_REFERENCED); + CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); + pmap_pvh_free(&m->md, pmap, va); + if (TAILQ_EMPTY(&m->md.pv_list) && + (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + if (TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } + } + return (pmap_unuse_pt(pmap, va, l2e, free)); +} + +/* + * Remove the specified range of addresses from the L3 page table that is + * identified by the given L2 entry. + */ +static void +pmap_remove_l3_range(pmap_t pmap, pd_entry_t l2e, vm_offset_t sva, + vm_offset_t eva, struct spglist *free, struct rwlock **lockp) +{ + struct md_page *pvh; + struct rwlock *new_lock; + pt_entry_t *l3, old_l3; + vm_offset_t va; + vm_page_t l3pg, m; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT(rounddown2(sva, L2_SIZE) + L2_SIZE == roundup2(eva, L2_SIZE), + ("pmap_remove_l3_range: range crosses an L3 page table boundary")); + l3pg = sva < VM_MAXUSER_ADDRESS ? 
PHYS_TO_VM_PAGE(l2e & ~ATTR_MASK) : + NULL; + va = eva; + for (l3 = pmap_l2_to_l3(&l2e, sva); sva != eva; l3++, sva += L3_SIZE) { + if (!pmap_l3_valid(pmap_load(l3))) { + if (va != eva) { + pmap_invalidate_range(pmap, va, sva); + va = eva; + } + continue; + } + old_l3 = pmap_load_clear(l3); + if ((old_l3 & ATTR_SW_WIRED) != 0) + pmap->pm_stats.wired_count--; + pmap_resident_count_dec(pmap, 1); + if ((old_l3 & ATTR_SW_MANAGED) != 0) { + m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); + if (pmap_pte_dirty(pmap, old_l3)) + vm_page_dirty(m); + if ((old_l3 & ATTR_AF) != 0) + vm_page_aflag_set(m, PGA_REFERENCED); + new_lock = PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)); + if (new_lock != *lockp) { + if (*lockp != NULL) { + /* + * Pending TLB invalidations must be + * performed before the PV list lock is + * released. Otherwise, a concurrent + * pmap_remove_all() on a physical page + * could return while a stale TLB entry + * still provides access to that page. + */ + if (va != eva) { + pmap_invalidate_range(pmap, va, + sva); + va = eva; + } + rw_wunlock(*lockp); + } + *lockp = new_lock; + rw_wlock(*lockp); + } + pmap_pvh_free(&m->md, pmap, sva); + if (TAILQ_EMPTY(&m->md.pv_list) && + (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + if (TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } + } + if (va == eva) + va = sva; + if (l3pg != NULL && pmap_unwire_l3(pmap, sva, l3pg, free)) { + sva += L3_SIZE; + break; + } + } + if (va != eva) + pmap_invalidate_range(pmap, va, sva); +} + +/* + * Remove the given range of addresses from the specified map. + * + * It is assumed that the start and end are properly + * rounded to the page size. + */ +void +pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + struct rwlock *lock; + vm_offset_t va_next; + pd_entry_t *l0, *l1, *l2; + pt_entry_t l3_paddr; + struct spglist free; + + /* + * Perform an unsynchronized read. This is, however, safe. + */ + if (pmap->pm_stats.resident_count == 0) + return; + + SLIST_INIT(&free); + + PMAP_LOCK(pmap); + + lock = NULL; + for (; sva < eva; sva = va_next) { + if (pmap->pm_stats.resident_count == 0) + break; + + l0 = pmap_l0(pmap, sva); + if (pmap_load(l0) == 0) { + va_next = (sva + L0_SIZE) & ~L0_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + + l1 = pmap_l0_to_l1(l0, sva); + if (pmap_load(l1) == 0) { + va_next = (sva + L1_SIZE) & ~L1_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + + /* + * Calculate index for next page table. + */ + va_next = (sva + L2_SIZE) & ~L2_OFFSET; + if (va_next < sva) + va_next = eva; + + l2 = pmap_l1_to_l2(l1, sva); + if (l2 == NULL) + continue; + + l3_paddr = pmap_load(l2); + + if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) { + if (sva + L2_SIZE == va_next && eva >= va_next) { + pmap_remove_l2(pmap, l2, sva, pmap_load(l1), + &free, &lock); + continue; + } else if (pmap_demote_l2_locked(pmap, l2, sva, + &lock) == NULL) + continue; + l3_paddr = pmap_load(l2); + } + + /* + * Weed out invalid mappings. + */ + if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE) + continue; + + /* + * Limit our scan to either the end of the va represented + * by the current page table page, or to the end of the + * range being removed. 
+ */ + if (va_next > eva) + va_next = eva; + + pmap_remove_l3_range(pmap, l3_paddr, sva, va_next, &free, + &lock); + } + if (lock != NULL) + rw_wunlock(lock); + PMAP_UNLOCK(pmap); + vm_page_free_pages_toq(&free, true); +} + +/* + * Routine: pmap_remove_all + * Function: + * Removes this physical page from + * all physical maps in which it resides. + * Reflects back modify bits to the pager. + * + * Notes: + * Original versions of this routine were very + * inefficient because they iteratively called + * pmap_remove (slow...) + */ + +void +pmap_remove_all(vm_page_t m) +{ + struct md_page *pvh; + pv_entry_t pv; + pmap_t pmap; + struct rwlock *lock; + pd_entry_t *pde, tpde; + pt_entry_t *pte, tpte; + vm_offset_t va; + struct spglist free; + int lvl, pvh_gen, md_gen; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_remove_all: page %p is not managed", m)); + SLIST_INIT(&free); + lock = VM_PAGE_TO_PV_LIST_LOCK(m); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); +retry: + rw_wlock(lock); + while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + rw_wunlock(lock); + PMAP_UNLOCK(pmap); + goto retry; + } + } + va = pv->pv_va; + pte = pmap_pte(pmap, va, &lvl); + KASSERT(pte != NULL, + ("pmap_remove_all: no page table entry found")); + KASSERT(lvl == 2, + ("pmap_remove_all: invalid pte level %d", lvl)); + + pmap_demote_l2_locked(pmap, pte, va, &lock); + PMAP_UNLOCK(pmap); + } + while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { + pmap = PV_PMAP(pv); + PMAP_ASSERT_STAGE1(pmap); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + md_gen = m->md.pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { + rw_wunlock(lock); + PMAP_UNLOCK(pmap); + goto retry; + } + } + pmap_resident_count_dec(pmap, 1); + + pde = pmap_pde(pmap, pv->pv_va, &lvl); + KASSERT(pde != NULL, + ("pmap_remove_all: no page directory entry found")); + KASSERT(lvl == 2, + ("pmap_remove_all: invalid pde level %d", lvl)); + tpde = pmap_load(pde); + + pte = pmap_l2_to_l3(pde, pv->pv_va); + tpte = pmap_load_clear(pte); + if (tpte & ATTR_SW_WIRED) + pmap->pm_stats.wired_count--; + if ((tpte & ATTR_AF) != 0) { + pmap_invalidate_page(pmap, pv->pv_va); + vm_page_aflag_set(m, PGA_REFERENCED); + } + + /* + * Update the vm_page_t clean and reference bits. + */ + if (pmap_pte_dirty(pmap, tpte)) + vm_page_dirty(m); + pmap_unuse_pt(pmap, pv->pv_va, tpde, &free); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + free_pv_entry(pmap, pv); + PMAP_UNLOCK(pmap); + } + vm_page_aflag_clear(m, PGA_WRITEABLE); + rw_wunlock(lock); + vm_page_free_pages_toq(&free, true); +} + +/* + * pmap_protect_l2: do the things to protect a 2MB page in a pmap + */ +static void +pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pt_entry_t mask, + pt_entry_t nbits) +{ + pd_entry_t old_l2; + vm_page_t m, mt; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + PMAP_ASSERT_STAGE1(pmap); + KASSERT((sva & L2_OFFSET) == 0, + ("pmap_protect_l2: sva is not 2mpage aligned")); + old_l2 = pmap_load(l2); + KASSERT((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK, + ("pmap_protect_l2: L2e %lx is not a block mapping", old_l2)); + + /* + * Return if the L2 entry already has the desired access restrictions + * in place. 
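+	 * The compare-and-set below may fail if the hardware concurrently + * updates the entry, e.g. when clearing the read-only bit under DBM to + * mark the page dirty; atomic_fcmpset_64() then refreshes "old_l2" and + * the check is retried.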
+ */ +retry: + if ((old_l2 & mask) == nbits) + return; + + /* + * When a dirty read/write superpage mapping is write protected, + * update the dirty field of each of the superpage's constituent 4KB + * pages. + */ + if ((old_l2 & ATTR_SW_MANAGED) != 0 && + (nbits & ATTR_S1_AP(ATTR_S1_AP_RO)) != 0 && + pmap_pte_dirty(pmap, old_l2)) { + m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK); + for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) + vm_page_dirty(mt); + } + + if (!atomic_fcmpset_64(l2, &old_l2, (old_l2 & ~mask) | nbits)) + goto retry; + + /* + * Since a promotion must break the 4KB page mappings before making + * the 2MB page mapping, a pmap_invalidate_page() suffices. + */ + pmap_invalidate_page(pmap, sva); +} + +/* + * Set the physical protection on the + * specified range of this map as requested. + */ +void +pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) +{ + vm_offset_t va, va_next; + pd_entry_t *l0, *l1, *l2; + pt_entry_t *l3p, l3, mask, nbits; + + PMAP_ASSERT_STAGE1(pmap); + KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); + if (prot == VM_PROT_NONE) { + pmap_remove(pmap, sva, eva); + return; + } + + mask = nbits = 0; + if ((prot & VM_PROT_WRITE) == 0) { + mask |= ATTR_S1_AP_RW_BIT | ATTR_SW_DBM; + nbits |= ATTR_S1_AP(ATTR_S1_AP_RO); + } + if ((prot & VM_PROT_EXECUTE) == 0) { + mask |= ATTR_S1_XN; + nbits |= ATTR_S1_XN; + } + if (mask == 0) + return; + + PMAP_LOCK(pmap); + for (; sva < eva; sva = va_next) { + l0 = pmap_l0(pmap, sva); + if (pmap_load(l0) == 0) { + va_next = (sva + L0_SIZE) & ~L0_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + + l1 = pmap_l0_to_l1(l0, sva); + if (pmap_load(l1) == 0) { + va_next = (sva + L1_SIZE) & ~L1_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + + va_next = (sva + L2_SIZE) & ~L2_OFFSET; + if (va_next < sva) + va_next = eva; + + l2 = pmap_l1_to_l2(l1, sva); + if (pmap_load(l2) == 0) + continue; + + if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) { + if (sva + L2_SIZE == va_next && eva >= va_next) { + pmap_protect_l2(pmap, l2, sva, mask, nbits); + continue; + } else if (pmap_demote_l2(pmap, l2, sva) == NULL) + continue; + } + KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE, + ("pmap_protect: Invalid L2 entry after demotion")); + + if (va_next > eva) + va_next = eva; + + va = va_next; + for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, + sva += L3_SIZE) { + l3 = pmap_load(l3p); +retry: + /* + * Go to the next L3 entry if the current one is + * invalid or already has the desired access + * restrictions in place. (The latter case occurs + * frequently. For example, in a "buildworld" + * workload, almost 1 out of 4 L3 entries already + * have the desired restrictions.) + */ + if (!pmap_l3_valid(l3) || (l3 & mask) == nbits) { + if (va != va_next) { + pmap_invalidate_range(pmap, va, sva); + va = va_next; + } + continue; + } + + /* + * When a dirty read/write mapping is write protected, + * update the page's dirty field. + */ + if ((l3 & ATTR_SW_MANAGED) != 0 && + (nbits & ATTR_S1_AP(ATTR_S1_AP_RO)) != 0 && + pmap_pte_dirty(pmap, l3)) + vm_page_dirty(PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK)); + + if (!atomic_fcmpset_64(l3p, &l3, (l3 & ~mask) | nbits)) + goto retry; + if (va == va_next) + va = sva; + } + if (va != va_next) + pmap_invalidate_range(pmap, va, sva); + } + PMAP_UNLOCK(pmap); +} + +/* + * Inserts the specified page table page into the specified pmap's collection + * of idle page table pages. 
Each of a pmap's page table pages is responsible + * for mapping a distinct range of virtual addresses. The pmap's collection is + * ordered by this virtual address range. + * + * If "promoted" is false, then the page table page "mpte" must be zero filled. + */ +static __inline int +pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0; + return (vm_radix_insert(&pmap->pm_root, mpte)); +} + +/* + * Removes the page table page mapping the specified virtual address from the + * specified pmap's collection of idle page table pages, and returns it. + * Otherwise, returns NULL if there is no page table page corresponding to the + * specified virtual address. + */ +static __inline vm_page_t +pmap_remove_pt_page(pmap_t pmap, vm_offset_t va) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va))); +} + +/* + * Performs a break-before-make update of a pmap entry. This is needed when + * either promoting or demoting pages to ensure the TLB doesn't get into an + * inconsistent state. + */ +static void +pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte, + vm_offset_t va, vm_size_t size) +{ + register_t intr; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + /* + * Ensure we don't get switched out with the page table in an + * inconsistent state. We also need to ensure no interrupts fire + * as they may make use of an address we are about to invalidate. + */ + intr = intr_disable(); + + /* + * Clear the old mapping's valid bit, but leave the rest of the entry + * unchanged, so that a lockless, concurrent pmap_kextract() can still + * lookup the physical address. + */ + pmap_clear_bits(pte, ATTR_DESCR_VALID); + pmap_invalidate_range(pmap, va, va + size); + + /* Create the new mapping */ + pmap_store(pte, newpte); + dsb(ishst); + + intr_restore(intr); +} + +#if VM_NRESERVLEVEL > 0 +/* + * After promotion from 512 4KB page mappings to a single 2MB page mapping, + * replace the many pv entries for the 4KB page mappings by a single pv entry + * for the 2MB page mapping. + */ +static void +pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, + struct rwlock **lockp) +{ + struct md_page *pvh; + pv_entry_t pv; + vm_offset_t va_last; + vm_page_t m; + + KASSERT((pa & L2_OFFSET) == 0, + ("pmap_pv_promote_l2: pa is not 2mpage aligned")); + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + + /* + * Transfer the first page's pv entry for this mapping to the 2mpage's + * pv list. Aside from avoiding the cost of a call to get_pv_entry(), + * a transfer avoids the possibility that get_pv_entry() calls + * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the + * mappings that is being promoted. + */ + m = PHYS_TO_VM_PAGE(pa); + va = va & ~L2_OFFSET; + pv = pmap_pvh_remove(&m->md, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found")); + pvh = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + /* Free the remaining NPTEPG - 1 pv entries. */ + va_last = va + L2_SIZE - PAGE_SIZE; + do { + m++; + va += PAGE_SIZE; + pmap_pvh_free(&m->md, pmap, va); + } while (va < va_last); +} + +/* + * Tries to promote the 512, contiguous 4KB page mappings that are within a + * single level 2 table entry to a single 2MB page mapping. 
For promotion + * to occur, two conditions must be met: (1) the 4KB page mappings must map + * aligned, contiguous physical memory and (2) the 4KB page mappings must have + * identical characteristics. + */ +static void +pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, + struct rwlock **lockp) +{ + pt_entry_t *firstl3, *l3, newl2, oldl3, pa; + vm_page_t mpte; + vm_offset_t sva; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + PMAP_ASSERT_STAGE1(pmap); + + sva = va & ~L2_OFFSET; + firstl3 = pmap_l2_to_l3(l2, sva); + newl2 = pmap_load(firstl3); + +setl2: + if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF) { + atomic_add_long(&pmap_l2_p_failures, 1); + CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" + " in pmap %p", va, pmap); + return; + } + + if ((newl2 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) == + (ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) { + if (!atomic_fcmpset_64(l2, &newl2, newl2 & ~ATTR_SW_DBM)) + goto setl2; + newl2 &= ~ATTR_SW_DBM; + } + + pa = newl2 + L2_SIZE - PAGE_SIZE; + for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) { + oldl3 = pmap_load(l3); +setl3: + if ((oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) == + (ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) { + if (!atomic_fcmpset_64(l3, &oldl3, oldl3 & + ~ATTR_SW_DBM)) + goto setl3; + oldl3 &= ~ATTR_SW_DBM; + } + if (oldl3 != pa) { + atomic_add_long(&pmap_l2_p_failures, 1); + CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" + " in pmap %p", va, pmap); + return; + } + pa -= PAGE_SIZE; + } + + /* + * Save the page table page in its current state until the L2 + * mapping the superpage is demoted by pmap_demote_l2() or + * destroyed by pmap_remove_l3(). + */ + mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); + KASSERT(mpte >= vm_page_array && + mpte < &vm_page_array[vm_page_array_size], + ("pmap_promote_l2: page table page is out of range")); + KASSERT(mpte->pindex == pmap_l2_pindex(va), + ("pmap_promote_l2: page table page's pindex is wrong")); + if (pmap_insert_pt_page(pmap, mpte, true)) { + atomic_add_long(&pmap_l2_p_failures, 1); + CTR2(KTR_PMAP, + "pmap_promote_l2: failure for va %#lx in pmap %p", va, + pmap); + return; + } + + if ((newl2 & ATTR_SW_MANAGED) != 0) + pmap_pv_promote_l2(pmap, va, newl2 & ~ATTR_MASK, lockp); + + newl2 &= ~ATTR_DESCR_MASK; + newl2 |= L2_BLOCK; + + pmap_update_entry(pmap, l2, newl2, sva, L2_SIZE); + + atomic_add_long(&pmap_l2_promotions, 1); + CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va, + pmap); +} +#endif /* VM_NRESERVLEVEL > 0 */ + +/* + * Insert the given physical page (p) at + * the specified virtual address (v) in the + * target physical map with the protection requested. + * + * If specified, the page will be wired down, meaning + * that the related pte can not be reclaimed. + * + * NB: This is the only routine which MAY NOT lazy-evaluate + * or lose information. That is, this routine must actually + * insert this page into the given map NOW. 
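+ * + * If "psind" is 1 the caller requests a 2MB mapping directly; after + * asserting the alignment of "va" and the size of "m", the request is + * handed to pmap_enter_l2().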
+ */ +int +pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, + u_int flags, int8_t psind) +{ + struct rwlock *lock; + pd_entry_t *pde; + pt_entry_t new_l3, orig_l3; + pt_entry_t *l2, *l3; + pv_entry_t pv; + vm_paddr_t opa, pa; + vm_page_t mpte, om; + boolean_t nosleep; + int lvl, rv; + + va = trunc_page(va); + if ((m->oflags & VPO_UNMANAGED) == 0) + VM_PAGE_OBJECT_BUSY_ASSERT(m); + pa = VM_PAGE_TO_PHYS(m); + new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | L3_PAGE); + new_l3 |= pmap_pte_memattr(pmap, m->md.pv_memattr); + new_l3 |= pmap_pte_prot(pmap, prot); + + if ((flags & PMAP_ENTER_WIRED) != 0) + new_l3 |= ATTR_SW_WIRED; + if (pmap->pm_stage == PM_STAGE1) { + if (va < VM_MAXUSER_ADDRESS) + new_l3 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN; + else + new_l3 |= ATTR_S1_UXN; + if (pmap != kernel_pmap) + new_l3 |= ATTR_S1_nG; + } else { + /* + * Clear the access flag on executable mappings, this will be + * set later when the page is accessed. The fault handler is + * required to invalidate the I-cache. + * + * TODO: Switch to the valid flag to allow hardware management + * of the access flag. Much of the pmap code assumes the + * valid flag is set and fails to destroy the old page tables + * correctly if it is clear. + */ + if (prot & VM_PROT_EXECUTE) + new_l3 &= ~ATTR_AF; + } + if ((m->oflags & VPO_UNMANAGED) == 0) { + new_l3 |= ATTR_SW_MANAGED; + if ((prot & VM_PROT_WRITE) != 0) { + new_l3 |= ATTR_SW_DBM; + if ((flags & VM_PROT_WRITE) == 0) { + if (pmap->pm_stage == PM_STAGE1) + new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO); + else + new_l3 &= + ~ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE); + } + } + } + + CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); + + lock = NULL; + PMAP_LOCK(pmap); + if (psind == 1) { + /* Assert the required virtual and physical alignment. */ + KASSERT((va & L2_OFFSET) == 0, ("pmap_enter: va unaligned")); + KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind")); + rv = pmap_enter_l2(pmap, va, (new_l3 & ~L3_PAGE) | L2_BLOCK, + flags, m, &lock); + goto out; + } + mpte = NULL; + + /* + * In the case that a page table page is not + * resident, we are creating it here. + */ +retry: + pde = pmap_pde(pmap, va, &lvl); + if (pde != NULL && lvl == 2) { + l3 = pmap_l2_to_l3(pde, va); + if (va < VM_MAXUSER_ADDRESS && mpte == NULL) { + mpte = PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); + mpte->ref_count++; + } + goto havel3; + } else if (pde != NULL && lvl == 1) { + l2 = pmap_l1_to_l2(pde, va); + if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK && + (l3 = pmap_demote_l2_locked(pmap, l2, va, &lock)) != NULL) { + l3 = &l3[pmap_l3_index(va)]; + if (va < VM_MAXUSER_ADDRESS) { + mpte = PHYS_TO_VM_PAGE( + pmap_load(l2) & ~ATTR_MASK); + mpte->ref_count++; + } + goto havel3; + } + /* We need to allocate an L3 table. */ + } + if (va < VM_MAXUSER_ADDRESS) { + nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; + + /* + * We use _pmap_alloc_l3() instead of pmap_alloc_l3() in order + * to handle the possibility that a superpage mapping for "va" + * was created while we slept. + */ + mpte = _pmap_alloc_l3(pmap, pmap_l2_pindex(va), + nosleep ? NULL : &lock); + if (mpte == NULL && nosleep) { + CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); + rv = KERN_RESOURCE_SHORTAGE; + goto out; + } + goto retry; + } else + panic("pmap_enter: missing L3 table for kernel va %#lx", va); + +havel3: + orig_l3 = pmap_load(l3); + opa = orig_l3 & ~ATTR_MASK; + pv = NULL; + + /* + * Is the specified virtual address already mapped? 
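+	 * If so, there are three possibilities: only the wiring changed,
+	 * only the protection changed, or the backing physical page itself
+	 * changed.  Each case is handled below, reusing the existing PV
+	 * entry whenever possible.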
+	 */
+	if (pmap_l3_valid(orig_l3)) {
+		/*
+		 * Only allow adding new entries on stage 2 tables for now.
+		 * This simplifies cache invalidation as we may need to call
+		 * into EL2 to perform such actions.
+		 */
+		PMAP_ASSERT_STAGE1(pmap);
+		/*
+		 * Wiring change, just update stats.  We don't worry about
+		 * wiring PT pages as they remain resident as long as there
+		 * are valid mappings in them.  Hence, if a user page is
+		 * wired, the PT page will be also.
+		 */
+		if ((flags & PMAP_ENTER_WIRED) != 0 &&
+		    (orig_l3 & ATTR_SW_WIRED) == 0)
+			pmap->pm_stats.wired_count++;
+		else if ((flags & PMAP_ENTER_WIRED) == 0 &&
+		    (orig_l3 & ATTR_SW_WIRED) != 0)
+			pmap->pm_stats.wired_count--;
+
+		/*
+		 * Remove the extra PT page reference.
+		 */
+		if (mpte != NULL) {
+			mpte->ref_count--;
+			KASSERT(mpte->ref_count > 0,
+			    ("pmap_enter: missing reference to page table page,"
+			    " va: 0x%lx", va));
+		}
+
+		/*
+		 * Has the physical page changed?
+		 */
+		if (opa == pa) {
+			/*
+			 * No, might be a protection or wiring change.
+			 */
+			if ((orig_l3 & ATTR_SW_MANAGED) != 0 &&
+			    (new_l3 & ATTR_SW_DBM) != 0)
+				vm_page_aflag_set(m, PGA_WRITEABLE);
+			goto validate;
+		}
+
+		/*
+		 * The physical page has changed.  Temporarily invalidate
+		 * the mapping.
+		 */
+		orig_l3 = pmap_load_clear(l3);
+		KASSERT((orig_l3 & ~ATTR_MASK) == opa,
+		    ("pmap_enter: unexpected pa update for %#lx", va));
+		if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
+			om = PHYS_TO_VM_PAGE(opa);
+
+			/*
+			 * The pmap lock is sufficient to synchronize with
+			 * concurrent calls to pmap_page_test_mappings() and
+			 * pmap_ts_referenced().
+			 */
+			if (pmap_pte_dirty(pmap, orig_l3))
+				vm_page_dirty(om);
+			if ((orig_l3 & ATTR_AF) != 0) {
+				pmap_invalidate_page(pmap, va);
+				vm_page_aflag_set(om, PGA_REFERENCED);
+			}
+			CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
+			pv = pmap_pvh_remove(&om->md, pmap, va);
+			if ((m->oflags & VPO_UNMANAGED) != 0)
+				free_pv_entry(pmap, pv);
+			if ((om->a.flags & PGA_WRITEABLE) != 0 &&
+			    TAILQ_EMPTY(&om->md.pv_list) &&
+			    ((om->flags & PG_FICTITIOUS) != 0 ||
+			    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
+				vm_page_aflag_clear(om, PGA_WRITEABLE);
+		} else {
+			KASSERT((orig_l3 & ATTR_AF) != 0,
+			    ("pmap_enter: unmanaged mapping lacks ATTR_AF"));
+			pmap_invalidate_page(pmap, va);
+		}
+		orig_l3 = 0;
+	} else {
+		/*
+		 * Increment the counters.
+		 */
+		if ((new_l3 & ATTR_SW_WIRED) != 0)
+			pmap->pm_stats.wired_count++;
+		pmap_resident_count_inc(pmap, 1);
+	}
+	/*
+	 * Enter on the PV list if part of our managed memory.
+	 */
+	if ((m->oflags & VPO_UNMANAGED) == 0) {
+		if (pv == NULL) {
+			pv = get_pv_entry(pmap, &lock);
+			pv->pv_va = va;
+		}
+		CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
+		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+		m->md.pv_gen++;
+		if ((new_l3 & ATTR_SW_DBM) != 0)
+			vm_page_aflag_set(m, PGA_WRITEABLE);
+	}
+
+validate:
+	if (pmap->pm_stage == PM_STAGE1) {
+		/*
+		 * Sync the icache if the mapping has exec permission and the
+		 * memory attribute is VM_MEMATTR_WRITE_BACK.  Do it now,
+		 * before the mapping is stored and made valid for the
+		 * hardware table walk.  If done later, others could access
+		 * the page before the caches are properly synced.  Don't do
+		 * it for kernel memory, which is mapped with exec permission
+		 * even when it isn't going to hold executable code.  The
+		 * only time an icache sync is needed there is after a kernel
+		 * module has been loaded and its relocation info processed,
+		 * and that is done in elf_cpu_load_file().
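+		 *
+		 * Stage 2 mappings take the else branch below and get a data
+		 * cache write-back instead, presumably so that a guest
+		 * running with its own caches disabled still observes the
+		 * correct memory contents.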
+		 */
+		if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
+		    m->md.pv_memattr == VM_MEMATTR_WRITE_BACK &&
+		    (opa != pa || (orig_l3 & ATTR_S1_XN))) {
+			PMAP_ASSERT_STAGE1(pmap);
+			cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE);
+		}
+	} else {
+		cpu_dcache_wb_range(PHYS_TO_DMAP(pa), PAGE_SIZE);
+	}
+
+	/*
+	 * Update the L3 entry
+	 */
+	if (pmap_l3_valid(orig_l3)) {
+		PMAP_ASSERT_STAGE1(pmap);
+		KASSERT(opa == pa, ("pmap_enter: invalid update"));
+		if ((orig_l3 & ~ATTR_AF) != (new_l3 & ~ATTR_AF)) {
+			/* same PA, different attributes */
+			orig_l3 = pmap_load_store(l3, new_l3);
+			pmap_invalidate_page(pmap, va);
+			if ((orig_l3 & ATTR_SW_MANAGED) != 0 &&
+			    pmap_pte_dirty(pmap, orig_l3))
+				vm_page_dirty(m);
+		} else {
+			/*
+			 * orig_l3 == new_l3
+			 * This can happen if multiple threads simultaneously
+			 * access a page that is not yet mapped.  This is bad
+			 * for performance since it can cause a full
+			 * demotion-NOP-promotion cycle.
+			 * Other possible reasons are:
+			 * - the VM and pmap memory layouts have diverged
+			 * - a TLB flush is missing somewhere and the CPU
+			 *   doesn't see the actual mapping.
+			 */
+			CTR4(KTR_PMAP, "%s: already mapped page - "
+			    "pmap %p va %#lx pte 0x%lx",
+			    __func__, pmap, va, new_l3);
+		}
+	} else {
+		/* New mapping */
+		pmap_store(l3, new_l3);
+		dsb(ishst);
+	}
+
+#if VM_NRESERVLEVEL > 0
+	/*
+	 * Try to promote from level 3 pages to a level 2 superpage.  This
+	 * currently only works on stage 1 pmaps as pmap_promote_l2 looks at
+	 * stage 1 specific fields and performs a break-before-make sequence
+	 * that is incorrect for a stage 2 pmap.
+	 */
+	if ((mpte == NULL || mpte->ref_count == NL3PG) &&
+	    pmap_ps_enabled(pmap) && pmap->pm_stage == PM_STAGE1 &&
+	    (m->flags & PG_FICTITIOUS) == 0 &&
+	    vm_reserv_level_iffullpop(m) == 0) {
+		pmap_promote_l2(pmap, pde, va, &lock);
+	}
+#endif
+
+	rv = KERN_SUCCESS;
+out:
+	if (lock != NULL)
+		rw_wunlock(lock);
+	PMAP_UNLOCK(pmap);
+	return (rv);
+}
+
+/*
+ * Tries to create a read- and/or execute-only 2MB page mapping.  Returns
+ * true if successful.  Returns false if (1) a page table page cannot be
+ * allocated without sleeping, (2) a mapping already exists at the specified
+ * virtual address, or (3) a PV entry cannot be allocated without reclaiming
+ * another PV entry.
+ */
+static bool
+pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
+    struct rwlock **lockp)
+{
+	pd_entry_t new_l2;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	PMAP_ASSERT_STAGE1(pmap);
+
+	new_l2 = (pd_entry_t)(VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT |
+	    ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S1_AP_RO) |
+	    L2_BLOCK);
+	if ((m->oflags & VPO_UNMANAGED) == 0) {
+		new_l2 |= ATTR_SW_MANAGED;
+		new_l2 &= ~ATTR_AF;
+	}
+	if ((prot & VM_PROT_EXECUTE) == 0 ||
+	    m->md.pv_memattr == VM_MEMATTR_DEVICE)
+		new_l2 |= ATTR_S1_XN;
+	if (va < VM_MAXUSER_ADDRESS)
+		new_l2 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
+	else
+		new_l2 |= ATTR_S1_UXN;
+	if (pmap != kernel_pmap)
+		new_l2 |= ATTR_S1_nG;
+	return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP |
+	    PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) ==
+	    KERN_SUCCESS);
+}
+
+/*
+ * Returns true if every page table entry in the specified page table is
+ * zero.
+ */
+static bool
+pmap_every_pte_zero(vm_paddr_t pa)
+{
+	pt_entry_t *pt_end, *pte;
+
+	KASSERT((pa & PAGE_MASK) == 0, ("pa is misaligned"));
+	pte = (pt_entry_t *)PHYS_TO_DMAP(pa);
+	for (pt_end = pte + Ln_ENTRIES; pte < pt_end; pte++) {
+		if (*pte != 0)
+			return (false);
+	}
+	return (true);
+}
+
+/*
+ * Tries to create the specified 2MB page mapping. 
Returns KERN_SUCCESS if + * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE + * otherwise. Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and + * a mapping already exists at the specified virtual address. Returns + * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table + * page allocation failed. Returns KERN_RESOURCE_SHORTAGE if + * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed. + * + * The parameter "m" is only used when creating a managed, writeable mapping. + */ +static int +pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, + vm_page_t m, struct rwlock **lockp) +{ + struct spglist free; + pd_entry_t *l2, old_l2; + vm_page_t l2pg, mt; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + if ((l2 = pmap_alloc_l2(pmap, va, &l2pg, (flags & + PMAP_ENTER_NOSLEEP) != 0 ? NULL : lockp)) == NULL) { + CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p", + va, pmap); + return (KERN_RESOURCE_SHORTAGE); + } + + /* + * If there are existing mappings, either abort or remove them. + */ + if ((old_l2 = pmap_load(l2)) != 0) { + KASSERT(l2pg == NULL || l2pg->ref_count > 1, + ("pmap_enter_l2: l2pg's ref count is too low")); + if ((flags & PMAP_ENTER_NOREPLACE) != 0 && (va < + VM_MAXUSER_ADDRESS || (old_l2 & ATTR_DESCR_MASK) == + L2_BLOCK || !pmap_every_pte_zero(old_l2 & ~ATTR_MASK))) { + if (l2pg != NULL) + l2pg->ref_count--; + CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx" + " in pmap %p", va, pmap); + return (KERN_FAILURE); + } + SLIST_INIT(&free); + if ((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK) + (void)pmap_remove_l2(pmap, l2, va, + pmap_load(pmap_l1(pmap, va)), &free, lockp); + else + pmap_remove_l3_range(pmap, old_l2, va, va + L2_SIZE, + &free, lockp); + if (va < VM_MAXUSER_ADDRESS) { + vm_page_free_pages_toq(&free, true); + KASSERT(pmap_load(l2) == 0, + ("pmap_enter_l2: non-zero L2 entry %p", l2)); + } else { + KASSERT(SLIST_EMPTY(&free), + ("pmap_enter_l2: freed kernel page table page")); + + /* + * Both pmap_remove_l2() and pmap_remove_l3_range() + * will leave the kernel page table page zero filled. + * Nonetheless, the TLB could have an intermediate + * entry for the kernel page table page. + */ + mt = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); + if (pmap_insert_pt_page(pmap, mt, false)) + panic("pmap_enter_l2: trie insert failed"); + pmap_clear(l2); + pmap_invalidate_page(pmap, va); + } + } + + if ((new_l2 & ATTR_SW_MANAGED) != 0) { + /* + * Abort this mapping if its PV entry could not be created. + */ + if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) { + if (l2pg != NULL) + pmap_abort_ptp(pmap, va, l2pg); + CTR2(KTR_PMAP, + "pmap_enter_l2: failure for va %#lx in pmap %p", + va, pmap); + return (KERN_RESOURCE_SHORTAGE); + } + if ((new_l2 & ATTR_SW_DBM) != 0) + for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) + vm_page_aflag_set(mt, PGA_WRITEABLE); + } + + /* + * Increment counters. + */ + if ((new_l2 & ATTR_SW_WIRED) != 0) + pmap->pm_stats.wired_count += L2_SIZE / PAGE_SIZE; + pmap->pm_stats.resident_count += L2_SIZE / PAGE_SIZE; + + /* + * Map the superpage. + */ + pmap_store(l2, new_l2); + dsb(ishst); + + atomic_add_long(&pmap_l2_mappings, 1); + CTR2(KTR_PMAP, "pmap_enter_l2: success for va %#lx in pmap %p", + va, pmap); + + return (KERN_SUCCESS); +} + +/* + * Maps a sequence of resident pages belonging to the same object. + * The sequence begins with the given page m_start. This page is + * mapped at the given virtual address start. 
Each subsequent page is + * mapped at a virtual address that is offset from start by the same + * amount as the page is offset from m_start within the object. The + * last page in the sequence is the page with the largest offset from + * m_start that can be mapped at a virtual address less than the given + * virtual address end. Not every virtual page between start and end + * is mapped; only those for which a resident page exists with the + * corresponding offset from m_start are mapped. + */ +void +pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, + vm_page_t m_start, vm_prot_t prot) +{ + struct rwlock *lock; + vm_offset_t va; + vm_page_t m, mpte; + vm_pindex_t diff, psize; + + VM_OBJECT_ASSERT_LOCKED(m_start->object); + + psize = atop(end - start); + mpte = NULL; + m = m_start; + lock = NULL; + PMAP_LOCK(pmap); + while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { + va = start + ptoa(diff); + if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end && + m->psind == 1 && pmap_ps_enabled(pmap) && + pmap_enter_2mpage(pmap, va, m, prot, &lock)) + m = &m[L2_SIZE / PAGE_SIZE - 1]; + else + mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, + &lock); + m = TAILQ_NEXT(m, listq); + } + if (lock != NULL) + rw_wunlock(lock); + PMAP_UNLOCK(pmap); +} + +/* + * this code makes some *MAJOR* assumptions: + * 1. Current pmap & pmap exists. + * 2. Not wired. + * 3. Read access. + * 4. No page table pages. + * but is *MUCH* faster than pmap_enter... + */ + +void +pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) +{ + struct rwlock *lock; + + lock = NULL; + PMAP_LOCK(pmap); + (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); + if (lock != NULL) + rw_wunlock(lock); + PMAP_UNLOCK(pmap); +} + +static vm_page_t +pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) +{ + pd_entry_t *pde; + pt_entry_t *l2, *l3, l3_val; + vm_paddr_t pa; + int lvl; + + KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || + (m->oflags & VPO_UNMANAGED) != 0, + ("pmap_enter_quick_locked: managed mapping within the clean submap")); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + PMAP_ASSERT_STAGE1(pmap); + + CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); + /* + * In the case that a page table page is not + * resident, we are creating it here. + */ + if (va < VM_MAXUSER_ADDRESS) { + vm_pindex_t l2pindex; + + /* + * Calculate pagetable page index + */ + l2pindex = pmap_l2_pindex(va); + if (mpte && (mpte->pindex == l2pindex)) { + mpte->ref_count++; + } else { + /* + * Get the l2 entry + */ + pde = pmap_pde(pmap, va, &lvl); + + /* + * If the page table page is mapped, we just increment + * the hold count, and activate it. Otherwise, we + * attempt to allocate a page table page. If this + * attempt fails, we don't retry. Instead, we give up. + */ + if (lvl == 1) { + l2 = pmap_l1_to_l2(pde, va); + if ((pmap_load(l2) & ATTR_DESCR_MASK) == + L2_BLOCK) + return (NULL); + } + if (lvl == 2 && pmap_load(pde) != 0) { + mpte = + PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); + mpte->ref_count++; + } else { + /* + * Pass NULL instead of the PV list lock + * pointer, because we don't intend to sleep. 
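+ * With a NULL lock pointer _pmap_alloc_l3() fails rather than
+ * sleeps, which suits this function: pmap_enter_quick() is only
+ * advisory, so on failure the caller simply takes a regular fault
+ * later.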
+ */ + mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); + if (mpte == NULL) + return (mpte); + } + } + l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); + l3 = &l3[pmap_l3_index(va)]; + } else { + mpte = NULL; + pde = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx", + va)); + KASSERT(lvl == 2, + ("pmap_enter_quick_locked: Invalid level %d", lvl)); + l3 = pmap_l2_to_l3(pde, va); + } + + /* + * Abort if a mapping already exists. + */ + if (pmap_load(l3) != 0) { + if (mpte != NULL) + mpte->ref_count--; + return (NULL); + } + + /* + * Enter on the PV list if part of our managed memory. + */ + if ((m->oflags & VPO_UNMANAGED) == 0 && + !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { + if (mpte != NULL) + pmap_abort_ptp(pmap, va, mpte); + return (NULL); + } + + /* + * Increment counters + */ + pmap_resident_count_inc(pmap, 1); + + pa = VM_PAGE_TO_PHYS(m); + l3_val = pa | ATTR_DEFAULT | ATTR_S1_IDX(m->md.pv_memattr) | + ATTR_S1_AP(ATTR_S1_AP_RO) | L3_PAGE; + if ((prot & VM_PROT_EXECUTE) == 0 || + m->md.pv_memattr == VM_MEMATTR_DEVICE) + l3_val |= ATTR_S1_XN; + if (va < VM_MAXUSER_ADDRESS) + l3_val |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN; + else + l3_val |= ATTR_S1_UXN; + if (pmap != kernel_pmap) + l3_val |= ATTR_S1_nG; + + /* + * Now validate mapping with RO protection + */ + if ((m->oflags & VPO_UNMANAGED) == 0) { + l3_val |= ATTR_SW_MANAGED; + l3_val &= ~ATTR_AF; + } + + /* Sync icache before the mapping is stored to PTE */ + if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap && + m->md.pv_memattr == VM_MEMATTR_WRITE_BACK) + cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE); + + pmap_store(l3, l3_val); + dsb(ishst); + + return (mpte); +} + +/* + * This code maps large physical mmap regions into the + * processor address space. Note that some shortcuts + * are taken, but the code works. + */ +void +pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, + vm_pindex_t pindex, vm_size_t size) +{ + + VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, + ("pmap_object_init_pt: non-device object")); +} + +/* + * Clear the wired attribute from the mappings for the specified range of + * addresses in the given pmap. Every valid mapping within that range + * must have the wired attribute set. In contrast, invalid mappings + * cannot have the wired attribute set, so they are ignored. + * + * The wired attribute of the page table entry is not a hardware feature, + * so there is no need to invalidate any TLB entries. + */ +void +pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + vm_offset_t va_next; + pd_entry_t *l0, *l1, *l2; + pt_entry_t *l3; + + PMAP_LOCK(pmap); + for (; sva < eva; sva = va_next) { + l0 = pmap_l0(pmap, sva); + if (pmap_load(l0) == 0) { + va_next = (sva + L0_SIZE) & ~L0_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + + l1 = pmap_l0_to_l1(l0, sva); + if (pmap_load(l1) == 0) { + va_next = (sva + L1_SIZE) & ~L1_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + + va_next = (sva + L2_SIZE) & ~L2_OFFSET; + if (va_next < sva) + va_next = eva; + + l2 = pmap_l1_to_l2(l1, sva); + if (pmap_load(l2) == 0) + continue; + + if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) { + if ((pmap_load(l2) & ATTR_SW_WIRED) == 0) + panic("pmap_unwire: l2 %#jx is missing " + "ATTR_SW_WIRED", (uintmax_t)pmap_load(l2)); + + /* + * Are we unwiring the entire large page? If not, + * demote the mapping and fall through. 
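+ * Demotion copies ATTR_SW_WIRED into each of the resulting L3
+ * entries, so the per-page loop below can clear the wired bit on
+ * exactly the pages that fall inside the requested range.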
+ */ + if (sva + L2_SIZE == va_next && eva >= va_next) { + pmap_clear_bits(l2, ATTR_SW_WIRED); + pmap->pm_stats.wired_count -= L2_SIZE / + PAGE_SIZE; + continue; + } else if (pmap_demote_l2(pmap, l2, sva) == NULL) + panic("pmap_unwire: demotion failed"); + } + KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE, + ("pmap_unwire: Invalid l2 entry after demotion")); + + if (va_next > eva) + va_next = eva; + for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, + sva += L3_SIZE) { + if (pmap_load(l3) == 0) + continue; + if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) + panic("pmap_unwire: l3 %#jx is missing " + "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3)); + + /* + * ATTR_SW_WIRED must be cleared atomically. Although + * the pmap lock synchronizes access to ATTR_SW_WIRED, + * the System MMU may write to the entry concurrently. + */ + pmap_clear_bits(l3, ATTR_SW_WIRED); + pmap->pm_stats.wired_count--; + } + } + PMAP_UNLOCK(pmap); +} + +/* + * Copy the range specified by src_addr/len + * from the source map to the range dst_addr/len + * in the destination map. + * + * This routine is only advisory and need not do anything. + * + * Because the executable mappings created by this routine are copied, + * it should not have to flush the instruction cache. + */ +void +pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, + vm_offset_t src_addr) +{ + struct rwlock *lock; + pd_entry_t *l0, *l1, *l2, srcptepaddr; + pt_entry_t *dst_pte, mask, nbits, ptetemp, *src_pte; + vm_offset_t addr, end_addr, va_next; + vm_page_t dst_l2pg, dstmpte, srcmpte; + + PMAP_ASSERT_STAGE1(dst_pmap); + PMAP_ASSERT_STAGE1(src_pmap); + + if (dst_addr != src_addr) + return; + end_addr = src_addr + len; + lock = NULL; + if (dst_pmap < src_pmap) { + PMAP_LOCK(dst_pmap); + PMAP_LOCK(src_pmap); + } else { + PMAP_LOCK(src_pmap); + PMAP_LOCK(dst_pmap); + } + for (addr = src_addr; addr < end_addr; addr = va_next) { + l0 = pmap_l0(src_pmap, addr); + if (pmap_load(l0) == 0) { + va_next = (addr + L0_SIZE) & ~L0_OFFSET; + if (va_next < addr) + va_next = end_addr; + continue; + } + l1 = pmap_l0_to_l1(l0, addr); + if (pmap_load(l1) == 0) { + va_next = (addr + L1_SIZE) & ~L1_OFFSET; + if (va_next < addr) + va_next = end_addr; + continue; + } + va_next = (addr + L2_SIZE) & ~L2_OFFSET; + if (va_next < addr) + va_next = end_addr; + l2 = pmap_l1_to_l2(l1, addr); + srcptepaddr = pmap_load(l2); + if (srcptepaddr == 0) + continue; + if ((srcptepaddr & ATTR_DESCR_MASK) == L2_BLOCK) { + if ((addr & L2_OFFSET) != 0 || + addr + L2_SIZE > end_addr) + continue; + l2 = pmap_alloc_l2(dst_pmap, addr, &dst_l2pg, NULL); + if (l2 == NULL) + break; + if (pmap_load(l2) == 0 && + ((srcptepaddr & ATTR_SW_MANAGED) == 0 || + pmap_pv_insert_l2(dst_pmap, addr, srcptepaddr, + PMAP_ENTER_NORECLAIM, &lock))) { + mask = ATTR_AF | ATTR_SW_WIRED; + nbits = 0; + if ((srcptepaddr & ATTR_SW_DBM) != 0) + nbits |= ATTR_S1_AP_RW_BIT; + pmap_store(l2, (srcptepaddr & ~mask) | nbits); + pmap_resident_count_inc(dst_pmap, L2_SIZE / + PAGE_SIZE); + atomic_add_long(&pmap_l2_mappings, 1); + } else + pmap_abort_ptp(dst_pmap, addr, dst_l2pg); + continue; + } + KASSERT((srcptepaddr & ATTR_DESCR_MASK) == L2_TABLE, + ("pmap_copy: invalid L2 entry")); + srcptepaddr &= ~ATTR_MASK; + srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); + KASSERT(srcmpte->ref_count > 0, + ("pmap_copy: source page table page is unused")); + if (va_next > end_addr) + va_next = end_addr; + src_pte = (pt_entry_t *)PHYS_TO_DMAP(srcptepaddr); + src_pte = &src_pte[pmap_l3_index(addr)]; + dstmpte = NULL; + for 
(; addr < va_next; addr += PAGE_SIZE, src_pte++) { + ptetemp = pmap_load(src_pte); + + /* + * We only virtual copy managed pages. + */ + if ((ptetemp & ATTR_SW_MANAGED) == 0) + continue; + + if (dstmpte != NULL) { + KASSERT(dstmpte->pindex == pmap_l2_pindex(addr), + ("dstmpte pindex/addr mismatch")); + dstmpte->ref_count++; + } else if ((dstmpte = pmap_alloc_l3(dst_pmap, addr, + NULL)) == NULL) + goto out; + dst_pte = (pt_entry_t *) + PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte)); + dst_pte = &dst_pte[pmap_l3_index(addr)]; + if (pmap_load(dst_pte) == 0 && + pmap_try_insert_pv_entry(dst_pmap, addr, + PHYS_TO_VM_PAGE(ptetemp & ~ATTR_MASK), &lock)) { + /* + * Clear the wired, modified, and accessed + * (referenced) bits during the copy. + */ + mask = ATTR_AF | ATTR_SW_WIRED; + nbits = 0; + if ((ptetemp & ATTR_SW_DBM) != 0) + nbits |= ATTR_S1_AP_RW_BIT; + pmap_store(dst_pte, (ptetemp & ~mask) | nbits); + pmap_resident_count_inc(dst_pmap, 1); + } else { + pmap_abort_ptp(dst_pmap, addr, dstmpte); + goto out; + } + /* Have we copied all of the valid mappings? */ + if (dstmpte->ref_count >= srcmpte->ref_count) + break; + } + } +out: + /* + * XXX This barrier may not be needed because the destination pmap is + * not active. + */ + dsb(ishst); + + if (lock != NULL) + rw_wunlock(lock); + PMAP_UNLOCK(src_pmap); + PMAP_UNLOCK(dst_pmap); +} + +/* + * pmap_zero_page zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. + */ +void +pmap_zero_page(vm_page_t m) +{ + vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + + pagezero((void *)va); +} + +/* + * pmap_zero_page_area zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. + * + * off and size may not cover an area beyond a single hardware page. + */ +void +pmap_zero_page_area(vm_page_t m, int off, int size) +{ + vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + + if (off == 0 && size == PAGE_SIZE) + pagezero((void *)va); + else + bzero((char *)va + off, size); +} + +/* + * pmap_copy_page copies the specified (machine independent) + * page by mapping the page into virtual memory and using + * bcopy to copy the page, one machine dependent page at a + * time. 
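+ * On arm64 no transient mapping is required: all of physical memory
+ * is permanently mapped by the direct map, so the copy is a single
+ * pagecopy() between the two pages' DMAP addresses.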
+ */ +void +pmap_copy_page(vm_page_t msrc, vm_page_t mdst) +{ + vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); + vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); + + pagecopy((void *)src, (void *)dst); +} + +int unmapped_buf_allowed = 1; + +void +pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], + vm_offset_t b_offset, int xfersize) +{ + void *a_cp, *b_cp; + vm_page_t m_a, m_b; + vm_paddr_t p_a, p_b; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + m_a = ma[a_offset >> PAGE_SHIFT]; + p_a = m_a->phys_addr; + b_pg_offset = b_offset & PAGE_MASK; + m_b = mb[b_offset >> PAGE_SHIFT]; + p_b = m_b->phys_addr; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + if (__predict_false(!PHYS_IN_DMAP(p_a))) { + panic("!DMAP a %lx", p_a); + } else { + a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; + } + if (__predict_false(!PHYS_IN_DMAP(p_b))) { + panic("!DMAP b %lx", p_b); + } else { + b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; + } + bcopy(a_cp, b_cp, cnt); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } +} + +vm_offset_t +pmap_quick_enter_page(vm_page_t m) +{ + + return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); +} + +void +pmap_quick_remove_page(vm_offset_t addr) +{ +} + +/* + * Returns true if the pmap's pv is one of the first + * 16 pvs linked to from this page. This count may + * be changed upwards or downwards in the future; it + * is only necessary that true be returned for a small + * subset of pmaps for proper page aging. + */ +boolean_t +pmap_page_exists_quick(pmap_t pmap, vm_page_t m) +{ + struct md_page *pvh; + struct rwlock *lock; + pv_entry_t pv; + int loops = 0; + boolean_t rv; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_page_exists_quick: page %p is not managed", m)); + rv = FALSE; + lock = VM_PAGE_TO_PV_LIST_LOCK(m); + rw_rlock(lock); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { + if (PV_PMAP(pv) == pmap) { + rv = TRUE; + break; + } + loops++; + if (loops >= 16) + break; + } + if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + if (PV_PMAP(pv) == pmap) { + rv = TRUE; + break; + } + loops++; + if (loops >= 16) + break; + } + } + rw_runlock(lock); + return (rv); +} + +/* + * pmap_page_wired_mappings: + * + * Return the number of managed mappings to the given physical page + * that are wired. 
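+ * Both 4KB mappings on the page's own pv list and 2MB mappings on
+ * the containing superpage's pv list are counted.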
+ */ +int +pmap_page_wired_mappings(vm_page_t m) +{ + struct rwlock *lock; + struct md_page *pvh; + pmap_t pmap; + pt_entry_t *pte; + pv_entry_t pv; + int count, lvl, md_gen, pvh_gen; + + if ((m->oflags & VPO_UNMANAGED) != 0) + return (0); + lock = VM_PAGE_TO_PV_LIST_LOCK(m); + rw_rlock(lock); +restart: + count = 0; + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + rw_runlock(lock); + PMAP_LOCK(pmap); + rw_rlock(lock); + if (md_gen != m->md.pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + pte = pmap_pte(pmap, pv->pv_va, &lvl); + if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0) + count++; + PMAP_UNLOCK(pmap); + } + if ((m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + pvh_gen = pvh->pv_gen; + rw_runlock(lock); + PMAP_LOCK(pmap); + rw_rlock(lock); + if (md_gen != m->md.pv_gen || + pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + pte = pmap_pte(pmap, pv->pv_va, &lvl); + if (pte != NULL && + (pmap_load(pte) & ATTR_SW_WIRED) != 0) + count++; + PMAP_UNLOCK(pmap); + } + } + rw_runlock(lock); + return (count); +} + +/* + * Returns true if the given page is mapped individually or as part of + * a 2mpage. Otherwise, returns false. + */ +bool +pmap_page_is_mapped(vm_page_t m) +{ + struct rwlock *lock; + bool rv; + + if ((m->oflags & VPO_UNMANAGED) != 0) + return (false); + lock = VM_PAGE_TO_PV_LIST_LOCK(m); + rw_rlock(lock); + rv = !TAILQ_EMPTY(&m->md.pv_list) || + ((m->flags & PG_FICTITIOUS) == 0 && + !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); + rw_runlock(lock); + return (rv); +} + +/* + * Destroy all managed, non-wired mappings in the given user-space + * pmap. This pmap cannot be active on any processor besides the + * caller. + * + * This function cannot be applied to the kernel pmap. Moreover, it + * is not intended for general use. It is only to be used during + * process termination. Consequently, it can be implemented in ways + * that make it faster than pmap_remove(). First, it can more quickly + * destroy mappings by iterating over the pmap's collection of PV + * entries, rather than searching the page table. Second, it doesn't + * have to test and clear the page table entries atomically, because + * no processor is currently accessing the user address space. In + * particular, a page table entry's dirty bit won't change state once + * this function starts. 
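+ * Wired mappings are the one exception: they cannot be destroyed
+ * here, so any PV chunk that still holds a wired entry is retained.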
+ */ +void +pmap_remove_pages(pmap_t pmap) +{ + pd_entry_t *pde; + pt_entry_t *pte, tpte; + struct spglist free; + vm_page_t m, ml3, mt; + pv_entry_t pv; + struct md_page *pvh; + struct pv_chunk *pc, *npc; + struct rwlock *lock; + int64_t bit; + uint64_t inuse, bitmask; + int allfree, field, freed, idx, lvl; + vm_paddr_t pa; + + KASSERT(pmap == PCPU_GET(curpmap), ("non-current pmap %p", pmap)); + + lock = NULL; + + SLIST_INIT(&free); + PMAP_LOCK(pmap); + TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { + allfree = 1; + freed = 0; + for (field = 0; field < _NPCM; field++) { + inuse = ~pc->pc_map[field] & pc_freemask[field]; + while (inuse != 0) { + bit = ffsl(inuse) - 1; + bitmask = 1UL << bit; + idx = field * 64 + bit; + pv = &pc->pc_pventry[idx]; + inuse &= ~bitmask; + + pde = pmap_pde(pmap, pv->pv_va, &lvl); + KASSERT(pde != NULL, + ("Attempting to remove an unmapped page")); + + switch(lvl) { + case 1: + pte = pmap_l1_to_l2(pde, pv->pv_va); + tpte = pmap_load(pte); + KASSERT((tpte & ATTR_DESCR_MASK) == + L2_BLOCK, + ("Attempting to remove an invalid " + "block: %lx", tpte)); + break; + case 2: + pte = pmap_l2_to_l3(pde, pv->pv_va); + tpte = pmap_load(pte); + KASSERT((tpte & ATTR_DESCR_MASK) == + L3_PAGE, + ("Attempting to remove an invalid " + "page: %lx", tpte)); + break; + default: + panic( + "Invalid page directory level: %d", + lvl); + } + +/* + * We cannot remove wired pages from a process' mapping at this time + */ + if (tpte & ATTR_SW_WIRED) { + allfree = 0; + continue; + } + + pa = tpte & ~ATTR_MASK; + + m = PHYS_TO_VM_PAGE(pa); + KASSERT(m->phys_addr == pa, + ("vm_page_t %p phys_addr mismatch %016jx %016jx", + m, (uintmax_t)m->phys_addr, + (uintmax_t)tpte)); + + KASSERT((m->flags & PG_FICTITIOUS) != 0 || + m < &vm_page_array[vm_page_array_size], + ("pmap_remove_pages: bad pte %#jx", + (uintmax_t)tpte)); + + /* + * Because this pmap is not active on other + * processors, the dirty bit cannot have + * changed state since we last loaded pte. + */ + pmap_clear(pte); + + /* + * Update the vm_page_t clean/reference bits. 
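+ * A dirty 2MB block dirties all 512 constituent 4KB pages, since a
+ * write through the block mapping may have touched any of them.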
+ */ + if (pmap_pte_dirty(pmap, tpte)) { + switch (lvl) { + case 1: + for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) + vm_page_dirty(mt); + break; + case 2: + vm_page_dirty(m); + break; + } + } + + CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); + + /* Mark free */ + pc->pc_map[field] |= bitmask; + switch (lvl) { + case 1: + pmap_resident_count_dec(pmap, + L2_SIZE / PAGE_SIZE); + pvh = pa_to_pvh(tpte & ~ATTR_MASK); + TAILQ_REMOVE(&pvh->pv_list, pv,pv_next); + pvh->pv_gen++; + if (TAILQ_EMPTY(&pvh->pv_list)) { + for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) + if ((mt->a.flags & PGA_WRITEABLE) != 0 && + TAILQ_EMPTY(&mt->md.pv_list)) + vm_page_aflag_clear(mt, PGA_WRITEABLE); + } + ml3 = pmap_remove_pt_page(pmap, + pv->pv_va); + if (ml3 != NULL) { + KASSERT(ml3->valid == VM_PAGE_BITS_ALL, + ("pmap_remove_pages: l3 page not promoted")); + pmap_resident_count_dec(pmap,1); + KASSERT(ml3->ref_count == NL3PG, + ("pmap_remove_pages: l3 page ref count error")); + ml3->ref_count = 0; + pmap_add_delayed_free_list(ml3, + &free, FALSE); + } + break; + case 2: + pmap_resident_count_dec(pmap, 1); + TAILQ_REMOVE(&m->md.pv_list, pv, + pv_next); + m->md.pv_gen++; + if ((m->a.flags & PGA_WRITEABLE) != 0 && + TAILQ_EMPTY(&m->md.pv_list) && + (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh( + VM_PAGE_TO_PHYS(m)); + if (TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, + PGA_WRITEABLE); + } + break; + } + pmap_unuse_pt(pmap, pv->pv_va, pmap_load(pde), + &free); + freed++; + } + } + PV_STAT(atomic_add_long(&pv_entry_frees, freed)); + PV_STAT(atomic_add_int(&pv_entry_spare, freed)); + PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); + if (allfree) { + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + free_pv_chunk(pc); + } + } + if (lock != NULL) + rw_wunlock(lock); + pmap_invalidate_all(pmap); + PMAP_UNLOCK(pmap); + vm_page_free_pages_toq(&free, true); +} + +/* + * This is used to check if a page has been accessed or modified. 
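+ * A mapping counts as modified when its stage 1 access permission is
+ * read/write, and as accessed when ATTR_AF is set in a valid L3 page
+ * or L2 block entry.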
+ */ +static boolean_t +pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) +{ + struct rwlock *lock; + pv_entry_t pv; + struct md_page *pvh; + pt_entry_t *pte, mask, value; + pmap_t pmap; + int lvl, md_gen, pvh_gen; + boolean_t rv; + + rv = FALSE; + lock = VM_PAGE_TO_PV_LIST_LOCK(m); + rw_rlock(lock); +restart: + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { + pmap = PV_PMAP(pv); + PMAP_ASSERT_STAGE1(pmap); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + rw_runlock(lock); + PMAP_LOCK(pmap); + rw_rlock(lock); + if (md_gen != m->md.pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + pte = pmap_pte(pmap, pv->pv_va, &lvl); + KASSERT(lvl == 3, + ("pmap_page_test_mappings: Invalid level %d", lvl)); + mask = 0; + value = 0; + if (modified) { + mask |= ATTR_S1_AP_RW_BIT; + value |= ATTR_S1_AP(ATTR_S1_AP_RW); + } + if (accessed) { + mask |= ATTR_AF | ATTR_DESCR_MASK; + value |= ATTR_AF | L3_PAGE; + } + rv = (pmap_load(pte) & mask) == value; + PMAP_UNLOCK(pmap); + if (rv) + goto out; + } + if ((m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + pmap = PV_PMAP(pv); + PMAP_ASSERT_STAGE1(pmap); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + pvh_gen = pvh->pv_gen; + rw_runlock(lock); + PMAP_LOCK(pmap); + rw_rlock(lock); + if (md_gen != m->md.pv_gen || + pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + pte = pmap_pte(pmap, pv->pv_va, &lvl); + KASSERT(lvl == 2, + ("pmap_page_test_mappings: Invalid level %d", lvl)); + mask = 0; + value = 0; + if (modified) { + mask |= ATTR_S1_AP_RW_BIT; + value |= ATTR_S1_AP(ATTR_S1_AP_RW); + } + if (accessed) { + mask |= ATTR_AF | ATTR_DESCR_MASK; + value |= ATTR_AF | L2_BLOCK; + } + rv = (pmap_load(pte) & mask) == value; + PMAP_UNLOCK(pmap); + if (rv) + goto out; + } + } +out: + rw_runlock(lock); + return (rv); +} + +/* + * pmap_is_modified: + * + * Return whether or not the specified physical page was modified + * in any physical maps. + */ +boolean_t +pmap_is_modified(vm_page_t m) +{ + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_is_modified: page %p is not managed", m)); + + /* + * If the page is not busied then this check is racy. + */ + if (!pmap_page_is_write_mapped(m)) + return (FALSE); + return (pmap_page_test_mappings(m, FALSE, TRUE)); +} + +/* + * pmap_is_prefaultable: + * + * Return whether or not the specified virtual address is eligible + * for prefault. + */ +boolean_t +pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) +{ + pt_entry_t *pte; + boolean_t rv; + int lvl; + + rv = FALSE; + PMAP_LOCK(pmap); + pte = pmap_pte(pmap, addr, &lvl); + if (pte != NULL && pmap_load(pte) != 0) { + rv = TRUE; + } + PMAP_UNLOCK(pmap); + return (rv); +} + +/* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_is_referenced: page %p is not managed", m)); + return (pmap_page_test_mappings(m, TRUE, FALSE)); +} + +/* + * Clear the write and modified bits in each of the given page's mappings. 
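+ * A 2MB mapping is first demoted so that write access can be revoked
+ * one 4KB page at a time; ATTR_SW_DBM is cleared together with the
+ * write permission so that the entry can no longer become dirty.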
+ */ +void +pmap_remove_write(vm_page_t m) +{ + struct md_page *pvh; + pmap_t pmap; + struct rwlock *lock; + pv_entry_t next_pv, pv; + pt_entry_t oldpte, *pte; + vm_offset_t va; + int lvl, md_gen, pvh_gen; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_remove_write: page %p is not managed", m)); + vm_page_assert_busied(m); + + if (!pmap_page_is_write_mapped(m)) + return; + lock = VM_PAGE_TO_PV_LIST_LOCK(m); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); +retry_pv_loop: + rw_wlock(lock); + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { + pmap = PV_PMAP(pv); + PMAP_ASSERT_STAGE1(pmap); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + rw_wunlock(lock); + goto retry_pv_loop; + } + } + va = pv->pv_va; + pte = pmap_pte(pmap, pv->pv_va, &lvl); + if ((pmap_load(pte) & ATTR_SW_DBM) != 0) + (void)pmap_demote_l2_locked(pmap, pte, va, &lock); + KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), + ("inconsistent pv lock %p %p for page %p", + lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); + PMAP_UNLOCK(pmap); + } + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { + pmap = PV_PMAP(pv); + PMAP_ASSERT_STAGE1(pmap); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + md_gen = m->md.pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen || + md_gen != m->md.pv_gen) { + PMAP_UNLOCK(pmap); + rw_wunlock(lock); + goto retry_pv_loop; + } + } + pte = pmap_pte(pmap, pv->pv_va, &lvl); + oldpte = pmap_load(pte); +retry: + if ((oldpte & ATTR_SW_DBM) != 0) { + if (!atomic_fcmpset_long(pte, &oldpte, + (oldpte | ATTR_S1_AP_RW_BIT) & ~ATTR_SW_DBM)) + goto retry; + if ((oldpte & ATTR_S1_AP_RW_BIT) == + ATTR_S1_AP(ATTR_S1_AP_RW)) + vm_page_dirty(m); + pmap_invalidate_page(pmap, pv->pv_va); + } + PMAP_UNLOCK(pmap); + } + rw_wunlock(lock); + vm_page_aflag_clear(m, PGA_WRITEABLE); +} + +/* + * pmap_ts_referenced: + * + * Return a count of reference bits for a page, clearing those bits. + * It is not necessary for every reference bit to be cleared, but it + * is necessary that 0 only be returned when there are truly no + * reference bits set. + * + * As an optimization, update the page's dirty field if a modified bit is + * found while counting reference bits. This opportunistic update can be + * performed at low cost and can eliminate the need for some future calls + * to pmap_is_modified(). However, since this function stops after + * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some + * dirty pages. Those dirty pages will only be detected by a future call + * to pmap_is_modified(). + */ +int +pmap_ts_referenced(vm_page_t m) +{ + struct md_page *pvh; + pv_entry_t pv, pvf; + pmap_t pmap; + struct rwlock *lock; + pd_entry_t *pde, tpde; + pt_entry_t *pte, tpte; + vm_offset_t va; + vm_paddr_t pa; + int cleared, lvl, md_gen, not_cleared, pvh_gen; + struct spglist free; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_ts_referenced: page %p is not managed", m)); + SLIST_INIT(&free); + cleared = 0; + pa = VM_PAGE_TO_PHYS(m); + lock = PHYS_TO_PV_LIST_LOCK(pa); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
&pv_dummy : pa_to_pvh(pa); + rw_wlock(lock); +retry: + not_cleared = 0; + if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) + goto small_mappings; + pv = pvf; + do { + if (pvf == NULL) + pvf = pv; + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto retry; + } + } + va = pv->pv_va; + pde = pmap_pde(pmap, pv->pv_va, &lvl); + KASSERT(pde != NULL, ("pmap_ts_referenced: no l1 table found")); + KASSERT(lvl == 1, + ("pmap_ts_referenced: invalid pde level %d", lvl)); + tpde = pmap_load(pde); + KASSERT((tpde & ATTR_DESCR_MASK) == L1_TABLE, + ("pmap_ts_referenced: found an invalid l1 table")); + pte = pmap_l1_to_l2(pde, pv->pv_va); + tpte = pmap_load(pte); + if (pmap_pte_dirty(pmap, tpte)) { + /* + * Although "tpte" is mapping a 2MB page, because + * this function is called at a 4KB page granularity, + * we only update the 4KB page under test. + */ + vm_page_dirty(m); + } + + if ((tpte & ATTR_AF) != 0) { + /* + * Since this reference bit is shared by 512 4KB pages, + * it should not be cleared every time it is tested. + * Apply a simple "hash" function on the physical page + * number, the virtual superpage number, and the pmap + * address to select one 4KB page out of the 512 on + * which testing the reference bit will result in + * clearing that reference bit. This function is + * designed to avoid the selection of the same 4KB page + * for every 2MB page mapping. + * + * On demotion, a mapping that hasn't been referenced + * is simply destroyed. To avoid the possibility of a + * subsequent page fault on a demoted wired mapping, + * always leave its reference bit set. Moreover, + * since the superpage is wired, the current state of + * its reference bit won't affect page replacement. + */ + if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^ + (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 && + (tpte & ATTR_SW_WIRED) == 0) { + pmap_clear_bits(pte, ATTR_AF); + pmap_invalidate_page(pmap, pv->pv_va); + cleared++; + } else + not_cleared++; + } + PMAP_UNLOCK(pmap); + /* Rotate the PV list if it has more than one entry. */ + if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + } + if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX) + goto out; + } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); +small_mappings: + if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) + goto out; + pv = pvf; + do { + if (pvf == NULL) + pvf = pv; + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + md_gen = m->md.pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { + PMAP_UNLOCK(pmap); + goto retry; + } + } + pde = pmap_pde(pmap, pv->pv_va, &lvl); + KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found")); + KASSERT(lvl == 2, + ("pmap_ts_referenced: invalid pde level %d", lvl)); + tpde = pmap_load(pde); + KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE, + ("pmap_ts_referenced: found an invalid l2 table")); + pte = pmap_l2_to_l3(pde, pv->pv_va); + tpte = pmap_load(pte); + if (pmap_pte_dirty(pmap, tpte)) + vm_page_dirty(m); + if ((tpte & ATTR_AF) != 0) { + if ((tpte & ATTR_SW_WIRED) == 0) { + pmap_clear_bits(pte, ATTR_AF); + pmap_invalidate_page(pmap, pv->pv_va); + cleared++; + } else + not_cleared++; + } + PMAP_UNLOCK(pmap); + /* Rotate the PV list if it has more than one entry. 
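+ * Rotating ensures that successive calls do not always examine the
+ * same mappings first, spreading the reference-bit sampling across
+ * all of the page's mappings.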
*/ + if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + } + } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + + not_cleared < PMAP_TS_REFERENCED_MAX); +out: + rw_wunlock(lock); + vm_page_free_pages_toq(&free, true); + return (cleared + not_cleared); +} + +/* + * Apply the given advice to the specified range of addresses within the + * given pmap. Depending on the advice, clear the referenced and/or + * modified flags in each mapping and set the mapped page's dirty field. + */ +void +pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) +{ + struct rwlock *lock; + vm_offset_t va, va_next; + vm_page_t m; + pd_entry_t *l0, *l1, *l2, oldl2; + pt_entry_t *l3, oldl3; + + PMAP_ASSERT_STAGE1(pmap); + + if (advice != MADV_DONTNEED && advice != MADV_FREE) + return; + + PMAP_LOCK(pmap); + for (; sva < eva; sva = va_next) { + l0 = pmap_l0(pmap, sva); + if (pmap_load(l0) == 0) { + va_next = (sva + L0_SIZE) & ~L0_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + l1 = pmap_l0_to_l1(l0, sva); + if (pmap_load(l1) == 0) { + va_next = (sva + L1_SIZE) & ~L1_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + va_next = (sva + L2_SIZE) & ~L2_OFFSET; + if (va_next < sva) + va_next = eva; + l2 = pmap_l1_to_l2(l1, sva); + oldl2 = pmap_load(l2); + if (oldl2 == 0) + continue; + if ((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK) { + if ((oldl2 & ATTR_SW_MANAGED) == 0) + continue; + lock = NULL; + if (!pmap_demote_l2_locked(pmap, l2, sva, &lock)) { + if (lock != NULL) + rw_wunlock(lock); + + /* + * The 2MB page mapping was destroyed. + */ + continue; + } + + /* + * Unless the page mappings are wired, remove the + * mapping to a single page so that a subsequent + * access may repromote. Choosing the last page + * within the address range [sva, min(va_next, eva)) + * generally results in more repromotions. Since the + * underlying page table page is fully populated, this + * removal never frees a page table page. + */ + if ((oldl2 & ATTR_SW_WIRED) == 0) { + va = eva; + if (va > va_next) + va = va_next; + va -= PAGE_SIZE; + KASSERT(va >= sva, + ("pmap_advise: no address gap")); + l3 = pmap_l2_to_l3(l2, va); + KASSERT(pmap_load(l3) != 0, + ("pmap_advise: invalid PTE")); + pmap_remove_l3(pmap, l3, va, pmap_load(l2), + NULL, &lock); + } + if (lock != NULL) + rw_wunlock(lock); + } + KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE, + ("pmap_advise: invalid L2 entry after demotion")); + if (va_next > eva) + va_next = eva; + va = va_next; + for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, + sva += L3_SIZE) { + oldl3 = pmap_load(l3); + if ((oldl3 & (ATTR_SW_MANAGED | ATTR_DESCR_MASK)) != + (ATTR_SW_MANAGED | L3_PAGE)) + goto maybe_invlrng; + else if (pmap_pte_dirty(pmap, oldl3)) { + if (advice == MADV_DONTNEED) { + /* + * Future calls to pmap_is_modified() + * can be avoided by making the page + * dirty now. 
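+ * For MADV_FREE the dirty state is deliberately discarded rather
+ * than recorded, allowing the page to be reclaimed without being
+ * written back.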
+ */ + m = PHYS_TO_VM_PAGE(oldl3 & ~ATTR_MASK); + vm_page_dirty(m); + } + while (!atomic_fcmpset_long(l3, &oldl3, + (oldl3 & ~ATTR_AF) | + ATTR_S1_AP(ATTR_S1_AP_RO))) + cpu_spinwait(); + } else if ((oldl3 & ATTR_AF) != 0) + pmap_clear_bits(l3, ATTR_AF); + else + goto maybe_invlrng; + if (va == va_next) + va = sva; + continue; +maybe_invlrng: + if (va != va_next) { + pmap_invalidate_range(pmap, va, sva); + va = va_next; + } + } + if (va != va_next) + pmap_invalidate_range(pmap, va, sva); + } + PMAP_UNLOCK(pmap); +} + +/* + * Clear the modify bits on the specified physical page. + */ +void +pmap_clear_modify(vm_page_t m) +{ + struct md_page *pvh; + struct rwlock *lock; + pmap_t pmap; + pv_entry_t next_pv, pv; + pd_entry_t *l2, oldl2; + pt_entry_t *l3, oldl3; + vm_offset_t va; + int md_gen, pvh_gen; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_clear_modify: page %p is not managed", m)); + vm_page_assert_busied(m); + + if (!pmap_page_is_write_mapped(m)) + return; + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); + lock = VM_PAGE_TO_PV_LIST_LOCK(m); + rw_wlock(lock); +restart: + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { + pmap = PV_PMAP(pv); + PMAP_ASSERT_STAGE1(pmap); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + va = pv->pv_va; + l2 = pmap_l2(pmap, va); + oldl2 = pmap_load(l2); + /* If oldl2 has ATTR_SW_DBM set, then it is also dirty. */ + if ((oldl2 & ATTR_SW_DBM) != 0 && + pmap_demote_l2_locked(pmap, l2, va, &lock) && + (oldl2 & ATTR_SW_WIRED) == 0) { + /* + * Write protect the mapping to a single page so that + * a subsequent write access may repromote. + */ + va += VM_PAGE_TO_PHYS(m) - (oldl2 & ~ATTR_MASK); + l3 = pmap_l2_to_l3(l2, va); + oldl3 = pmap_load(l3); + while (!atomic_fcmpset_long(l3, &oldl3, + (oldl3 & ~ATTR_SW_DBM) | ATTR_S1_AP(ATTR_S1_AP_RO))) + cpu_spinwait(); + vm_page_dirty(m); + pmap_invalidate_page(pmap, va); + } + PMAP_UNLOCK(pmap); + } + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { + pmap = PV_PMAP(pv); + PMAP_ASSERT_STAGE1(pmap); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + l2 = pmap_l2(pmap, pv->pv_va); + l3 = pmap_l2_to_l3(l2, pv->pv_va); + oldl3 = pmap_load(l3); + if (pmap_l3_valid(oldl3) && + (oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) == ATTR_SW_DBM){ + pmap_set_bits(l3, ATTR_S1_AP(ATTR_S1_AP_RO)); + pmap_invalidate_page(pmap, pv->pv_va); + } + PMAP_UNLOCK(pmap); + } + rw_wunlock(lock); +} + +void * +pmap_mapbios(vm_paddr_t pa, vm_size_t size) +{ + struct pmap_preinit_mapping *ppim; + vm_offset_t va, offset; + pd_entry_t *pde; + pt_entry_t *l2; + int i, lvl, l2_blocks, free_l2_count, start_idx; + + if (!vm_initialized) { + /* + * No L3 ptables so map entire L2 blocks where start VA is: + * preinit_map_va + start_idx * L2_SIZE + * There may be duplicate mappings (multiple VA -> same PA) but + * ARM64 dcache is always PIPT so that's acceptable. 
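+ * For example, two early callers (say, ACPI table mappings) whose
+ * physical ranges share an L2 block will each receive their own VA
+ * alias of the same 2MB of physical memory.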
+ */ + if (size == 0) + return (NULL); + + /* Calculate how many L2 blocks are needed for the mapping */ + l2_blocks = (roundup2(pa + size, L2_SIZE) - + rounddown2(pa, L2_SIZE)) >> L2_SHIFT; + + offset = pa & L2_OFFSET; + + if (preinit_map_va == 0) + return (NULL); + + /* Map 2MiB L2 blocks from reserved VA space */ + + free_l2_count = 0; + start_idx = -1; + /* Find enough free contiguous VA space */ + for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { + ppim = pmap_preinit_mapping + i; + if (free_l2_count > 0 && ppim->pa != 0) { + /* Not enough space here */ + free_l2_count = 0; + start_idx = -1; + continue; + } + + if (ppim->pa == 0) { + /* Free L2 block */ + if (start_idx == -1) + start_idx = i; + free_l2_count++; + if (free_l2_count == l2_blocks) + break; + } + } + if (free_l2_count != l2_blocks) + panic("%s: too many preinit mappings", __func__); + + va = preinit_map_va + (start_idx * L2_SIZE); + for (i = start_idx; i < start_idx + l2_blocks; i++) { + /* Mark entries as allocated */ + ppim = pmap_preinit_mapping + i; + ppim->pa = pa; + ppim->va = va + offset; + ppim->size = size; + } + + /* Map L2 blocks */ + pa = rounddown2(pa, L2_SIZE); + for (i = 0; i < l2_blocks; i++) { + pde = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_mapbios: Invalid page entry, va: 0x%lx", + va)); + KASSERT(lvl == 1, + ("pmap_mapbios: Invalid level %d", lvl)); + + /* Insert L2_BLOCK */ + l2 = pmap_l1_to_l2(pde, va); + pmap_load_store(l2, + pa | ATTR_DEFAULT | ATTR_S1_XN | + ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | L2_BLOCK); + + va += L2_SIZE; + pa += L2_SIZE; + } + pmap_invalidate_all(kernel_pmap); + + va = preinit_map_va + (start_idx * L2_SIZE); + + } else { + /* kva_alloc may be used to map the pages */ + offset = pa & PAGE_MASK; + size = round_page(offset + size); + + va = kva_alloc(size); + if (va == 0) + panic("%s: Couldn't allocate KVA", __func__); + + pde = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(lvl == 2, ("pmap_mapbios: Invalid level %d", lvl)); + + /* L3 table is linked */ + va = trunc_page(va); + pa = trunc_page(pa); + pmap_kenter(va, size, pa, memory_mapping_mode(pa)); + } + + return ((void *)(va + offset)); +} + +void +pmap_unmapbios(vm_offset_t va, vm_size_t size) +{ + struct pmap_preinit_mapping *ppim; + vm_offset_t offset, tmpsize, va_trunc; + pd_entry_t *pde; + pt_entry_t *l2; + int i, lvl, l2_blocks, block; + bool preinit_map; + + l2_blocks = + (roundup2(va + size, L2_SIZE) - rounddown2(va, L2_SIZE)) >> L2_SHIFT; + KASSERT(l2_blocks > 0, ("pmap_unmapbios: invalid size %lx", size)); + + /* Remove preinit mapping */ + preinit_map = false; + block = 0; + for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { + ppim = pmap_preinit_mapping + i; + if (ppim->va == va) { + KASSERT(ppim->size == size, + ("pmap_unmapbios: size mismatch")); + ppim->va = 0; + ppim->pa = 0; + ppim->size = 0; + preinit_map = true; + offset = block * L2_SIZE; + va_trunc = rounddown2(va, L2_SIZE) + offset; + + /* Remove L2_BLOCK */ + pde = pmap_pde(kernel_pmap, va_trunc, &lvl); + KASSERT(pde != NULL, + ("pmap_unmapbios: Invalid page entry, va: 0x%lx", + va_trunc)); + l2 = pmap_l1_to_l2(pde, va_trunc); + pmap_clear(l2); + + if (block == (l2_blocks - 1)) + break; + block++; + } + } + if (preinit_map) { + pmap_invalidate_all(kernel_pmap); + return; + } + + /* Unmap the pages reserved with kva_alloc. 
*/
+	if (vm_initialized) {
+		offset = va & PAGE_MASK;
+		size = round_page(offset + size);
+		va = trunc_page(va);
+
+		pde = pmap_pde(kernel_pmap, va, &lvl);
+		KASSERT(pde != NULL,
+		    ("pmap_unmapbios: Invalid page entry, va: 0x%lx", va));
+		KASSERT(lvl == 2, ("pmap_unmapbios: Invalid level %d", lvl));
+
+		/* Unmap and invalidate the pages */
+		for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
+			pmap_kremove(va + tmpsize);
+
+		kva_free(va, size);
+	}
+}
+
+/*
+ * Sets the memory attribute for the specified page.
+ */
+void
+pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
+{
+
+	m->md.pv_memattr = ma;
+
+	/*
+	 * If "m" is a normal page, update its direct mapping.  This update
+	 * can be relied upon to perform any cache operations that are
+	 * required for data coherence.
+	 */
+	if ((m->flags & PG_FICTITIOUS) == 0 &&
+	    pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE,
+	    m->md.pv_memattr) != 0)
+		panic("memory attribute change on the direct map failed");
+}
+
+/*
+ * Changes the specified virtual address range's memory type to that given by
+ * the parameter "mode".  The specified virtual address range must be
+ * completely contained within either the direct map or the kernel map.  If
+ * the virtual address range is contained within the kernel map, then the
+ * memory type for each of the corresponding ranges of the direct map is also
+ * changed.  (The corresponding ranges of the direct map are those ranges that
+ * map the same physical pages as the specified virtual address range.)  These
+ * changes to the direct map are necessary because Intel describes the
+ * behavior of their processors as "undefined" if two or more mappings to the
+ * same physical page have different memory types.
+ *
+ * Returns zero if the change completed successfully, and either EINVAL or
+ * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
+ * of the virtual address range was not mapped, and ENOMEM is returned if
+ * there was insufficient memory available to complete the change.  In the
+ * latter case, the memory type may have been changed on some part of the
+ * virtual address range or the direct map.
+ */
+int
+pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
+{
+	int error;
+
+	PMAP_LOCK(kernel_pmap);
+	error = pmap_change_attr_locked(va, size, mode);
+	PMAP_UNLOCK(kernel_pmap);
+	return (error);
+}
+
+static int
+pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
+{
+	vm_offset_t base, offset, tmpva;
+	pt_entry_t l3, *pte, *newpte;
+	int lvl;
+
+	PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
+	base = trunc_page(va);
+	offset = va & PAGE_MASK;
+	size = round_page(offset + size);
+
+	if (!VIRT_IN_DMAP(base) &&
+	    !(base >= VM_MIN_KERNEL_ADDRESS && base < VM_MAX_KERNEL_ADDRESS))
+		return (EINVAL);
+
+	for (tmpva = base; tmpva < base + size; ) {
+		pte = pmap_pte(kernel_pmap, tmpva, &lvl);
+		if (pte == NULL)
+			return (EINVAL);
+
+		if ((pmap_load(pte) & ATTR_S1_IDX_MASK) == ATTR_S1_IDX(mode)) {
+			/*
+			 * We already have the correct attribute,
+			 * ignore this entry.
+			 */
+			switch (lvl) {
+			default:
+				panic("Invalid DMAP table level: %d\n", lvl);
+			case 1:
+				tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
+				break;
+			case 2:
+				tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
+				break;
+			case 3:
+				tmpva += PAGE_SIZE;
+				break;
+			}
+		} else {
+			/*
+			 * Split the entry to a level 3 table, then
+			 * set the new attribute.
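+			 *
+			 * The demotion cases below intentionally fall
+			 * through: an L1 block is first split into L2
+			 * blocks, the relevant L2 block is then split into
+			 * L3 pages, and finally the single 4KB entry is
+			 * updated.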
+			 */
+			switch (lvl) {
+			default:
+				panic("Invalid DMAP table level: %d\n", lvl);
+			case 1:
+				newpte = pmap_demote_l1(kernel_pmap, pte,
+				    tmpva & ~L1_OFFSET);
+				if (newpte == NULL)
+					return (EINVAL);
+				pte = pmap_l1_to_l2(pte, tmpva);
+				/* FALLTHROUGH */
+			case 2:
+				newpte = pmap_demote_l2(kernel_pmap, pte,
+				    tmpva);
+				if (newpte == NULL)
+					return (EINVAL);
+				pte = pmap_l2_to_l3(pte, tmpva);
+				/* FALLTHROUGH */
+			case 3:
+				/* Update the entry */
+				l3 = pmap_load(pte);
+				l3 &= ~ATTR_S1_IDX_MASK;
+				l3 |= ATTR_S1_IDX(mode);
+				if (mode == VM_MEMATTR_DEVICE)
+					l3 |= ATTR_S1_XN;
+
+				pmap_update_entry(kernel_pmap, pte, l3, tmpva,
+				    PAGE_SIZE);
+
+				/*
+				 * If moving to a non-cacheable entry flush
+				 * the cache.
+				 */
+				if (mode == VM_MEMATTR_UNCACHEABLE)
+					cpu_dcache_wbinv_range(tmpva, L3_SIZE);
+
+				break;
+			}
+			tmpva += PAGE_SIZE;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Create an L2 table to map all addresses within an L1 mapping.
+ */
+static pt_entry_t *
+pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va)
+{
+	pt_entry_t *l2, newl2, oldl1;
+	vm_offset_t tmpl1;
+	vm_paddr_t l2phys, phys;
+	vm_page_t ml2;
+	int i;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	oldl1 = pmap_load(l1);
+	KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK,
+	    ("pmap_demote_l1: Demoting a non-block entry"));
+	KASSERT((va & L1_OFFSET) == 0,
+	    ("pmap_demote_l1: Invalid virtual address %#lx", va));
+	KASSERT((oldl1 & ATTR_SW_MANAGED) == 0,
+	    ("pmap_demote_l1: Level 1 table shouldn't be managed"));
+
+	tmpl1 = 0;
+	if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) {
+		tmpl1 = kva_alloc(PAGE_SIZE);
+		if (tmpl1 == 0)
+			return (NULL);
+	}
+
+	if ((ml2 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
+	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
+		CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx"
+		    " in pmap %p", va, pmap);
+		return (NULL);
+	}
+
+	l2phys = VM_PAGE_TO_PHYS(ml2);
+	l2 = (pt_entry_t *)PHYS_TO_DMAP(l2phys);
+
+	/* The address the range points at */
+	phys = oldl1 & ~ATTR_MASK;
+	/* The attributes from the old l1 table to be copied */
+	newl2 = oldl1 & ATTR_MASK;
+
+	/* Create the new entries */
+	for (i = 0; i < Ln_ENTRIES; i++) {
+		l2[i] = newl2 | phys;
+		phys += L2_SIZE;
+	}
+	KASSERT(l2[0] == ((oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK),
+	    ("Invalid l2 page (%lx != %lx)", l2[0],
+	    (oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK));
+
+	if (tmpl1 != 0) {
+		pmap_kenter(tmpl1, PAGE_SIZE,
+		    DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET,
+		    VM_MEMATTR_WRITE_BACK);
+		l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK));
+	}
+
+	pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE);
+
+	if (tmpl1 != 0) {
+		pmap_kremove(tmpl1);
+		kva_free(tmpl1, PAGE_SIZE);
+	}
+
+	return (l2);
+}
+
+static void
+pmap_fill_l3(pt_entry_t *firstl3, pt_entry_t newl3)
+{
+	pt_entry_t *l3;
+
+	for (l3 = firstl3; l3 - firstl3 < Ln_ENTRIES; l3++) {
+		*l3 = newl3;
+		newl3 += L3_SIZE;
+	}
+}
+
+static void
+pmap_demote_l2_abort(pmap_t pmap, vm_offset_t va, pt_entry_t *l2,
+    struct rwlock **lockp)
+{
+	struct spglist free;
+
+	SLIST_INIT(&free);
+	(void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), &free,
+	    lockp);
+	vm_page_free_pages_toq(&free, true);
+}
+
+/*
+ * Create an L3 table to map all addresses within an L2 mapping.
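
pmap_demote_l1() replaces one large L1 block with a freshly allocated table of 512 L2 entries that together span the same range: the output address advances by L2_SIZE per entry while the attribute bits are copied unchanged, so l2[0] must equal the old entry apart from the descriptor type, which is exactly what the KASSERT above checks. A stand-alone sketch of the fill, where ENTRIES, SMALL_SIZE and the ATTR_MASK value are chosen for illustration rather than taken from pte.h:

    #include <inttypes.h>
    #include <stdio.h>

    #define ENTRIES 512			/* Ln_ENTRIES for a 4 KiB granule */
    #define SMALL_SIZE (1UL << 21)		/* L2_SIZE: each entry spans 2 MiB */
    #define ATTR_MASK 0xfffc000000000fffUL	/* illustrative attribute mask */

    int
    main(void)
    {
    	/* A made-up block entry: output address 0x80000000, attrs 0x70d. */
    	uint64_t oldl1 = 0x80000000UL | 0x70dUL;
    	uint64_t l2[ENTRIES], phys, attrs;
    	int i;

    	phys = oldl1 & ~ATTR_MASK;	/* address the old block points at */
    	attrs = oldl1 & ATTR_MASK;	/* attributes carried over unchanged */
    	for (i = 0; i < ENTRIES; i++) {
    		l2[i] = attrs | phys;
    		phys += SMALL_SIZE;
    	}
    	printf("l2[0] %#" PRIx64 ", l2[%d] %#" PRIx64 "\n",
    	    l2[0], ENTRIES - 1, l2[ENTRIES - 1]);
    	return (0);
    }
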
+ */ +static pt_entry_t * +pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, + struct rwlock **lockp) +{ + pt_entry_t *l3, newl3, oldl2; + vm_offset_t tmpl2; + vm_paddr_t l3phys; + vm_page_t ml3; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + PMAP_ASSERT_STAGE1(pmap); + l3 = NULL; + oldl2 = pmap_load(l2); + KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK, + ("pmap_demote_l2: Demoting a non-block entry")); + va &= ~L2_OFFSET; + + tmpl2 = 0; + if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) { + tmpl2 = kva_alloc(PAGE_SIZE); + if (tmpl2 == 0) + return (NULL); + } + + /* + * Invalidate the 2MB page mapping and return "failure" if the + * mapping was never accessed. + */ + if ((oldl2 & ATTR_AF) == 0) { + KASSERT((oldl2 & ATTR_SW_WIRED) == 0, + ("pmap_demote_l2: a wired mapping is missing ATTR_AF")); + pmap_demote_l2_abort(pmap, va, l2, lockp); + CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx in pmap %p", + va, pmap); + goto fail; + } + + if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) { + KASSERT((oldl2 & ATTR_SW_WIRED) == 0, + ("pmap_demote_l2: page table page for a wired mapping" + " is missing")); + + /* + * If the page table page is missing and the mapping + * is for a kernel address, the mapping must belong to + * the direct map. Page table pages are preallocated + * for every other part of the kernel address space, + * so the direct map region is the only part of the + * kernel address space that must be handled here. + */ + KASSERT(va < VM_MAXUSER_ADDRESS || VIRT_IN_DMAP(va), + ("pmap_demote_l2: No saved mpte for va %#lx", va)); + + /* + * If the 2MB page mapping belongs to the direct map + * region of the kernel's address space, then the page + * allocation request specifies the highest possible + * priority (VM_ALLOC_INTERRUPT). Otherwise, the + * priority is normal. + */ + ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va), + (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); + + /* + * If the allocation of the new page table page fails, + * invalidate the 2MB page mapping and return "failure". + */ + if (ml3 == NULL) { + pmap_demote_l2_abort(pmap, va, l2, lockp); + CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx" + " in pmap %p", va, pmap); + goto fail; + } + + if (va < VM_MAXUSER_ADDRESS) { + ml3->ref_count = NL3PG; + pmap_resident_count_inc(pmap, 1); + } + } + l3phys = VM_PAGE_TO_PHYS(ml3); + l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys); + newl3 = (oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE; + KASSERT((oldl2 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) != + (ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM), + ("pmap_demote_l2: L2 entry is writeable but not dirty")); + + /* + * If the page table page is not leftover from an earlier promotion, + * or the mapping attributes have changed, (re)initialize the L3 table. + * + * When pmap_update_entry() clears the old L2 mapping, it (indirectly) + * performs a dsb(). That dsb() ensures that the stores for filling + * "l3" are visible before "l3" is added to the page table. + */ + if (ml3->valid == 0 || (l3[0] & ATTR_MASK) != (newl3 & ATTR_MASK)) + pmap_fill_l3(l3, newl3); + + /* + * Map the temporary page so we don't lose access to the l2 table. + */ + if (tmpl2 != 0) { + pmap_kenter(tmpl2, PAGE_SIZE, + DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET, + VM_MEMATTR_WRITE_BACK); + l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK)); + } + + /* + * The spare PV entries must be reserved prior to demoting the + * mapping, that is, prior to changing the PDE. 
Otherwise, the state
+	 * of the L2 and the PV lists will be inconsistent, which can result
+	 * in reclaim_pv_chunk() attempting to remove a PV entry from the
+	 * wrong PV list and pmap_pv_demote_l2() failing to find the expected
+	 * PV entry for the 2MB page mapping that is being demoted.
+	 */
+	if ((oldl2 & ATTR_SW_MANAGED) != 0)
+		reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp);
+
+	/*
+	 * Pass PAGE_SIZE so that a single TLB invalidation is performed on
+	 * the 2MB page mapping.
+	 */
+	pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE);
+
+	/*
+	 * Demote the PV entry.
+	 */
+	if ((oldl2 & ATTR_SW_MANAGED) != 0)
+		pmap_pv_demote_l2(pmap, va, oldl2 & ~ATTR_MASK, lockp);
+
+	atomic_add_long(&pmap_l2_demotions, 1);
+	CTR3(KTR_PMAP, "pmap_demote_l2: success for va %#lx"
+	    " in pmap %p %lx", va, pmap, l3[0]);
+
+fail:
+	if (tmpl2 != 0) {
+		pmap_kremove(tmpl2);
+		kva_free(tmpl2, PAGE_SIZE);
+	}
+
+	return (l3);
+}
+
+static pt_entry_t *
+pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
+{
+	struct rwlock *lock;
+	pt_entry_t *l3;
+
+	lock = NULL;
+	l3 = pmap_demote_l2_locked(pmap, l2, va, &lock);
+	if (lock != NULL)
+		rw_wunlock(lock);
+	return (l3);
+}
+
+/*
+ * Perform the pmap work for mincore(2).  If the page is not both referenced
+ * and modified by this pmap, returns its physical address so that the caller
+ * can find other mappings.
+ */
+int
+pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
+{
+	pt_entry_t *pte, tpte;
+	vm_paddr_t mask, pa;
+	int lvl, val;
+	bool managed;
+
+	PMAP_ASSERT_STAGE1(pmap);
+	PMAP_LOCK(pmap);
+	pte = pmap_pte(pmap, addr, &lvl);
+	if (pte != NULL) {
+		tpte = pmap_load(pte);
+
+		switch (lvl) {
+		case 3:
+			mask = L3_OFFSET;
+			break;
+		case 2:
+			mask = L2_OFFSET;
+			break;
+		case 1:
+			mask = L1_OFFSET;
+			break;
+		default:
+			panic("pmap_mincore: invalid level %d", lvl);
+		}
+
+		managed = (tpte & ATTR_SW_MANAGED) != 0;
+		val = MINCORE_INCORE;
+		if (lvl != 3)
+			val |= MINCORE_PSIND(3 - lvl);
+		if ((managed && pmap_pte_dirty(pmap, tpte)) || (!managed &&
+		    (tpte & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP(ATTR_S1_AP_RW)))
+			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
+		if ((tpte & ATTR_AF) == ATTR_AF)
+			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
+
+		pa = (tpte & ~ATTR_MASK) | (addr & mask);
+	} else {
+		managed = false;
+		val = 0;
+	}
+
+	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
+	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
+		*pap = pa;
+	}
+	PMAP_UNLOCK(pmap);
+	return (val);
+}
+
+/*
+ * Garbage collect every ASID that is neither active on a processor nor
+ * reserved.
+ */
+static void
+pmap_reset_asid_set(pmap_t pmap)
+{
+	pmap_t curpmap;
+	int asid, cpuid, epoch;
+	struct asid_set *set;
+	enum pmap_stage stage;
+
+	set = pmap->pm_asid_set;
+	KASSERT(set != NULL, ("%s: NULL asid set", __func__));
+	mtx_assert(&set->asid_set_mutex, MA_OWNED);
+	stage = pmap->pm_stage;
+
+	/*
+	 * Ensure that the store to asid_epoch is globally visible before the
+	 * loads from pc_curpmap are performed.
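
pmap_reset_asid_set() implements a generation ("epoch") scheme: instead of tracking when each ASID is released, it bumps the epoch, clears the whole bitmap, and re-marks only the ASIDs still live on other CPUs; any pmap whose cookie carries an older epoch allocates a fresh ASID on its next activation. A simplified user-space model of that reset, where NIDS, NCPUS and the data layout are invented for illustration:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define NIDS 8
    #define NCPUS 4

    static bool used[NIDS];		/* which IDs are handed out */
    static int epoch;			/* current generation */
    static int active_id[NCPUS];	/* ID live on each CPU, -1 if none */

    /*
     * Start a new generation, wipe the bitmap, then re-mark only the IDs
     * still running on other CPUs so they are not handed out again.
     */
    static void
    reset_ids(int self)
    {
    	int cpu;

    	epoch++;	/* cookies stamped with older epochs become stale */
    	memset(used, 0, sizeof(used));
    	for (cpu = 0; cpu < NCPUS; cpu++) {
    		if (cpu == self || active_id[cpu] < 0)
    			continue;
    		used[active_id[cpu]] = true;
    	}
    }

    int
    main(void)
    {
    	int i;

    	for (i = 0; i < NIDS; i++)	/* exhaust the ID space */
    		used[i] = true;
    	active_id[0] = -1;		/* the resetting CPU */
    	active_id[1] = 3;		/* IDs still live elsewhere */
    	active_id[2] = 5;
    	active_id[3] = -1;

    	reset_ids(0);
    	printf("epoch %d free:", epoch);
    	for (i = 0; i < NIDS; i++)
    		if (!used[i])
    			printf(" %d", i);	/* 0 1 2 4 6 7 reclaimed */
    	printf("\n");
    	return (0);
    }
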
+ */ + epoch = set->asid_epoch + 1; + if (epoch == INT_MAX) + epoch = 0; + set->asid_epoch = epoch; + dsb(ishst); + if (stage == PM_STAGE1) { + __asm __volatile("tlbi vmalle1is"); + } else { + KASSERT(pmap_clean_stage2_tlbi != NULL, + ("%s: Unset stage 2 tlb invalidation callback\n", + __func__)); + pmap_clean_stage2_tlbi(); + } + dsb(ish); + bit_nclear(set->asid_set, ASID_FIRST_AVAILABLE, + set->asid_set_size - 1); + CPU_FOREACH(cpuid) { + if (cpuid == curcpu) + continue; + if (stage == PM_STAGE1) { + curpmap = pcpu_find(cpuid)->pc_curpmap; + PMAP_ASSERT_STAGE1(pmap); + } else { + curpmap = pcpu_find(cpuid)->pc_curvmpmap; + if (curpmap == NULL) + continue; + PMAP_ASSERT_STAGE2(pmap); + } + KASSERT(curpmap->pm_asid_set == set, ("Incorrect set")); + asid = COOKIE_TO_ASID(curpmap->pm_cookie); + if (asid == -1) + continue; + bit_set(set->asid_set, asid); + curpmap->pm_cookie = COOKIE_FROM(asid, epoch); + } +} + +/* + * Allocate a new ASID for the specified pmap. + */ +static void +pmap_alloc_asid(pmap_t pmap) +{ + struct asid_set *set; + int new_asid; + + set = pmap->pm_asid_set; + KASSERT(set != NULL, ("%s: NULL asid set", __func__)); + + mtx_lock_spin(&set->asid_set_mutex); + + /* + * While this processor was waiting to acquire the asid set mutex, + * pmap_reset_asid_set() running on another processor might have + * updated this pmap's cookie to the current epoch. In which case, we + * don't need to allocate a new ASID. + */ + if (COOKIE_TO_EPOCH(pmap->pm_cookie) == set->asid_epoch) + goto out; + + bit_ffc_at(set->asid_set, set->asid_next, set->asid_set_size, + &new_asid); + if (new_asid == -1) { + bit_ffc_at(set->asid_set, ASID_FIRST_AVAILABLE, + set->asid_next, &new_asid); + if (new_asid == -1) { + pmap_reset_asid_set(pmap); + bit_ffc_at(set->asid_set, ASID_FIRST_AVAILABLE, + set->asid_set_size, &new_asid); + KASSERT(new_asid != -1, ("ASID allocation failure")); + } + } + bit_set(set->asid_set, new_asid); + set->asid_next = new_asid + 1; + pmap->pm_cookie = COOKIE_FROM(new_asid, set->asid_epoch); +out: + mtx_unlock_spin(&set->asid_set_mutex); +} + +/* + * Compute the value that should be stored in ttbr0 to activate the specified + * pmap. This value may change from time to time. + */ +uint64_t +pmap_to_ttbr0(pmap_t pmap) +{ + + return (ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)) | + pmap->pm_l0_paddr); +} + +static bool +pmap_activate_int(pmap_t pmap) +{ + struct asid_set *set; + int epoch; + + KASSERT(PCPU_GET(curpmap) != NULL, ("no active pmap")); + KASSERT(pmap != kernel_pmap, ("kernel pmap activation")); + + if ((pmap->pm_stage == PM_STAGE1 && pmap == PCPU_GET(curpmap)) || + (pmap->pm_stage == PM_STAGE2 && pmap == PCPU_GET(curvmpmap))) { + /* + * Handle the possibility that the old thread was preempted + * after an "ic" or "tlbi" instruction but before it performed + * a "dsb" instruction. If the old thread migrates to a new + * processor, its completion of a "dsb" instruction on that + * new processor does not guarantee that the "ic" or "tlbi" + * instructions performed on the old processor have completed. + */ + dsb(ish); + return (false); + } + + set = pmap->pm_asid_set; + KASSERT(set != NULL, ("%s: NULL asid set", __func__)); + + /* + * Ensure that the store to curpmap is globally visible before the + * load from asid_epoch is performed. 
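
pmap_alloc_asid() above searches the bitmap in two passes: first from the asid_next hint to the end of the set, then from the start of the allocatable range up to the hint, and only resets the whole set when both passes fail. A sketch of that wrap-around find-first-clear, using a plain array in place of the bitstring(3) bit_ffc_at() calls and scanning from 0 rather than ASID_FIRST_AVAILABLE for brevity:

    #include <stdbool.h>
    #include <stdio.h>

    static int
    ffc_wrap(const bool *used, int n, int hint)
    {
    	int i;

    	for (i = hint; i < n; i++)	/* first pass: hint .. end */
    		if (!used[i])
    			return (i);
    	for (i = 0; i < hint; i++)	/* second pass: wrap to the start */
    		if (!used[i])
    			return (i);
    	return (-1);			/* exhausted: caller resets the set */
    }

    int
    main(void)
    {
    	bool used[8] = { true, false, true, true, true, true, true, true };

    	/* The hint points past the only free ID, so the search wraps. */
    	printf("allocated id %d\n", ffc_wrap(used, 8, 5));	/* prints 1 */
    	return (0);
    }
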
+	 */
+	if (pmap->pm_stage == PM_STAGE1)
+		PCPU_SET(curpmap, pmap);
+	else
+		PCPU_SET(curvmpmap, pmap);
+	dsb(ish);
+	epoch = COOKIE_TO_EPOCH(pmap->pm_cookie);
+	if (epoch >= 0 && epoch != set->asid_epoch)
+		pmap_alloc_asid(pmap);
+
+	if (pmap->pm_stage == PM_STAGE1) {
+		set_ttbr0(pmap_to_ttbr0(pmap));
+		if (PCPU_GET(bcast_tlbi_workaround) != 0)
+			invalidate_local_icache();
+	}
+	return (true);
+}
+
+void
+pmap_activate_vm(pmap_t pmap)
+{
+
+	PMAP_ASSERT_STAGE2(pmap);
+
+	(void)pmap_activate_int(pmap);
+}
+
+void
+pmap_activate(struct thread *td)
+{
+	pmap_t	pmap;
+
+	pmap = vmspace_pmap(td->td_proc->p_vmspace);
+	PMAP_ASSERT_STAGE1(pmap);
+	critical_enter();
+	(void)pmap_activate_int(pmap);
+	critical_exit();
+}
+
+/*
+ * To eliminate the unused parameter "old", we would have to add an instruction
+ * to cpu_switch().
+ */
+struct pcb *
+pmap_switch(struct thread *old __unused, struct thread *new)
+{
+	pcpu_bp_harden bp_harden;
+	struct pcb *pcb;
+
+	/* Store the new curthread */
+	PCPU_SET(curthread, new);
+
+	/* And the new pcb */
+	pcb = new->td_pcb;
+	PCPU_SET(curpcb, pcb);
+
+	/*
+	 * TODO: We may need to flush the cache here if switching
+	 * to a user process.
+	 */
+
+	if (pmap_activate_int(vmspace_pmap(new->td_proc->p_vmspace))) {
+		/*
+		 * Stop userspace from training the branch predictor against
+		 * other processes. This will call into a CPU specific
+		 * function that clears the branch predictor state.
+		 */
+		bp_harden = PCPU_GET(bp_harden);
+		if (bp_harden != NULL)
+			bp_harden();
+	}
+
+	return (pcb);
+}
+
+void
+pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
+{
+
+	PMAP_ASSERT_STAGE1(pmap);
+	if (va >= VM_MIN_KERNEL_ADDRESS) {
+		cpu_icache_sync_range(va, sz);
+	} else {
+		u_int len, offset;
+		vm_paddr_t pa;
+
+		/* Find the length of data in this page to flush */
+		offset = va & PAGE_MASK;
+		len = imin(PAGE_SIZE - offset, sz);
+
+		while (sz != 0) {
+			/* Extract the physical address & find it in the DMAP */
+			pa = pmap_extract(pmap, va);
+			if (pa != 0)
+				cpu_icache_sync_range(PHYS_TO_DMAP(pa), len);
+
+			/* Move to the next page */
+			sz -= len;
+			va += len;
+			/* Set the length for the next iteration */
+			len = imin(PAGE_SIZE, sz);
+		}
+	}
+}
+
+static int
+pmap_stage2_fault(pmap_t pmap, uint64_t esr, uint64_t far)
+{
+	pd_entry_t *pdep;
+	pt_entry_t *ptep, pte;
+	int rv, lvl, dfsc;
+
+	PMAP_ASSERT_STAGE2(pmap);
+	rv = KERN_FAILURE;
+
+	/* Data and insn aborts use same encoding for FSC field. */
+	dfsc = esr & ISS_DATA_DFSC_MASK;
+	switch (dfsc) {
+	case ISS_DATA_DFSC_TF_L0:
+	case ISS_DATA_DFSC_TF_L1:
+	case ISS_DATA_DFSC_TF_L2:
+	case ISS_DATA_DFSC_TF_L3:
+		PMAP_LOCK(pmap);
+		pdep = pmap_pde(pmap, far, &lvl);
+		if (pdep == NULL || lvl != (dfsc - ISS_DATA_DFSC_TF_L1)) {
+			PMAP_UNLOCK(pmap);
+			break;
+		}
+
+		switch (lvl) {
+		case 0:
+			ptep = pmap_l0_to_l1(pdep, far);
+			break;
+		case 1:
+			ptep = pmap_l1_to_l2(pdep, far);
+			break;
+		case 2:
+			ptep = pmap_l2_to_l3(pdep, far);
+			break;
+		default:
+			panic("%s: Invalid pde level %d", __func__, lvl);
+		}
+		goto fault_exec;
+
+	case ISS_DATA_DFSC_AFF_L1:
+	case ISS_DATA_DFSC_AFF_L2:
+	case ISS_DATA_DFSC_AFF_L3:
+		PMAP_LOCK(pmap);
+		ptep = pmap_pte(pmap, far, &lvl);
+fault_exec:
+		if (ptep != NULL && (pte = pmap_load(ptep)) != 0) {
+			if (icache_vmid) {
+				pmap_invalidate_vpipt_icache();
+			} else {
+				/*
+				 * If accessing an executable page invalidate
+				 * the I-cache so it will be valid when we
+				 * continue execution in the guest. The D-cache
+				 * is assumed to already be clean to the Point
+				 * of Coherency.
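
pmap_sync_icache() above walks the user range one page at a time because each page may live at a different physical address: the first chunk only runs to the end of its page, and later chunks are whole pages or the final remainder. The chunking can be checked in isolation; a small sketch with a 4 KiB page for illustration:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (PAGE_SIZE - 1)

    static unsigned long
    min_ul(unsigned long a, unsigned long b)
    {
    	return (a < b ? a : b);
    }

    int
    main(void)
    {
    	unsigned long va = 0x10f80UL, sz = 0x2200UL, len;

    	/* First chunk: from va to the end of its page. */
    	len = min_ul(PAGE_SIZE - (va & PAGE_MASK), sz);
    	while (sz != 0) {
    		printf("flush va=%#lx len=%#lx\n", va, len);
    		sz -= len;
    		va += len;
    		len = min_ul(PAGE_SIZE, sz);	/* later chunks */
    	}
    	/* Emits 0x80 + 0x1000 + 0x1000 + 0x180 = 0x2200 bytes total. */
    	return (0);
    }
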
+ */ + if ((pte & ATTR_S2_XN_MASK) != + ATTR_S2_XN(ATTR_S2_XN_NONE)) { + invalidate_icache(); + } + } + pmap_set_bits(ptep, ATTR_AF | ATTR_DESCR_VALID); + rv = KERN_SUCCESS; + } + PMAP_UNLOCK(pmap); + break; + } + + return (rv); +} + +int +pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far) +{ + pt_entry_t pte, *ptep; + register_t intr; + uint64_t ec, par; + int lvl, rv; + + rv = KERN_FAILURE; + + ec = ESR_ELx_EXCEPTION(esr); + switch (ec) { + case EXCP_INSN_ABORT_L: + case EXCP_INSN_ABORT: + case EXCP_DATA_ABORT_L: + case EXCP_DATA_ABORT: + break; + default: + return (rv); + } + + if (pmap->pm_stage == PM_STAGE2) + return (pmap_stage2_fault(pmap, esr, far)); + + /* Data and insn aborts use same encoding for FSC field. */ + switch (esr & ISS_DATA_DFSC_MASK) { + case ISS_DATA_DFSC_AFF_L1: + case ISS_DATA_DFSC_AFF_L2: + case ISS_DATA_DFSC_AFF_L3: + PMAP_LOCK(pmap); + ptep = pmap_pte(pmap, far, &lvl); + if (ptep != NULL) { + pmap_set_bits(ptep, ATTR_AF); + rv = KERN_SUCCESS; + /* + * XXXMJ as an optimization we could mark the entry + * dirty if this is a write fault. + */ + } + PMAP_UNLOCK(pmap); + break; + case ISS_DATA_DFSC_PF_L1: + case ISS_DATA_DFSC_PF_L2: + case ISS_DATA_DFSC_PF_L3: + if ((ec != EXCP_DATA_ABORT_L && ec != EXCP_DATA_ABORT) || + (esr & ISS_DATA_WnR) == 0) + return (rv); + PMAP_LOCK(pmap); + ptep = pmap_pte(pmap, far, &lvl); + if (ptep != NULL && + ((pte = pmap_load(ptep)) & ATTR_SW_DBM) != 0) { + if ((pte & ATTR_S1_AP_RW_BIT) == + ATTR_S1_AP(ATTR_S1_AP_RO)) { + pmap_clear_bits(ptep, ATTR_S1_AP_RW_BIT); + pmap_invalidate_page(pmap, far); + } + rv = KERN_SUCCESS; + } + PMAP_UNLOCK(pmap); + break; + case ISS_DATA_DFSC_TF_L0: + case ISS_DATA_DFSC_TF_L1: + case ISS_DATA_DFSC_TF_L2: + case ISS_DATA_DFSC_TF_L3: + /* + * Retry the translation. A break-before-make sequence can + * produce a transient fault. + */ + if (pmap == kernel_pmap) { + /* + * The translation fault may have occurred within a + * critical section. Therefore, we must check the + * address without acquiring the kernel pmap's lock. + */ + if (pmap_kextract(far) != 0) + rv = KERN_SUCCESS; + } else { + PMAP_LOCK(pmap); + /* Ask the MMU to check the address. */ + intr = intr_disable(); + par = arm64_address_translate_s1e0r(far); + intr_restore(intr); + PMAP_UNLOCK(pmap); + + /* + * If the translation was successful, then we can + * return success to the trap handler. + */ + if (PAR_SUCCESS(par)) + rv = KERN_SUCCESS; + } + break; + } + + return (rv); +} + +/* + * Increase the starting virtual address of the given mapping if a + * different alignment might result in more superpage mappings. + */ +void +pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, + vm_offset_t *addr, vm_size_t size) +{ + vm_offset_t superpage_offset; + + if (size < L2_SIZE) + return; + if (object != NULL && (object->flags & OBJ_COLORED) != 0) + offset += ptoa(object->pg_color); + superpage_offset = offset & L2_OFFSET; + if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE || + (*addr & L2_OFFSET) == superpage_offset) + return; + if ((*addr & L2_OFFSET) < superpage_offset) + *addr = (*addr & ~L2_OFFSET) + superpage_offset; + else + *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset; +} + +/** + * Get the kernel virtual address of a set of physical pages. If there are + * physical addresses not covered by the DMAP perform a transient mapping + * that will be removed when calling pmap_unmap_io_transient. + * + * \param page The pages the caller wishes to obtain the virtual + * address on the kernel memory map. 
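
pmap_align_superpage() above nudges the start address so that its offset within a 2 MiB superpage matches the residue of the pager offset; only then can the same L2 blocks back both the object and the mapping. A worked example of the computation, with the input values invented for illustration:

    #include <stdio.h>

    #define L2_SIZE (1UL << 21)		/* 2 MiB superpage */
    #define L2_OFFSET (L2_SIZE - 1)

    int
    main(void)
    {
    	unsigned long offset = 0x1234000UL;	/* pager offset of the mapping */
    	unsigned long addr = 0x40000000UL;	/* candidate start address */
    	unsigned long residue = offset & L2_OFFSET;	/* 0x34000 */

    	if ((addr & L2_OFFSET) < residue)
    		addr = (addr & ~L2_OFFSET) + residue;
    	else
    		addr = ((addr + L2_OFFSET) & ~L2_OFFSET) + residue;

    	/* 0x40034000: addr % L2_SIZE now equals offset % L2_SIZE. */
    	printf("aligned addr = %#lx\n", addr);
    	return (0);
    }
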
+ * \param vaddr On return contains the kernel virtual memory address + * of the pages passed in the page parameter. + * \param count Number of pages passed in. + * \param can_fault TRUE if the thread using the mapped pages can take + * page faults, FALSE otherwise. + * + * \returns TRUE if the caller must call pmap_unmap_io_transient when + * finished or FALSE otherwise. + * + */ +boolean_t +pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, + boolean_t can_fault) +{ + vm_paddr_t paddr; + boolean_t needs_mapping; + int error, i; + + /* + * Allocate any KVA space that we need, this is done in a separate + * loop to prevent calling vmem_alloc while pinned. + */ + needs_mapping = FALSE; + for (i = 0; i < count; i++) { + paddr = VM_PAGE_TO_PHYS(page[i]); + if (__predict_false(!PHYS_IN_DMAP(paddr))) { + error = vmem_alloc(kernel_arena, PAGE_SIZE, + M_BESTFIT | M_WAITOK, &vaddr[i]); + KASSERT(error == 0, ("vmem_alloc failed: %d", error)); + needs_mapping = TRUE; + } else { + vaddr[i] = PHYS_TO_DMAP(paddr); + } + } + + /* Exit early if everything is covered by the DMAP */ + if (!needs_mapping) + return (FALSE); + + if (!can_fault) + sched_pin(); + for (i = 0; i < count; i++) { + paddr = VM_PAGE_TO_PHYS(page[i]); + if (!PHYS_IN_DMAP(paddr)) { + panic( + "pmap_map_io_transient: TODO: Map out of DMAP data"); + } + } + + return (needs_mapping); +} + +void +pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, + boolean_t can_fault) +{ + vm_paddr_t paddr; + int i; + + if (!can_fault) + sched_unpin(); + for (i = 0; i < count; i++) { + paddr = VM_PAGE_TO_PHYS(page[i]); + if (!PHYS_IN_DMAP(paddr)) { + panic("ARM64TODO: pmap_unmap_io_transient: Unmap data"); + } + } +} + +boolean_t +pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode) +{ + + return (mode >= VM_MEMATTR_DEVICE && mode <= VM_MEMATTR_WRITE_THROUGH); +} + +/* + * Track a range of the kernel's virtual address space that is contiguous + * in various mapping attributes. + */ +struct pmap_kernel_map_range { + vm_offset_t sva; + pt_entry_t attrs; + int l3pages; + int l3contig; + int l2blocks; + int l1blocks; +}; + +static void +sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range, + vm_offset_t eva) +{ + const char *mode; + int index; + + if (eva <= range->sva) + return; + + index = range->attrs & ATTR_S1_IDX_MASK; + switch (index) { + case ATTR_S1_IDX(VM_MEMATTR_DEVICE): + mode = "DEV"; + break; + case ATTR_S1_IDX(VM_MEMATTR_UNCACHEABLE): + mode = "UC"; + break; + case ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK): + mode = "WB"; + break; + case ATTR_S1_IDX(VM_MEMATTR_WRITE_THROUGH): + mode = "WT"; + break; + default: + printf( + "%s: unknown memory type %x for range 0x%016lx-0x%016lx\n", + __func__, index, range->sva, eva); + mode = "??"; + break; + } + + sbuf_printf(sb, "0x%016lx-0x%016lx r%c%c%c %3s %d %d %d %d\n", + range->sva, eva, + (range->attrs & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP_RW ? 'w' : '-', + (range->attrs & ATTR_S1_PXN) != 0 ? '-' : 'x', + (range->attrs & ATTR_S1_AP_USER) != 0 ? 'u' : 's', + mode, range->l1blocks, range->l2blocks, range->l3contig, + range->l3pages); + + /* Reset to sentinel value. */ + range->sva = 0xfffffffffffffffful; +} + +/* + * Determine whether the attributes specified by a page table entry match those + * being tracked by the current range. 
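
The sysctl_kmaps helpers here coalesce leaf mappings into maximal runs with identical attributes: each entry is checked against the current run, and on a mismatch the run is printed and restarted. The same check/dump/reinit pattern in miniature, with integer attribute codes standing in for the real PTE attribute bits:

    #include <stdio.h>

    struct range { unsigned long sva; int attrs; };

    static void
    dump(struct range *r, unsigned long eva)
    {
    	if (eva <= r->sva)
    		return;
    	printf("%#lx-%#lx attrs=%d\n", r->sva, eva, r->attrs);
    	r->sva = ~0UL;		/* reset to the sentinel value */
    }

    static void
    check(struct range *r, unsigned long va, int attrs)
    {
    	if (r->sva > va || r->attrs != attrs) {
    		dump(r, va);	/* close the previous run */
    		r->sva = va;	/* and start a new one */
    		r->attrs = attrs;
    	}
    }

    int
    main(void)
    {
    	int attrs[] = { 1, 1, 2, 2, 2, 3 };
    	struct range r = { .sva = ~0UL };
    	unsigned long va;
    	int i;

    	for (i = 0, va = 0x1000; i < 6; i++, va += 0x1000)
    		check(&r, va, attrs[i]);
    	dump(&r, va);		/* flush the final run */
    	return (0);
    }
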
+ */ +static bool +sysctl_kmaps_match(struct pmap_kernel_map_range *range, pt_entry_t attrs) +{ + + return (range->attrs == attrs); +} + +static void +sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va, + pt_entry_t attrs) +{ + + memset(range, 0, sizeof(*range)); + range->sva = va; + range->attrs = attrs; +} + +/* + * Given a leaf PTE, derive the mapping's attributes. If they do not match + * those of the current run, dump the address range and its attributes, and + * begin a new run. + */ +static void +sysctl_kmaps_check(struct sbuf *sb, struct pmap_kernel_map_range *range, + vm_offset_t va, pd_entry_t l0e, pd_entry_t l1e, pd_entry_t l2e, + pt_entry_t l3e) +{ + pt_entry_t attrs; + + attrs = l0e & (ATTR_S1_AP_MASK | ATTR_S1_XN); + attrs |= l1e & (ATTR_S1_AP_MASK | ATTR_S1_XN); + if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK) + attrs |= l1e & ATTR_S1_IDX_MASK; + attrs |= l2e & (ATTR_S1_AP_MASK | ATTR_S1_XN); + if ((l2e & ATTR_DESCR_MASK) == L2_BLOCK) + attrs |= l2e & ATTR_S1_IDX_MASK; + attrs |= l3e & (ATTR_S1_AP_MASK | ATTR_S1_XN | ATTR_S1_IDX_MASK); + + if (range->sva > va || !sysctl_kmaps_match(range, attrs)) { + sysctl_kmaps_dump(sb, range, va); + sysctl_kmaps_reinit(range, va, attrs); + } +} + +static int +sysctl_kmaps(SYSCTL_HANDLER_ARGS) +{ + struct pmap_kernel_map_range range; + struct sbuf sbuf, *sb; + pd_entry_t l0e, *l1, l1e, *l2, l2e; + pt_entry_t *l3, l3e; + vm_offset_t sva; + vm_paddr_t pa; + int error, i, j, k, l; + + error = sysctl_wire_old_buffer(req, 0); + if (error != 0) + return (error); + sb = &sbuf; + sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req); + + /* Sentinel value. */ + range.sva = 0xfffffffffffffffful; + + /* + * Iterate over the kernel page tables without holding the kernel pmap + * lock. Kernel page table pages are never freed, so at worst we will + * observe inconsistencies in the output. + */ + for (sva = 0xffff000000000000ul, i = pmap_l0_index(sva); i < Ln_ENTRIES; + i++) { + if (i == pmap_l0_index(DMAP_MIN_ADDRESS)) + sbuf_printf(sb, "\nDirect map:\n"); + else if (i == pmap_l0_index(VM_MIN_KERNEL_ADDRESS)) + sbuf_printf(sb, "\nKernel map:\n"); + + l0e = kernel_pmap->pm_l0[i]; + if ((l0e & ATTR_DESCR_VALID) == 0) { + sysctl_kmaps_dump(sb, &range, sva); + sva += L0_SIZE; + continue; + } + pa = l0e & ~ATTR_MASK; + l1 = (pd_entry_t *)PHYS_TO_DMAP(pa); + + for (j = pmap_l1_index(sva); j < Ln_ENTRIES; j++) { + l1e = l1[j]; + if ((l1e & ATTR_DESCR_VALID) == 0) { + sysctl_kmaps_dump(sb, &range, sva); + sva += L1_SIZE; + continue; + } + if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK) { + sysctl_kmaps_check(sb, &range, sva, l0e, l1e, + 0, 0); + range.l1blocks++; + sva += L1_SIZE; + continue; + } + pa = l1e & ~ATTR_MASK; + l2 = (pd_entry_t *)PHYS_TO_DMAP(pa); + + for (k = pmap_l2_index(sva); k < Ln_ENTRIES; k++) { + l2e = l2[k]; + if ((l2e & ATTR_DESCR_VALID) == 0) { + sysctl_kmaps_dump(sb, &range, sva); + sva += L2_SIZE; + continue; + } + if ((l2e & ATTR_DESCR_MASK) == L2_BLOCK) { + sysctl_kmaps_check(sb, &range, sva, + l0e, l1e, l2e, 0); + range.l2blocks++; + sva += L2_SIZE; + continue; + } + pa = l2e & ~ATTR_MASK; + l3 = (pt_entry_t *)PHYS_TO_DMAP(pa); + + for (l = pmap_l3_index(sva); l < Ln_ENTRIES; + l++, sva += L3_SIZE) { + l3e = l3[l]; + if ((l3e & ATTR_DESCR_VALID) == 0) { + sysctl_kmaps_dump(sb, &range, + sva); + continue; + } + sysctl_kmaps_check(sb, &range, sva, + l0e, l1e, l2e, l3e); + if ((l3e & ATTR_CONTIGUOUS) != 0) + range.l3contig += l % 16 == 0 ? 
+ 1 : 0; + else + range.l3pages++; + } + } + } + } + + error = sbuf_finish(sb); + sbuf_delete(sb); + return (error); +} +SYSCTL_OID(_vm_pmap, OID_AUTO, kernel_maps, + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, + NULL, 0, sysctl_kmaps, "A", + "Dump kernel address layout"); diff --git a/sys/arm64/arm64/stack_machdep.c b/sys/arm64/arm64/stack_machdep.c new file mode 100644 index 000000000000..5af5dde2d461 --- /dev/null +++ b/sys/arm64/arm64/stack_machdep.c @@ -0,0 +1,93 @@ +/*- + * Copyright (c) 2015 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/stack.h> + +#include <machine/vmparam.h> +#include <machine/pcb.h> +#include <machine/stack.h> + +static void +stack_capture(struct stack *st, struct unwind_state *frame) +{ + + stack_zero(st); + while (1) { + unwind_frame(frame); + if (!INKERNEL((vm_offset_t)frame->fp) || + !INKERNEL((vm_offset_t)frame->pc)) + break; + if (stack_put(st, frame->pc) == -1) + break; + } +} + +int +stack_save_td(struct stack *st, struct thread *td) +{ + struct unwind_state frame; + + THREAD_LOCK_ASSERT(td, MA_OWNED); + KASSERT(!TD_IS_SWAPPED(td), + ("stack_save_td: thread %p is swapped", td)); + + if (TD_IS_RUNNING(td)) + return (EOPNOTSUPP); + + frame.sp = td->td_pcb->pcb_sp; + frame.fp = td->td_pcb->pcb_x[29]; + frame.pc = td->td_pcb->pcb_x[30]; + + stack_capture(st, &frame); + return (0); +} + +void +stack_save(struct stack *st) +{ + struct unwind_state frame; + uint64_t sp; + + __asm __volatile("mov %0, sp" : "=&r" (sp)); + + frame.sp = sp; + frame.fp = (uint64_t)__builtin_frame_address(0); + frame.pc = (uint64_t)stack_save; + + stack_capture(st, &frame); +} diff --git a/sys/arm64/arm64/support.S b/sys/arm64/arm64/support.S new file mode 100644 index 000000000000..c5aba58c95f1 --- /dev/null +++ b/sys/arm64/arm64/support.S @@ -0,0 +1,290 @@ +/*- + * Copyright (c) 2014 Andrew Turner + * Copyright (c) 2014-2015 The FreeBSD Foundation + * All rights reserved. 
+ *
+ * Portions of this software were developed by Andrew Turner
+ * under sponsorship from the FreeBSD Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#include <machine/setjmp.h>
+#include <machine/param.h>
+#include <machine/vmparam.h>
+
+#include "assym.inc"
+
+/*
+ * One of the fu* or su* functions failed, return -1.
+ */
+ENTRY(fsu_fault)
+	SET_FAULT_HANDLER(xzr, x1)	/* Reset the handler function */
+	EXIT_USER_ACCESS_CHECK(w0, x1)
+fsu_fault_nopcb:
+	mov	x0, #-1
+	ret
+END(fsu_fault)
+
+/*
+ * int casueword32(volatile uint32_t *, uint32_t, uint32_t *, uint32_t)
+ */
+ENTRY(casueword32)
+	ldr	x4, =(VM_MAXUSER_ADDRESS-3)
+	cmp	x0, x4
+	b.cs	fsu_fault_nopcb
+	adr	x6, fsu_fault		/* Load the fault handler */
+	mov	w5, #1
+	SET_FAULT_HANDLER(x6, x4)	/* And set it */
+	ENTER_USER_ACCESS(w6, x4)
+1:	ldxr	w4, [x0]		/* Load-exclusive the data */
+	cmp	w4, w1			/* Compare */
+	b.ne	2f			/* Not equal, exit */
+	stxr	w5, w3, [x0]		/* Store the new data */
+2:	EXIT_USER_ACCESS(w6)
+	SET_FAULT_HANDLER(xzr, x6)	/* Reset the fault handler */
+	str	w4, [x2]		/* Store the read data */
+	mov	w0, w5			/* Result same as store status */
+	ret				/* Return */
+END(casueword32)
+
+/*
+ * int casueword(volatile u_long *, u_long, u_long *, u_long)
+ */
+ENTRY(casueword)
+	ldr	x4, =(VM_MAXUSER_ADDRESS-7)
+	cmp	x0, x4
+	b.cs	fsu_fault_nopcb
+	adr	x6, fsu_fault		/* Load the fault handler */
+	mov	w5, #1
+	SET_FAULT_HANDLER(x6, x4)	/* And set it */
+	ENTER_USER_ACCESS(w6, x4)
+1:	ldxr	x4, [x0]		/* Load-exclusive the data */
+	cmp	x4, x1			/* Compare */
+	b.ne	2f			/* Not equal, exit */
+	stxr	w5, x3, [x0]		/* Store the new data */
+2:	EXIT_USER_ACCESS(w6)
+	SET_FAULT_HANDLER(xzr, x6)	/* Reset the fault handler */
+	str	x4, [x2]		/* Store the read data */
+	mov	w0, w5			/* Result same as store status */
+	ret				/* Return */
+END(casueword)
+
+/*
+ * int fubyte(volatile const void *)
+ */
+ENTRY(fubyte)
+	ldr	x1, =VM_MAXUSER_ADDRESS
+	cmp	x0, x1
+	b.cs	fsu_fault_nopcb
+	adr	x6, fsu_fault		/* Load the fault handler */
+	SET_FAULT_HANDLER(x6, x1)	/* And set it */
+	ldtrb	w0, [x0]		/* Try loading the data */
+	SET_FAULT_HANDLER(xzr, x1)	/* Reset the fault handler */
+	ret				/* Return */
+END(fubyte)
+
+/*
+ * int fuword16(volatile const void *)
+ */
+ENTRY(fuword16) + ldr x1, =(VM_MAXUSER_ADDRESS-1) + cmp x0, x1 + b.cs fsu_fault_nopcb + adr x6, fsu_fault /* Load the fault handler */ + SET_FAULT_HANDLER(x6, x1) /* And set it */ + ldtrh w0, [x0] /* Try loading the data */ + SET_FAULT_HANDLER(xzr, x1) /* Reset the fault handler */ + ret /* Return */ +END(fuword16) + +/* + * int32_t fueword32(volatile const void *, int32_t *) + */ +ENTRY(fueword32) + ldr x2, =(VM_MAXUSER_ADDRESS-3) + cmp x0, x2 + b.cs fsu_fault_nopcb + adr x6, fsu_fault /* Load the fault handler */ + SET_FAULT_HANDLER(x6, x2) /* And set it */ + ldtr w0, [x0] /* Try loading the data */ + SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */ + str w0, [x1] /* Save the data in kernel space */ + mov w0, #0 /* Success */ + ret /* Return */ +END(fueword32) + +/* + * long fueword(volatile const void *, int64_t *) + * int64_t fueword64(volatile const void *, int64_t *) + */ +ENTRY(fueword) +EENTRY(fueword64) + ldr x2, =(VM_MAXUSER_ADDRESS-7) + cmp x0, x2 + b.cs fsu_fault_nopcb + adr x6, fsu_fault /* Load the fault handler */ + SET_FAULT_HANDLER(x6, x2) /* And set it */ + ldtr x0, [x0] /* Try loading the data */ + SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */ + str x0, [x1] /* Save the data in kernel space */ + mov x0, #0 /* Success */ + ret /* Return */ +EEND(fueword64) +END(fueword) + +/* + * int subyte(volatile void *, int) + */ +ENTRY(subyte) + ldr x2, =VM_MAXUSER_ADDRESS + cmp x0, x2 + b.cs fsu_fault_nopcb + adr x6, fsu_fault /* Load the fault handler */ + SET_FAULT_HANDLER(x6, x2) /* And set it */ + sttrb w1, [x0] /* Try storing the data */ + SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */ + mov x0, #0 /* Success */ + ret /* Return */ +END(subyte) + +/* + * int suword16(volatile void *, int) + */ +ENTRY(suword16) + ldr x2, =(VM_MAXUSER_ADDRESS-1) + cmp x0, x2 + b.cs fsu_fault_nopcb + adr x6, fsu_fault /* Load the fault handler */ + SET_FAULT_HANDLER(x6, x2) /* And set it */ + sttrh w1, [x0] /* Try storing the data */ + SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */ + mov x0, #0 /* Success */ + ret /* Return */ +END(suword16) + +/* + * int suword32(volatile void *, int) + */ +ENTRY(suword32) + ldr x2, =(VM_MAXUSER_ADDRESS-3) + cmp x0, x2 + b.cs fsu_fault_nopcb + adr x6, fsu_fault /* Load the fault handler */ + SET_FAULT_HANDLER(x6, x2) /* And set it */ + sttr w1, [x0] /* Try storing the data */ + SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */ + mov x0, #0 /* Success */ + ret /* Return */ +END(suword32) + +/* + * int suword(volatile void *, long) + */ +ENTRY(suword) +EENTRY(suword64) + ldr x2, =(VM_MAXUSER_ADDRESS-7) + cmp x0, x2 + b.cs fsu_fault_nopcb + adr x6, fsu_fault /* Load the fault handler */ + SET_FAULT_HANDLER(x6, x2) /* And set it */ + sttr x1, [x0] /* Try storing the data */ + SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */ + mov x0, #0 /* Success */ + ret /* Return */ +EEND(suword64) +END(suword) + +ENTRY(setjmp) + /* Store the stack pointer */ + mov x8, sp + str x8, [x0], #8 + + /* Store the general purpose registers and lr */ + stp x19, x20, [x0], #16 + stp x21, x22, [x0], #16 + stp x23, x24, [x0], #16 + stp x25, x26, [x0], #16 + stp x27, x28, [x0], #16 + stp x29, lr, [x0], #16 + + /* Return value */ + mov x0, #0 + ret +END(setjmp) + +ENTRY(longjmp) + /* Restore the stack pointer */ + ldr x8, [x0], #8 + mov sp, x8 + + /* Restore the general purpose registers and lr */ + ldp x19, x20, [x0], #16 + ldp x21, x22, [x0], #16 + ldp x23, x24, [x0], #16 + ldp x25, x26, [x0], #16 + ldp x27, x28, [x0], #16 + ldp x29, lr, 
[x0], #16 + + /* Load the return value */ + mov x0, x1 + ret +END(longjmp) + +/* + * pagezero, simple implementation + */ +ENTRY(pagezero_simple) + add x1, x0, #PAGE_SIZE + +1: + stp xzr, xzr, [x0], #0x10 + stp xzr, xzr, [x0], #0x10 + stp xzr, xzr, [x0], #0x10 + stp xzr, xzr, [x0], #0x10 + cmp x0, x1 + b.ne 1b + ret + +END(pagezero_simple) + +/* + * pagezero, cache assisted + */ +ENTRY(pagezero_cache) + add x1, x0, #PAGE_SIZE + + ldr x2, =dczva_line_size + ldr x2, [x2] + +1: + dc zva, x0 + add x0, x0, x2 + cmp x0, x1 + b.ne 1b + ret + +END(pagezero_cache) diff --git a/sys/arm64/arm64/swtch.S b/sys/arm64/arm64/swtch.S new file mode 100644 index 000000000000..144cc0873f68 --- /dev/null +++ b/sys/arm64/arm64/swtch.S @@ -0,0 +1,292 @@ +/*- + * Copyright (c) 2014 Andrew Turner + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ */
+
+#include "assym.inc"
+#include "opt_kstack_pages.h"
+#include "opt_sched.h"
+
+#include <machine/asm.h>
+
+__FBSDID("$FreeBSD$");
+
+.macro clear_step_flag pcbflags, tmp
+	tbz	\pcbflags, #PCB_SINGLE_STEP_SHIFT, 999f
+	mrs	\tmp, mdscr_el1
+	bic	\tmp, \tmp, #1
+	msr	mdscr_el1, \tmp
+	isb
+999:
+.endm
+
+.macro set_step_flag pcbflags, tmp
+	tbz	\pcbflags, #PCB_SINGLE_STEP_SHIFT, 999f
+	mrs	\tmp, mdscr_el1
+	orr	\tmp, \tmp, #1
+	msr	mdscr_el1, \tmp
+	isb
+999:
+.endm
+
+/*
+ * void cpu_throw(struct thread *old, struct thread *new)
+ */
+ENTRY(cpu_throw)
+	/* If old == NULL, skip disabling stepping */
+	cbz	x0, 1f
+
+	/* If we were single stepping, disable it */
+	ldr	x4, [x0, #TD_PCB]
+	ldr	w5, [x4, #PCB_FLAGS]
+	clear_step_flag w5, x6
+1:
+
+#ifdef VFP
+	/* Back up the new thread pointer around a call to C code */
+	mov	x19, x0
+	mov	x20, x1
+	bl	vfp_discard
+	mov	x1, x20
+	mov	x0, x19
+#endif
+
+	bl	pmap_switch
+	mov	x4, x0
+
+	/* If we are single stepping, enable it */
+	ldr	w5, [x4, #PCB_FLAGS]
+	set_step_flag w5, x6
+
+	/* Restore the registers */
+	ldp	x5, x6, [x4, #PCB_SP]
+	mov	sp, x5
+	msr	tpidr_el0, x6
+	ldr	x6, [x4, #PCB_TPIDRRO]
+	msr	tpidrro_el0, x6
+	ldp	x8, x9, [x4, #PCB_REGS + 8 * 8]
+	ldp	x10, x11, [x4, #PCB_REGS + 10 * 8]
+	ldp	x12, x13, [x4, #PCB_REGS + 12 * 8]
+	ldp	x14, x15, [x4, #PCB_REGS + 14 * 8]
+	ldp	x16, x17, [x4, #PCB_REGS + 16 * 8]
+	ldr	x19, [x4, #PCB_REGS + 19 * 8]
+	ldp	x20, x21, [x4, #PCB_REGS + 20 * 8]
+	ldp	x22, x23, [x4, #PCB_REGS + 22 * 8]
+	ldp	x24, x25, [x4, #PCB_REGS + 24 * 8]
+	ldp	x26, x27, [x4, #PCB_REGS + 26 * 8]
+	ldp	x28, x29, [x4, #PCB_REGS + 28 * 8]
+	ldr	x30, [x4, #PCB_REGS + 30 * 8]
+
+	ret
+END(cpu_throw)
+
+/*
+ * void cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx)
+ *
+ * x0 = old
+ * x1 = new
+ * x2 = mtx
+ * x3 to x7, x16 and x17 are caller saved
+ */
+ENTRY(cpu_switch)
+	/*
+	 * Save the old context.
+	 */
+	ldr	x4, [x0, #TD_PCB]
+
+	/* Store the callee-saved registers */
+	stp	x8, x9, [x4, #PCB_REGS + 8 * 8]
+	stp	x10, x11, [x4, #PCB_REGS + 10 * 8]
+	stp	x12, x13, [x4, #PCB_REGS + 12 * 8]
+	stp	x14, x15, [x4, #PCB_REGS + 14 * 8]
+	stp	x16, x17, [x4, #PCB_REGS + 16 * 8]
+	stp	x18, x19, [x4, #PCB_REGS + 18 * 8]
+	stp	x20, x21, [x4, #PCB_REGS + 20 * 8]
+	stp	x22, x23, [x4, #PCB_REGS + 22 * 8]
+	stp	x24, x25, [x4, #PCB_REGS + 24 * 8]
+	stp	x26, x27, [x4, #PCB_REGS + 26 * 8]
+	stp	x28, x29, [x4, #PCB_REGS + 28 * 8]
+	str	x30, [x4, #PCB_REGS + 30 * 8]
+	/* And the old stack pointer */
+	mov	x5, sp
+	mrs	x6, tpidrro_el0
+	str	x6, [x4, #PCB_TPIDRRO]
+	mrs	x6, tpidr_el0
+	stp	x5, x6, [x4, #PCB_SP]
+
+	/* If we were single stepping, disable it */
+	ldr	w5, [x4, #PCB_FLAGS]
+	clear_step_flag w5, x6
+
+	mov	x19, x0
+	mov	x20, x1
+	mov	x21, x2
+
+#ifdef VFP
+	/* Load the pcb address */
+	mov	x1, x4
+	bl	vfp_save_state
+	mov	x1, x20
+	mov	x0, x19
+#endif
+
+	bl	pmap_switch
+	/* Move the new pcb out of the way */
+	mov	x4, x0
+
+	mov	x2, x21
+	mov	x1, x20
+	mov	x0, x19
+
+	/*
+	 * Release the old thread.
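
The handoff at this point is the scheduler's release/acquire protocol: the old CPU publishes the thread with a store-release (the stlr below) only after the full register context has been saved, and the CPU that picks the thread up spins with load-acquire (ldar) until td_lock stops pointing at blocked_lock. A C11-atomics model of the same pairing (the struct names are invented; the kernel does this in assembly here):

    #include <stdatomic.h>
    #include <stdio.h>

    struct mtx { int unused; };

    /* Stand-ins for the kernel's blocked_lock and a runqueue lock. */
    static struct mtx blocked_lock, runq_lock;

    struct td_model {
    	_Atomic(struct mtx *) td_lock;
    };

    /*
     * Old CPU: the release store makes every earlier store, i.e. the saved
     * register context, visible before td_lock changes hands (stlr).
     */
    static void
    release_thread(struct td_model *td, struct mtx *m)
    {
    	atomic_store_explicit(&td->td_lock, m, memory_order_release);
    }

    /*
     * New CPU: spin until td_lock stops pointing at blocked_lock; the
     * acquire load pairs with the release store above (ldar).
     */
    static void
    wait_for_thread(struct td_model *td)
    {
    	while (atomic_load_explicit(&td->td_lock, memory_order_acquire) ==
    	    &blocked_lock)
    		;	/* spin */
    }

    int
    main(void)
    {
    	struct td_model td;

    	atomic_init(&td.td_lock, &blocked_lock);
    	release_thread(&td, &runq_lock);
    	wait_for_thread(&td);	/* returns: td_lock != blocked_lock */
    	printf("handoff complete\n");
    	return (0);
    }
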
+ */ + stlr x2, [x0, #TD_LOCK] +#if defined(SCHED_ULE) && defined(SMP) + /* Spin if TD_LOCK points to a blocked_lock */ + ldr x2, =_C_LABEL(blocked_lock) +1: + ldar x3, [x1, #TD_LOCK] + cmp x3, x2 + b.eq 1b +#endif + + /* If we are single stepping, enable it */ + ldr w5, [x4, #PCB_FLAGS] + set_step_flag w5, x6 + + /* Restore the registers */ + ldp x5, x6, [x4, #PCB_SP] + mov sp, x5 + msr tpidr_el0, x6 + ldr x6, [x4, #PCB_TPIDRRO] + msr tpidrro_el0, x6 + ldp x8, x9, [x4, #PCB_REGS + 8 * 8] + ldp x10, x11, [x4, #PCB_REGS + 10 * 8] + ldp x12, x13, [x4, #PCB_REGS + 12 * 8] + ldp x14, x15, [x4, #PCB_REGS + 14 * 8] + ldp x16, x17, [x4, #PCB_REGS + 16 * 8] + ldr x19, [x4, #PCB_REGS + 19 * 8] + ldp x20, x21, [x4, #PCB_REGS + 20 * 8] + ldp x22, x23, [x4, #PCB_REGS + 22 * 8] + ldp x24, x25, [x4, #PCB_REGS + 24 * 8] + ldp x26, x27, [x4, #PCB_REGS + 26 * 8] + ldp x28, x29, [x4, #PCB_REGS + 28 * 8] + ldr x30, [x4, #PCB_REGS + 30 * 8] + + str xzr, [x4, #PCB_REGS + 18 * 8] + ret +.Lcpu_switch_panic_str: + .asciz "cpu_switch: %p\0" +END(cpu_switch) + +ENTRY(fork_trampoline) + mov x0, x8 + mov x1, x9 + mov x2, sp + mov fp, #0 /* Stack traceback stops here. */ + bl _C_LABEL(fork_exit) + + /* Restore the registers other than x0 and x1 */ + ldp x2, x3, [sp, #TF_X + 2 * 8] + ldp x4, x5, [sp, #TF_X + 4 * 8] + ldp x6, x7, [sp, #TF_X + 6 * 8] + ldp x8, x9, [sp, #TF_X + 8 * 8] + ldp x10, x11, [sp, #TF_X + 10 * 8] + ldp x12, x13, [sp, #TF_X + 12 * 8] + ldp x14, x15, [sp, #TF_X + 14 * 8] + ldp x16, x17, [sp, #TF_X + 16 * 8] + ldr x19, [sp, #TF_X + 19 * 8] + ldp x20, x21, [sp, #TF_X + 20 * 8] + ldp x22, x23, [sp, #TF_X + 22 * 8] + ldp x24, x25, [sp, #TF_X + 24 * 8] + ldp x26, x27, [sp, #TF_X + 26 * 8] + ldp x28, x29, [sp, #TF_X + 28 * 8] + + /* + * Disable interrupts to avoid + * overwriting spsr_el1 and sp_el0 by an IRQ exception. + */ + msr daifset, #2 + + /* Restore sp and lr */ + ldp x0, x1, [sp, #TF_SP] + msr sp_el0, x0 + mov lr, x1 + + /* Restore elr and spsr */ + ldp x0, x1, [sp, #TF_ELR] + msr elr_el1, x0 + msr spsr_el1, x1 + + /* Finally x0 and x1 */ + ldp x0, x1, [sp, #TF_X + 0 * 8] + ldr x18, [sp, #TF_X + 18 * 8] + + /* + * No need for interrupts reenabling since PSR + * will be set to the desired value anyway. + */ + ERET + +END(fork_trampoline) + +ENTRY(savectx) + /* Store the callee-saved registers */ + stp x8, x9, [x0, #PCB_REGS + 8 * 8] + stp x10, x11, [x0, #PCB_REGS + 10 * 8] + stp x12, x13, [x0, #PCB_REGS + 12 * 8] + stp x14, x15, [x0, #PCB_REGS + 14 * 8] + stp x16, x17, [x0, #PCB_REGS + 16 * 8] + stp x18, x19, [x0, #PCB_REGS + 18 * 8] + stp x20, x21, [x0, #PCB_REGS + 20 * 8] + stp x22, x23, [x0, #PCB_REGS + 22 * 8] + stp x24, x25, [x0, #PCB_REGS + 24 * 8] + stp x26, x27, [x0, #PCB_REGS + 26 * 8] + stp x28, x29, [x0, #PCB_REGS + 28 * 8] + str x30, [x0, #PCB_REGS + 30 * 8] + /* And the old stack pointer */ + mov x5, sp + mrs x6, tpidrro_el0 + str x6, [x0, #PCB_TPIDRRO] + mrs x6, tpidr_el0 + stp x5, x6, [x0, #PCB_SP] + + /* Store the VFP registers */ +#ifdef VFP + mov x28, lr + mov x1, x0 /* move pcb to the correct register */ + mov x0, xzr /* td = NULL */ + bl vfp_save_state + mov lr, x28 +#endif + + ret +END(savectx) + diff --git a/sys/arm64/arm64/sys_machdep.c b/sys/arm64/arm64/sys_machdep.c new file mode 100644 index 000000000000..dfb2c4ad85b8 --- /dev/null +++ b/sys/arm64/arm64/sys_machdep.c @@ -0,0 +1,45 @@ +/*- + * Copyright (c) 2015 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sysproto.h> + +#include <machine/sysarch.h> + +int +sysarch(struct thread *td, struct sysarch_args *uap) +{ + + return (ENOTSUP); +} diff --git a/sys/arm64/arm64/trap.c b/sys/arm64/arm64/trap.c new file mode 100644 index 000000000000..9856a35d0010 --- /dev/null +++ b/sys/arm64/arm64/trap.c @@ -0,0 +1,567 @@ +/*- + * Copyright (c) 2014 Andrew Turner + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/ptrace.h> +#include <sys/syscall.h> +#include <sys/sysent.h> +#ifdef KDB +#include <sys/kdb.h> +#endif + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> +#include <vm/vm_param.h> +#include <vm/vm_extern.h> + +#include <machine/frame.h> +#include <machine/pcb.h> +#include <machine/pcpu.h> +#include <machine/undefined.h> + +#ifdef KDTRACE_HOOKS +#include <sys/dtrace_bsd.h> +#endif + +#ifdef VFP +#include <machine/vfp.h> +#endif + +#ifdef KDB +#include <machine/db_machdep.h> +#endif + +#ifdef DDB +#include <ddb/db_output.h> +#endif + +extern register_t fsu_intr_fault; + +/* Called from exception.S */ +void do_el1h_sync(struct thread *, struct trapframe *); +void do_el0_sync(struct thread *, struct trapframe *); +void do_el0_error(struct trapframe *); +void do_serror(struct trapframe *); +void unhandled_exception(struct trapframe *); + +static void print_registers(struct trapframe *frame); + +int (*dtrace_invop_jump_addr)(struct trapframe *); + +typedef void (abort_handler)(struct thread *, struct trapframe *, uint64_t, + uint64_t, int); + +static abort_handler align_abort; +static abort_handler data_abort; + +static abort_handler *abort_handlers[] = { + [ISS_DATA_DFSC_TF_L0] = data_abort, + [ISS_DATA_DFSC_TF_L1] = data_abort, + [ISS_DATA_DFSC_TF_L2] = data_abort, + [ISS_DATA_DFSC_TF_L3] = data_abort, + [ISS_DATA_DFSC_AFF_L1] = data_abort, + [ISS_DATA_DFSC_AFF_L2] = data_abort, + [ISS_DATA_DFSC_AFF_L3] = data_abort, + [ISS_DATA_DFSC_PF_L1] = data_abort, + [ISS_DATA_DFSC_PF_L2] = data_abort, + [ISS_DATA_DFSC_PF_L3] = data_abort, + [ISS_DATA_DFSC_ALIGN] = align_abort, +}; + +static __inline void +call_trapsignal(struct thread *td, int sig, int code, void *addr, int trapno) +{ + ksiginfo_t ksi; + + ksiginfo_init_trap(&ksi); + ksi.ksi_signo = sig; + ksi.ksi_code = code; + ksi.ksi_addr = addr; + ksi.ksi_trapno = trapno; + trapsignal(td, &ksi); +} + +int +cpu_fetch_syscall_args(struct thread *td) +{ + struct proc *p; + register_t *ap; + struct syscall_args *sa; + int nap; + + nap = 8; + p = td->td_proc; + ap = td->td_frame->tf_x; + sa = &td->td_sa; + + sa->code = td->td_frame->tf_x[8]; + + if (sa->code == SYS_syscall || sa->code == SYS___syscall) { + sa->code = *ap++; + nap--; + } + + if (sa->code >= p->p_sysent->sv_size) + sa->callp = &p->p_sysent->sv_table[0]; + else + sa->callp = &p->p_sysent->sv_table[sa->code]; + + sa->narg = sa->callp->sy_narg; + memcpy(sa->args, ap, nap * sizeof(register_t)); + if (sa->narg > nap) + panic("ARM64TODO: Could we have more than 8 args?"); + + td->td_retval[0] = 0; + td->td_retval[1] = 0; + + return (0); +} + +#include "../../kern/subr_syscall.c" + +static void +svc_handler(struct thread *td, struct trapframe *frame) +{ + + if ((frame->tf_esr & ESR_ELx_ISS_MASK) == 0) { + syscallenter(td); + syscallret(td); + } else { + call_trapsignal(td, SIGILL, ILL_ILLOPN, (void *)frame->tf_elr, + ESR_ELx_EXCEPTION(frame->tf_esr)); + userret(td, frame); + } +} + +static void +align_abort(struct thread *td, struct trapframe *frame, uint64_t esr, + uint64_t far, int lower) +{ + if (!lower) { + print_registers(frame); + printf(" far: %16lx\n", far); + printf(" esr: %.8lx\n", esr); + panic("Misaligned access from kernel space!"); + } + + call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_elr, + 
ESR_ELx_EXCEPTION(frame->tf_esr)); + userret(td, frame); +} + +static void +data_abort(struct thread *td, struct trapframe *frame, uint64_t esr, + uint64_t far, int lower) +{ + struct vm_map *map; + struct proc *p; + struct pcb *pcb; + vm_prot_t ftype; + int error, sig, ucode; +#ifdef KDB + bool handled; +#endif + + /* + * According to the ARMv8-A rev. A.g, B2.10.5 "Load-Exclusive + * and Store-Exclusive instruction usage restrictions", state + * of the exclusive monitors after data abort exception is unknown. + */ + clrex(); + +#ifdef KDB + if (kdb_active) { + kdb_reenter(); + return; + } +#endif + + pcb = td->td_pcb; + p = td->td_proc; + if (lower) + map = &p->p_vmspace->vm_map; + else { + intr_enable(); + + /* The top bit tells us which range to use */ + if (far >= VM_MAXUSER_ADDRESS) { + map = kernel_map; + } else { + map = &p->p_vmspace->vm_map; + if (map == NULL) + map = kernel_map; + } + } + + /* + * Try to handle translation, access flag, and permission faults. + * Translation faults may occur as a result of the required + * break-before-make sequence used when promoting or demoting + * superpages. Such faults must not occur while holding the pmap lock, + * or pmap_fault() will recurse on that lock. + */ + if ((lower || map == kernel_map || pcb->pcb_onfault != 0) && + pmap_fault(map->pmap, esr, far) == KERN_SUCCESS) + return; + + KASSERT(td->td_md.md_spinlock_count == 0, + ("data abort with spinlock held")); + if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | + WARN_GIANTOK, NULL, "Kernel page fault") != 0) { + print_registers(frame); + printf(" far: %16lx\n", far); + printf(" esr: %.8lx\n", esr); + panic("data abort in critical section or under mutex"); + } + + switch (ESR_ELx_EXCEPTION(esr)) { + case EXCP_INSN_ABORT: + case EXCP_INSN_ABORT_L: + ftype = VM_PROT_EXECUTE; + break; + default: + ftype = (esr & ISS_DATA_WnR) == 0 ? VM_PROT_READ : + VM_PROT_READ | VM_PROT_WRITE; + break; + } + + /* Fault in the page. */ + error = vm_fault_trap(map, far, ftype, VM_FAULT_NORMAL, &sig, &ucode); + if (error != KERN_SUCCESS) { + if (lower) { + call_trapsignal(td, sig, ucode, (void *)far, + ESR_ELx_EXCEPTION(esr)); + } else { + if (td->td_intr_nesting_level == 0 && + pcb->pcb_onfault != 0) { + frame->tf_x[0] = error; + frame->tf_elr = pcb->pcb_onfault; + return; + } + + printf("Fatal data abort:\n"); + print_registers(frame); + printf(" far: %16lx\n", far); + printf(" esr: %.8lx\n", esr); + +#ifdef KDB + if (debugger_on_trap) { + kdb_why = KDB_WHY_TRAP; + handled = kdb_trap(ESR_ELx_EXCEPTION(esr), 0, + frame); + kdb_why = KDB_WHY_UNSET; + if (handled) + return; + } +#endif + panic("vm_fault failed: %lx", frame->tf_elr); + } + } + + if (lower) + userret(td, frame); +} + +static void +print_registers(struct trapframe *frame) +{ + u_int reg; + + for (reg = 0; reg < nitems(frame->tf_x); reg++) { + printf(" %sx%d: %16lx\n", (reg < 10) ? 
" " : "", reg, + frame->tf_x[reg]); + } + printf(" sp: %16lx\n", frame->tf_sp); + printf(" lr: %16lx\n", frame->tf_lr); + printf(" elr: %16lx\n", frame->tf_elr); + printf("spsr: %8x\n", frame->tf_spsr); +} + +void +do_el1h_sync(struct thread *td, struct trapframe *frame) +{ + struct trapframe *oframe; + uint32_t exception; + uint64_t esr, far; + int dfsc; + + /* Read the esr register to get the exception details */ + esr = frame->tf_esr; + exception = ESR_ELx_EXCEPTION(esr); + +#ifdef KDTRACE_HOOKS + if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, exception)) + return; +#endif + + CTR4(KTR_TRAP, + "do_el1_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", td, + esr, frame->tf_elr, frame); + + oframe = td->td_frame; + + switch (exception) { + case EXCP_BRK: + case EXCP_WATCHPT_EL1: + case EXCP_SOFTSTP_EL1: + break; + default: + td->td_frame = frame; + break; + } + + switch (exception) { + case EXCP_FP_SIMD: + case EXCP_TRAP_FP: +#ifdef VFP + if ((td->td_pcb->pcb_fpflags & PCB_FP_KERN) != 0) { + vfp_restore_state(); + } else +#endif + { + print_registers(frame); + printf(" esr: %.8lx\n", esr); + panic("VFP exception in the kernel"); + } + break; + case EXCP_INSN_ABORT: + case EXCP_DATA_ABORT: + far = READ_SPECIALREG(far_el1); + dfsc = esr & ISS_DATA_DFSC_MASK; + if (dfsc < nitems(abort_handlers) && + abort_handlers[dfsc] != NULL) { + abort_handlers[dfsc](td, frame, esr, far, 0); + } else { + print_registers(frame); + printf(" far: %16lx\n", far); + printf(" esr: %.8lx\n", esr); + panic("Unhandled EL1 %s abort: %x", + exception == EXCP_INSN_ABORT ? "instruction" : + "data", dfsc); + } + break; + case EXCP_BRK: +#ifdef KDTRACE_HOOKS + if ((esr & ESR_ELx_ISS_MASK) == 0x40d && \ + dtrace_invop_jump_addr != 0) { + dtrace_invop_jump_addr(frame); + break; + } +#endif +#ifdef KDB + kdb_trap(exception, 0, + (td->td_frame != NULL) ? td->td_frame : frame); +#else + panic("No debugger in kernel.\n"); +#endif + frame->tf_elr += 4; + break; + case EXCP_WATCHPT_EL1: + case EXCP_SOFTSTP_EL1: +#ifdef KDB + kdb_trap(exception, 0, + (td->td_frame != NULL) ? td->td_frame : frame); +#else + panic("No debugger in kernel.\n"); +#endif + break; + case EXCP_UNKNOWN: + if (undef_insn(1, frame)) + break; + /* FALLTHROUGH */ + default: + print_registers(frame); + printf(" far: %16lx\n", READ_SPECIALREG(far_el1)); + panic("Unknown kernel exception %x esr_el1 %lx\n", exception, + esr); + } + + td->td_frame = oframe; +} + +void +do_el0_sync(struct thread *td, struct trapframe *frame) +{ + pcpu_bp_harden bp_harden; + uint32_t exception; + uint64_t esr, far; + int dfsc; + + /* Check we have a sane environment when entering from userland */ + KASSERT((uintptr_t)get_pcpu() >= VM_MIN_KERNEL_ADDRESS, + ("Invalid pcpu address from userland: %p (tpidr %lx)", + get_pcpu(), READ_SPECIALREG(tpidr_el1))); + + esr = frame->tf_esr; + exception = ESR_ELx_EXCEPTION(esr); + switch (exception) { + case EXCP_INSN_ABORT_L: + far = READ_SPECIALREG(far_el1); + + /* + * Userspace may be trying to train the branch predictor to + * attack the kernel. If we are on a CPU affected by this + * call the handler to clear the branch predictor state. 
+ */ + if (far > VM_MAXUSER_ADDRESS) { + bp_harden = PCPU_GET(bp_harden); + if (bp_harden != NULL) + bp_harden(); + } + break; + case EXCP_UNKNOWN: + case EXCP_DATA_ABORT_L: + case EXCP_DATA_ABORT: + far = READ_SPECIALREG(far_el1); + break; + } + intr_enable(); + + CTR4(KTR_TRAP, + "do_el0_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", td, esr, + frame->tf_elr, frame); + + switch (exception) { + case EXCP_FP_SIMD: + case EXCP_TRAP_FP: +#ifdef VFP + vfp_restore_state(); +#else + panic("VFP exception in userland"); +#endif + break; + case EXCP_SVC32: + case EXCP_SVC64: + svc_handler(td, frame); + break; + case EXCP_INSN_ABORT_L: + case EXCP_DATA_ABORT_L: + case EXCP_DATA_ABORT: + dfsc = esr & ISS_DATA_DFSC_MASK; + if (dfsc < nitems(abort_handlers) && + abort_handlers[dfsc] != NULL) + abort_handlers[dfsc](td, frame, esr, far, 1); + else { + print_registers(frame); + printf(" far: %16lx\n", far); + printf(" esr: %.8lx\n", esr); + panic("Unhandled EL0 %s abort: %x", + exception == EXCP_INSN_ABORT_L ? "instruction" : + "data", dfsc); + } + break; + case EXCP_UNKNOWN: + if (!undef_insn(0, frame)) + call_trapsignal(td, SIGILL, ILL_ILLTRP, (void *)far, + exception); + userret(td, frame); + break; + case EXCP_SP_ALIGN: + call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_sp, + exception); + userret(td, frame); + break; + case EXCP_PC_ALIGN: + call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_elr, + exception); + userret(td, frame); + break; + case EXCP_BRKPT_EL0: + case EXCP_BRK: + call_trapsignal(td, SIGTRAP, TRAP_BRKPT, (void *)frame->tf_elr, + exception); + userret(td, frame); + break; + case EXCP_MSR: + call_trapsignal(td, SIGILL, ILL_PRVOPC, (void *)frame->tf_elr, + exception); + userret(td, frame); + break; + case EXCP_SOFTSTP_EL0: + td->td_frame->tf_spsr &= ~PSR_SS; + td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; + WRITE_SPECIALREG(mdscr_el1, + READ_SPECIALREG(mdscr_el1) & ~DBG_MDSCR_SS); + call_trapsignal(td, SIGTRAP, TRAP_TRACE, + (void *)frame->tf_elr, exception); + userret(td, frame); + break; + default: + call_trapsignal(td, SIGBUS, BUS_OBJERR, (void *)frame->tf_elr, + exception); + userret(td, frame); + break; + } + + KASSERT((td->td_pcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0, + ("Kernel VFP flags set while entering userspace")); + KASSERT( + td->td_pcb->pcb_fpusaved == &td->td_pcb->pcb_fpustate, + ("Kernel VFP state in use when entering userspace")); +} + +/* + * TODO: We will need to handle these later when we support ARMv8.2 RAS. + */ +void +do_serror(struct trapframe *frame) +{ + uint64_t esr, far; + + far = READ_SPECIALREG(far_el1); + esr = frame->tf_esr; + + print_registers(frame); + printf(" far: %16lx\n", far); + printf(" esr: %.8lx\n", esr); + panic("Unhandled System Error"); +} + +void +unhandled_exception(struct trapframe *frame) +{ + uint64_t esr, far; + + far = READ_SPECIALREG(far_el1); + esr = frame->tf_esr; + + print_registers(frame); + printf(" far: %16lx\n", far); + printf(" esr: %.8lx\n", esr); + panic("Unhandled exception"); +} diff --git a/sys/arm64/arm64/uio_machdep.c b/sys/arm64/arm64/uio_machdep.c new file mode 100644 index 000000000000..11ed239fa9dd --- /dev/null +++ b/sys/arm64/arm64/uio_machdep.c @@ -0,0 +1,134 @@ +/*- + * Copyright (c) 2004 Alan L. Cox <alc@cs.rice.edu> + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. 
+ * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/uio.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> + +#include <machine/vmparam.h> + +/* + * Implement uiomove(9) from physical memory using the direct map to + * avoid the creation and destruction of ephemeral mappings. 
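+ *
+ * (Illustrative arithmetic, assuming 4 KiB pages and iov_len >= n:
+ * a hypothetical offset of 0x1830 with n = 0x1000 gives page_offset =
+ * 0x830, so the first pass through the loop copies
+ * min(0x1000, PAGE_SIZE - 0x830) = 0x7d0 bytes and the remaining
+ * 0x830 bytes come from the next physical page.)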
+ */ +int +uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio) +{ + struct thread *td = curthread; + struct iovec *iov; + void *cp; + vm_offset_t page_offset, vaddr; + size_t cnt; + int error = 0; + int save = 0; + boolean_t mapped; + + KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, + ("uiomove_fromphys: mode")); + KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, + ("uiomove_fromphys proc")); + save = td->td_pflags & TDP_DEADLKTREAT; + td->td_pflags |= TDP_DEADLKTREAT; + mapped = FALSE; + while (n > 0 && uio->uio_resid) { + iov = uio->uio_iov; + cnt = iov->iov_len; + if (cnt == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + if (cnt > n) + cnt = n; + page_offset = offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - page_offset); + if (uio->uio_segflg != UIO_NOCOPY) { + mapped = pmap_map_io_transient( + &ma[offset >> PAGE_SHIFT], &vaddr, 1, TRUE); + cp = (char *)vaddr + page_offset; + } + switch (uio->uio_segflg) { + case UIO_USERSPACE: + maybe_yield(); + if (uio->uio_rw == UIO_READ) + error = copyout(cp, iov->iov_base, cnt); + else + error = copyin(iov->iov_base, cp, cnt); + if (error) + goto out; + break; + case UIO_SYSSPACE: + if (uio->uio_rw == UIO_READ) + bcopy(cp, iov->iov_base, cnt); + else + bcopy(iov->iov_base, cp, cnt); + break; + case UIO_NOCOPY: + break; + } + if (__predict_false(mapped)) { + pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT], + &vaddr, 1, TRUE); + mapped = FALSE; + } + iov->iov_base = (char *)iov->iov_base + cnt; + iov->iov_len -= cnt; + uio->uio_resid -= cnt; + uio->uio_offset += cnt; + offset += cnt; + n -= cnt; + } +out: + if (__predict_false(mapped)) { + panic("ARM64TODO: uiomove_fromphys"); + pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT], &vaddr, 1, + TRUE); + } + if (save == 0) + td->td_pflags &= ~TDP_DEADLKTREAT; + return (error); +} diff --git a/sys/arm64/arm64/uma_machdep.c b/sys/arm64/arm64/uma_machdep.c new file mode 100644 index 000000000000..4ab256ed2179 --- /dev/null +++ b/sys/arm64/arm64/uma_machdep.c @@ -0,0 +1,77 @@ +/*- + * Copyright (c) 2003 Alan L. Cox <alc@cs.rice.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/systm.h> +#include <sys/vmmeter.h> +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_pageout.h> +#include <vm/uma.h> +#include <vm/uma_int.h> +#include <machine/md_var.h> +#include <machine/vmparam.h> + +void * +uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags, + int wait) +{ + vm_page_t m; + vm_paddr_t pa; + void *va; + + *flags = UMA_SLAB_PRIV; + m = vm_page_alloc_domain(NULL, 0, domain, + malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); + if (m == NULL) + return (NULL); + pa = m->phys_addr; + if ((wait & M_NODUMP) == 0) + dump_add_page(pa); + va = (void *)PHYS_TO_DMAP(pa); + if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) + bzero(va, PAGE_SIZE); + return (va); +} + +void +uma_small_free(void *mem, vm_size_t size, u_int8_t flags) +{ + vm_page_t m; + vm_paddr_t pa; + + pa = DMAP_TO_PHYS((vm_offset_t)mem); + dump_drop_page(pa); + m = PHYS_TO_VM_PAGE(pa); + vm_page_unwire_noq(m); + vm_page_free(m); +} diff --git a/sys/arm64/arm64/undefined.c b/sys/arm64/arm64/undefined.c new file mode 100644 index 000000000000..2cdb5f9a97fb --- /dev/null +++ b/sys/arm64/arm64/undefined.c @@ -0,0 +1,177 @@ +/*- + * Copyright (c) 2017 Andrew Turner + * All rights reserved. + * + * This software was developed by SRI International and the University of + * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 + * ("CTSRD"), as part of the DARPA CRASH research programme. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
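+ */

undefined.c, added below, provides the registration API used by the EXCP_UNKNOWN paths in trap.c above. As a usage sketch only (the handler, the MY_INSN_MASK/MY_INSN_VALUE encoding, and the emulated effect are hypothetical and not part of this commit; INSN_SIZE is the constant the file itself uses):

/* Hypothetical handler: emulate one instruction pattern, else decline. */
static int
my_emul_handler(vm_offset_t va, uint32_t insn, struct trapframe *frame,
    uint32_t esr)
{

	if ((insn & MY_INSN_MASK) != MY_INSN_VALUE)	/* assumed encoding */
		return (0);	/* not ours; undef_insn() tries the next handler */

	frame->tf_x[0] = 0;		/* whatever effect we emulate */
	frame->tf_elr += INSN_SIZE;	/* step over the trapped instruction */
	return (1);			/* handled */
}

static void *my_undef_handle;

static void
my_emul_attach(void)
{

	/* true registers the handler for userspace (EL0); false for EL1. */
	my_undef_handle = install_undef_handler(true, my_emul_handler);
}

static void
my_emul_detach(void)
{

	remove_undef_handler(my_undef_handle);
}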
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/signal.h>
+#include <sys/signalvar.h>
+#include <sys/sysent.h>
+
+#include <machine/frame.h>
+#include <machine/undefined.h>
+#include <machine/vmparam.h>
+
+MALLOC_DEFINE(M_UNDEF, "undefhandler", "Undefined instruction handler data");
+
+struct undef_handler {
+ LIST_ENTRY(undef_handler) uh_link;
+ undef_handler_t uh_handler;
+};
+
+/*
+ * Create two undefined instruction handler lists, one for userspace, one for
+ * the kernel. This allows us to handle instructions that will trap.
+ */
+LIST_HEAD(, undef_handler) undef_handlers[2];
+
+/*
+ * Work around a bug in QEMU prior to 2.5.1 where reading unknown ID
+ * registers would raise an exception when they should return 0.
+ */
+static int
+id_aa64mmfr2_handler(vm_offset_t va, uint32_t insn, struct trapframe *frame,
+ uint32_t esr)
+{
+ int reg;
+
+#define MRS_ID_AA64MMFR2_EL0_MASK (MRS_MASK | 0x000fffe0)
+#define MRS_ID_AA64MMFR2_EL0_VALUE (MRS_VALUE | 0x00080740)
+
+ /* mrs xn, id_aa64mmfr2_el1 */
+ if ((insn & MRS_ID_AA64MMFR2_EL0_MASK) == MRS_ID_AA64MMFR2_EL0_VALUE) {
+ reg = MRS_REGISTER(insn);
+
+ frame->tf_elr += INSN_SIZE;
+ if (reg < nitems(frame->tf_x)) {
+ frame->tf_x[reg] = 0;
+ } else if (reg == 30) {
+ frame->tf_lr = 0;
+ }
+ /* If reg is 31 then write to xzr, i.e. do nothing */
+
+ return (1);
+ }
+ return (0);
+}
+
+#ifdef COMPAT_FREEBSD32
+/* arm32 GDB breakpoints */
+#define GDB_BREAKPOINT 0xe6000011
+#define GDB5_BREAKPOINT 0xe7ffdefe
+static int
+gdb_trapper(vm_offset_t va, uint32_t insn, struct trapframe *frame,
+ uint32_t esr)
+{
+ struct thread *td = curthread;
+
+ if (insn == GDB_BREAKPOINT || insn == GDB5_BREAKPOINT) {
+ if (SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
+ va < VM_MAXUSER_ADDRESS) {
+ ksiginfo_t ksi;
+
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGTRAP;
+ ksi.ksi_code = TRAP_TRACE;
+ ksi.ksi_addr = (void *)va;
+ trapsignal(td, &ksi);
+ return 1;
+ }
+ }
+ return 0;
+}
+#endif
+
+void
+undef_init(void)
+{
+
+ LIST_INIT(&undef_handlers[0]);
+ LIST_INIT(&undef_handlers[1]);
+
+ install_undef_handler(false, id_aa64mmfr2_handler);
+#ifdef COMPAT_FREEBSD32
+ install_undef_handler(true, gdb_trapper);
+#endif
+}
+
+void *
+install_undef_handler(bool user, undef_handler_t func)
+{
+ struct undef_handler *uh;
+
+ uh = malloc(sizeof(*uh), M_UNDEF, M_WAITOK);
+ uh->uh_handler = func;
+ LIST_INSERT_HEAD(&undef_handlers[user ? 0 : 1], uh, uh_link);
+
+ return (uh);
+}
+
+void
+remove_undef_handler(void *handle)
+{
+ struct undef_handler *uh;
+
+ uh = handle;
+ LIST_REMOVE(uh, uh_link);
+ free(handle, M_UNDEF);
+}
+
+int
+undef_insn(u_int el, struct trapframe *frame)
+{
+ struct undef_handler *uh;
+ uint32_t insn;
+ int ret;
+
+ KASSERT(el < 2, ("Invalid exception level %u", el));
+
+ if (el == 0) {
+ ret = fueword32((uint32_t *)frame->tf_elr, &insn);
+ if (ret != 0)
+ panic("Unable to read userspace faulting instruction");
+ } else {
+ insn = *(uint32_t *)frame->tf_elr;
+ }
+
+ LIST_FOREACH(uh, &undef_handlers[el], uh_link) {
+ ret = uh->uh_handler(frame->tf_elr, insn, frame, frame->tf_esr);
+ if (ret)
+ return (1);
+ }
+
+ return (0);
+}
diff --git a/sys/arm64/arm64/unwind.c b/sys/arm64/arm64/unwind.c
new file mode 100644
index 000000000000..bef9c6fa31f1
--- /dev/null
+++ b/sys/arm64/arm64/unwind.c
@@ -0,0 +1,53 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ * + * This software was developed by Semihalf under + * the sponsorship of the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); +#include <sys/param.h> + +#include <machine/stack.h> +#include <machine/vmparam.h> + +int +unwind_frame(struct unwind_state *frame) +{ + uint64_t fp; + + fp = frame->fp; + if (!INKERNEL(fp)) + return (-1); + + frame->sp = fp + 0x10; + /* FP to previous frame (X29) */ + frame->fp = *(uint64_t *)(fp); + /* LR (X30) */ + frame->pc = *(uint64_t *)(fp + 8) - 4; + + return (0); +} diff --git a/sys/arm64/arm64/vfp.c b/sys/arm64/arm64/vfp.c new file mode 100644 index 000000000000..51fba7a8a300 --- /dev/null +++ b/sys/arm64/arm64/vfp.c @@ -0,0 +1,380 @@ +/*- + * Copyright (c) 2015-2016 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef VFP
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+
+#include <machine/armreg.h>
+#include <machine/pcb.h>
+#include <machine/vfp.h>
+
+/* Sanity check that we can store all the VFP registers */
+CTASSERT(sizeof(((struct pcb *)0)->pcb_fpustate.vfp_regs) == 16 * 32);
+
+static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
+ "Kernel contexts for VFP state");
+
+struct fpu_kern_ctx {
+ struct vfpstate *prev;
+#define FPU_KERN_CTX_DUMMY 0x01 /* avoided save for the kern thread */
+#define FPU_KERN_CTX_INUSE 0x02
+ uint32_t flags;
+ struct vfpstate state;
+};
+
+static void
+vfp_enable(void)
+{
+ uint32_t cpacr;
+
+ cpacr = READ_SPECIALREG(cpacr_el1);
+ cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_NONE;
+ WRITE_SPECIALREG(cpacr_el1, cpacr);
+ isb();
+}
+
+static void
+vfp_disable(void)
+{
+ uint32_t cpacr;
+
+ cpacr = READ_SPECIALREG(cpacr_el1);
+ cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_ALL1;
+ WRITE_SPECIALREG(cpacr_el1, cpacr);
+ isb();
+}
+
+/*
+ * Called when the thread is dying or when discarding the kernel VFP state.
+ * If the thread was the last to use the VFP unit, mark it as unused to tell
+ * the kernel the fp state is unowned. Ensure the VFP unit is off so we get
+ * an exception on the next access.
+ */
+void
+vfp_discard(struct thread *td)
+{
+
+#ifdef INVARIANTS
+ if (td != NULL)
+ CRITICAL_ASSERT(td);
+#endif
+ if (PCPU_GET(fpcurthread) == td)
+ PCPU_SET(fpcurthread, NULL);
+
+ vfp_disable();
+}
+
+static void
+vfp_store(struct vfpstate *state)
+{
+ __int128_t *vfp_state;
+ uint64_t fpcr, fpsr;
+
+ vfp_state = state->vfp_regs;
+ __asm __volatile(
+ "mrs %0, fpcr \n"
+ "mrs %1, fpsr \n"
+ "stp q0, q1, [%2, #16 * 0]\n"
+ "stp q2, q3, [%2, #16 * 2]\n"
+ "stp q4, q5, [%2, #16 * 4]\n"
+ "stp q6, q7, [%2, #16 * 6]\n"
+ "stp q8, q9, [%2, #16 * 8]\n"
+ "stp q10, q11, [%2, #16 * 10]\n"
+ "stp q12, q13, [%2, #16 * 12]\n"
+ "stp q14, q15, [%2, #16 * 14]\n"
+ "stp q16, q17, [%2, #16 * 16]\n"
+ "stp q18, q19, [%2, #16 * 18]\n"
+ "stp q20, q21, [%2, #16 * 20]\n"
+ "stp q22, q23, [%2, #16 * 22]\n"
+ "stp q24, q25, [%2, #16 * 24]\n"
+ "stp q26, q27, [%2, #16 * 26]\n"
+ "stp q28, q29, [%2, #16 * 28]\n"
+ "stp q30, q31, [%2, #16 * 30]\n"
+ : "=&r"(fpcr), "=&r"(fpsr) : "r"(vfp_state));
+
+ state->vfp_fpcr = fpcr;
+ state->vfp_fpsr = fpsr;
+}
+
+static void
+vfp_restore(struct vfpstate *state)
+{
+ __int128_t *vfp_state;
+ uint64_t fpcr, fpsr;
+
+ vfp_state = state->vfp_regs;
+ fpcr = state->vfp_fpcr;
+ fpsr = state->vfp_fpsr;
+
+ __asm __volatile(
+ "ldp q0, q1, [%2, #16 * 0]\n"
+ "ldp q2, q3, [%2, #16 * 2]\n"
+ "ldp q4, q5, [%2, #16 * 4]\n"
+ "ldp q6, q7, [%2, #16 * 6]\n"
+ "ldp q8, q9, [%2, #16 * 8]\n"
+ "ldp q10, q11, [%2, #16 * 10]\n"
+ "ldp q12, q13, [%2, #16 * 12]\n"
+ "ldp q14, q15, [%2, #16 * 14]\n"
+ "ldp q16, q17, [%2, #16 * 16]\n"
+ "ldp q18, q19, [%2, #16 * 18]\n"
+ "ldp q20, q21, [%2, #16 * 20]\n"
+ "ldp q22, q23, [%2, #16 * 22]\n"
+ "ldp q24, q25, [%2, #16 * 24]\n"
+ "ldp q26, q27, [%2, #16 * 26]\n"
+ "ldp q28, q29, [%2, #16 * 28]\n"
+ "ldp q30, q31, [%2, #16 * 30]\n"
+ "msr fpcr, %0 \n"
+ "msr fpsr, %1 \n"
+ : : "r"(fpcr), "r"(fpsr), "r"(vfp_state));
+}
+
+void
+vfp_save_state(struct thread *td, struct pcb *pcb)
+{
+ uint32_t cpacr;
+
+ KASSERT(pcb != NULL, ("NULL vfp pcb"));
+ KASSERT(td == NULL || td->td_pcb == pcb, ("Invalid vfp pcb"));
+
+ /*
+ * savectx() will be called on panic with dumppcb as an argument;
+ * dumppcb doesn't have pcb_fpusaved set, so set it to save
+ * the VFP registers.
+ */
+ if (pcb->pcb_fpusaved == NULL)
+ pcb->pcb_fpusaved = &pcb->pcb_fpustate;
+
+ if (td == NULL)
+ td = curthread;
+
+ critical_enter();
+ /*
+ * Only store the registers if the VFP is enabled,
+ * i.e. return if we are trapping on FP access.
+ */
+ cpacr = READ_SPECIALREG(cpacr_el1);
+ if ((cpacr & CPACR_FPEN_MASK) == CPACR_FPEN_TRAP_NONE) {
+ KASSERT(PCPU_GET(fpcurthread) == td,
+ ("Storing an invalid VFP state"));
+
+ vfp_store(pcb->pcb_fpusaved);
+ dsb(ish);
+ vfp_disable();
+ }
+ critical_exit();
+}
+
+void
+vfp_restore_state(void)
+{
+ struct pcb *curpcb;
+ u_int cpu;
+
+ critical_enter();
+
+ cpu = PCPU_GET(cpuid);
+ curpcb = curthread->td_pcb;
+ curpcb->pcb_fpflags |= PCB_FP_STARTED;
+
+ vfp_enable();
+
+ /*
+ * If the previous thread on this cpu to use the VFP was not the
+ * current thread, or the current thread last used it on a different
+ * cpu, we need to restore the old state.
+ */
+ if (PCPU_GET(fpcurthread) != curthread || cpu != curpcb->pcb_vfpcpu) {
+ vfp_restore(curthread->td_pcb->pcb_fpusaved);
+ PCPU_SET(fpcurthread, curthread);
+ curpcb->pcb_vfpcpu = cpu;
+ }
+
+ critical_exit();
+}
+
+void
+vfp_init(void)
+{
+ uint64_t pfr;
+
+ /* Check if there is a vfp unit present */
+ pfr = READ_SPECIALREG(id_aa64pfr0_el1);
+ if ((pfr & ID_AA64PFR0_FP_MASK) == ID_AA64PFR0_FP_NONE)
+ return;
+
+ /* Disable it here; it will be enabled when first used */
+ vfp_disable();
+}
+
+SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL);
+
+struct fpu_kern_ctx *
+fpu_kern_alloc_ctx(u_int flags)
+{
+ struct fpu_kern_ctx *res;
+ size_t sz;
+
+ sz = sizeof(struct fpu_kern_ctx);
+ res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
+ M_NOWAIT : M_WAITOK) | M_ZERO);
+ return (res);
+}
+
+void
+fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
+{
+
+ KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx"));
+ /* XXXAndrew clear the memory ? */
+ free(ctx, M_FPUKERN_CTX);
+}
+
+void
+fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
+{
+ struct pcb *pcb;
+
+ pcb = td->td_pcb;
+ KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
+ ("ctx is required when !FPU_KERN_NOCTX"));
+ KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
+ ("using inuse ctx"));
+ KASSERT((pcb->pcb_fpflags & PCB_FP_NOSAVE) == 0,
+ ("recursive fpu_kern_enter while in PCB_FP_NOSAVE state"));
+
+ if ((flags & FPU_KERN_NOCTX) != 0) {
+ critical_enter();
+ if (curthread == PCPU_GET(fpcurthread)) {
+ vfp_save_state(curthread, pcb);
+ }
+ PCPU_SET(fpcurthread, NULL);
+
+ vfp_enable();
+ pcb->pcb_fpflags |= PCB_FP_KERN | PCB_FP_NOSAVE |
+ PCB_FP_STARTED;
+ return;
+ }
+
+ if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
+ ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
+ return;
+ }
+ /*
+ * Check that either we are already using the VFP in the kernel, or
+ * the saved state points to the default user space. (A usage sketch
+ * for the fpu_kern API appears at the end of this diff.)
+ */ + KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0 || + pcb->pcb_fpusaved == &pcb->pcb_fpustate, + ("Mangled pcb_fpusaved %x %p %p", pcb->pcb_fpflags, pcb->pcb_fpusaved, &pcb->pcb_fpustate)); + ctx->flags = FPU_KERN_CTX_INUSE; + vfp_save_state(curthread, pcb); + ctx->prev = pcb->pcb_fpusaved; + pcb->pcb_fpusaved = &ctx->state; + pcb->pcb_fpflags |= PCB_FP_KERN; + pcb->pcb_fpflags &= ~PCB_FP_STARTED; + + return; +} + +int +fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) +{ + struct pcb *pcb; + + pcb = td->td_pcb; + + if ((pcb->pcb_fpflags & PCB_FP_NOSAVE) != 0) { + KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX")); + KASSERT(PCPU_GET(fpcurthread) == NULL, + ("non-NULL fpcurthread for PCB_FP_NOSAVE")); + CRITICAL_ASSERT(td); + + vfp_disable(); + pcb->pcb_fpflags &= ~(PCB_FP_NOSAVE | PCB_FP_STARTED); + critical_exit(); + } else { + KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, + ("FPU context not inuse")); + ctx->flags &= ~FPU_KERN_CTX_INUSE; + + if (is_fpu_kern_thread(0) && + (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) + return (0); + KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx")); + critical_enter(); + vfp_discard(td); + critical_exit(); + pcb->pcb_fpflags &= ~PCB_FP_STARTED; + pcb->pcb_fpusaved = ctx->prev; + } + + if (pcb->pcb_fpusaved == &pcb->pcb_fpustate) { + pcb->pcb_fpflags &= ~PCB_FP_KERN; + } else { + KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0, + ("unpaired fpu_kern_leave")); + } + + return (0); +} + +int +fpu_kern_thread(u_int flags) +{ + struct pcb *pcb = curthread->td_pcb; + + KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, + ("Only kthread may use fpu_kern_thread")); + KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate, + ("Mangled pcb_fpusaved")); + KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) == 0, + ("Thread already setup for the VFP")); + pcb->pcb_fpflags |= PCB_FP_KERN; + return (0); +} + +int +is_fpu_kern_thread(u_int flags) +{ + struct pcb *curpcb; + + if ((curthread->td_pflags & TDP_KTHREAD) == 0) + return (0); + curpcb = curthread->td_pcb; + return ((curpcb->pcb_fpflags & PCB_FP_KERN) != 0); +} +#endif diff --git a/sys/arm64/arm64/vm_machdep.c b/sys/arm64/arm64/vm_machdep.c new file mode 100644 index 000000000000..3b928ad7cabf --- /dev/null +++ b/sys/arm64/arm64/vm_machdep.c @@ -0,0 +1,300 @@ +/*- + * Copyright (c) 2014 Andrew Turner + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include "opt_platform.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/limits.h>
+#include <sys/proc.h>
+#include <sys/sf_buf.h>
+#include <sys/signal.h>
+#include <sys/sysent.h>
+#include <sys/unistd.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/uma.h>
+#include <vm/uma_int.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/frame.h>
+
+#ifdef VFP
+#include <machine/vfp.h>
+#endif
+
+#include <dev/psci/psci.h>
+
+/*
+ * Finish a fork operation, with process p2 nearly set up.
+ * Copy and update the pcb, and set up the stack so that the child is
+ * ready to run and return to user mode.
+ */
+void
+cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
+{
+ struct pcb *pcb2;
+ struct trapframe *tf;
+
+ if ((flags & RFPROC) == 0)
+ return;
+
+ if (td1 == curthread) {
+ /*
+ * Save tpidr_el0 and the VFP state. These are normally saved
+ * in cpu_switch(), but if userland changes them and then
+ * forks, that may not have happened.
+ */
+ td1->td_pcb->pcb_tpidr_el0 = READ_SPECIALREG(tpidr_el0);
+ td1->td_pcb->pcb_tpidrro_el0 = READ_SPECIALREG(tpidrro_el0);
+#ifdef VFP
+ if ((td1->td_pcb->pcb_fpflags & PCB_FP_STARTED) != 0)
+ vfp_save_state(td1, td1->td_pcb);
+#endif
+ }
+
+ pcb2 = (struct pcb *)(td2->td_kstack +
+ td2->td_kstack_pages * PAGE_SIZE) - 1;
+
+ td2->td_pcb = pcb2;
+ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
+
+ tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1);
+ bcopy(td1->td_frame, tf, sizeof(*tf));
+ tf->tf_x[0] = 0;
+ tf->tf_x[1] = 0;
+ tf->tf_spsr = td1->td_frame->tf_spsr & (PSR_M_32 | PSR_DAIF);
+
+ td2->td_frame = tf;
+
+ /* Set the return value registers for fork() */
+ td2->td_pcb->pcb_x[8] = (uintptr_t)fork_return;
+ td2->td_pcb->pcb_x[9] = (uintptr_t)td2;
+ td2->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline;
+ td2->td_pcb->pcb_sp = (uintptr_t)td2->td_frame;
+ td2->td_pcb->pcb_fpusaved = &td2->td_pcb->pcb_fpustate;
+ td2->td_pcb->pcb_vfpcpu = UINT_MAX;
+
+ /* Setup to release spin count in fork_exit(). */
+ td2->td_md.md_spinlock_count = 1;
+ td2->td_md.md_saved_daif = td1->td_md.md_saved_daif & ~DAIF_I_MASKED;
+}
+
+void
+cpu_reset(void)
+{
+
+ psci_reset();
+
+ printf("cpu_reset failed");
+ while (1)
+ __asm volatile("wfi" ::: "memory");
+}
+
+void
+cpu_thread_swapin(struct thread *td)
+{
+}
+
+void
+cpu_thread_swapout(struct thread *td)
+{
+}
+
+void
+cpu_set_syscall_retval(struct thread *td, int error)
+{
+ struct trapframe *frame;
+
+ frame = td->td_frame;
+
+ switch (error) {
+ case 0:
+ frame->tf_x[0] = td->td_retval[0];
+ frame->tf_x[1] = td->td_retval[1];
+ frame->tf_spsr &= ~PSR_C; /* carry bit */
+ break;
+ case ERESTART:
+ frame->tf_elr -= 4;
+ break;
+ case EJUSTRETURN:
+ break;
+ default:
+ frame->tf_spsr |= PSR_C; /* carry bit */
+ frame->tf_x[0] = SV_ABI_ERRNO(td->td_proc, error);
+ break;
+ }
+}
+
+/*
+ * Initialize machine state, mostly the pcb and trap frame, for a new
+ * thread that is about to return to userspace. Put enough state in the
+ * new thread's PCB to get it to go back to fork_return(), which
+ * finalizes the thread state and handles peculiarities of the first
+ * return to userspace for the new thread.
+ */
+void
+cpu_copy_thread(struct thread *td, struct thread *td0)
+{
+ bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
+ bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb));
+
+ td->td_pcb->pcb_x[8] = (uintptr_t)fork_return;
+ td->td_pcb->pcb_x[9] = (uintptr_t)td;
+ td->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline;
+ td->td_pcb->pcb_sp = (uintptr_t)td->td_frame;
+ td->td_pcb->pcb_fpusaved = &td->td_pcb->pcb_fpustate;
+ td->td_pcb->pcb_vfpcpu = UINT_MAX;
+
+ /* Setup to release spin count in fork_exit(). */
+ td->td_md.md_spinlock_count = 1;
+ td->td_md.md_saved_daif = td0->td_md.md_saved_daif & ~DAIF_I_MASKED;
+}
+
+/*
+ * Set the machine state for performing an upcall that starts
+ * the entry function with the given argument.
+ */
+void
+cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg,
+ stack_t *stack)
+{
+ struct trapframe *tf = td->td_frame;
+
+ /* 32-bit processes use r13 as sp */
+ if (td->td_frame->tf_spsr & PSR_M_32)
+ tf->tf_x[13] = STACKALIGN((uintptr_t)stack->ss_sp + stack->ss_size);
+ else
+ tf->tf_sp = STACKALIGN((uintptr_t)stack->ss_sp + stack->ss_size);
+ tf->tf_elr = (register_t)entry;
+ tf->tf_x[0] = (register_t)arg;
+}
+
+int
+cpu_set_user_tls(struct thread *td, void *tls_base)
+{
+ struct pcb *pcb;
+
+ if ((uintptr_t)tls_base >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+
+ pcb = td->td_pcb;
+ if (td->td_frame->tf_spsr & PSR_M_32) {
+ /* 32-bit ARM stores the user TLS in tpidrro_el0 */
+ pcb->pcb_tpidrro_el0 = (register_t)tls_base;
+ pcb->pcb_tpidr_el0 = (register_t)tls_base;
+ if (td == curthread) {
+ WRITE_SPECIALREG(tpidrro_el0, tls_base);
+ WRITE_SPECIALREG(tpidr_el0, tls_base);
+ }
+ } else {
+ pcb->pcb_tpidr_el0 = (register_t)tls_base;
+ if (td == curthread)
+ WRITE_SPECIALREG(tpidr_el0, tls_base);
+ }
+
+ return (0);
+}
+
+void
+cpu_thread_exit(struct thread *td)
+{
+}
+
+void
+cpu_thread_alloc(struct thread *td)
+{
+
+ td->td_pcb = (struct pcb *)(td->td_kstack +
+ td->td_kstack_pages * PAGE_SIZE) - 1;
+ td->td_frame = (struct trapframe *)STACKALIGN(
+ (struct trapframe *)td->td_pcb - 1);
+}
+
+void
+cpu_thread_free(struct thread *td)
+{
+}
+
+void
+cpu_thread_clean(struct thread *td)
+{
+}
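The error convention encoded by cpu_set_syscall_retval() above is worth spelling out: on success the kernel clears the carry bit (PSR_C) and returns results in x0/x1; on failure it sets the carry bit and places the errno value in x0; ERESTART rewinds tf_elr by one 4-byte instruction so the svc is re-executed. A sketch of the consuming side (hypothetical helper; real libc syscall stubs do this in assembly):

#include <errno.h>

/* Hypothetical: interpret the kernel's syscall return convention. */
static long
interpret_sysret(long x0, int carry_set)
{

	if (carry_set) {	/* PSR_C set: x0 carries an errno value */
		errno = (int)x0;
		return (-1);
	}
	return (x0);		/* PSR_C clear: x0 is the result */
}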
+
+/*
+ * Intercept the return address from a freshly forked process that has
+ * NOT been scheduled yet.
+ *
+ * This is needed to make kernel threads stay in kernel mode.
+ */
+void
+cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg)
+{
+
+ td->td_pcb->pcb_x[8] = (uintptr_t)func;
+ td->td_pcb->pcb_x[9] = (uintptr_t)arg;
+ td->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline;
+ td->td_pcb->pcb_sp = (uintptr_t)td->td_frame;
+ td->td_pcb->pcb_fpusaved = &td->td_pcb->pcb_fpustate;
+ td->td_pcb->pcb_vfpcpu = UINT_MAX;
+}
+
+void
+cpu_exit(struct thread *td)
+{
+}
+
+bool
+cpu_exec_vmspace_reuse(struct proc *p __unused, vm_map_t map __unused)
+{
+
+ return (true);
+}
+
+int
+cpu_procctl(struct thread *td __unused, int idtype __unused, id_t id __unused,
+ int com __unused, void *data __unused)
+{
+
+ return (EINVAL);
+}
+
+void
+swi_vm(void *v)
+{
+
+ if (busdma_swi_pending != 0)
+ busdma_swi();
+}
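unwind.c above encodes the AArch64 frame layout: the saved frame pointer (x29) lives at [fp] and the saved link register (x30) at [fp + 8], and pc is set to lr - 4 so it points at the call instruction itself. A minimal in-kernel backtrace loop over this API might look like the following sketch (the caller seeds the state from a trapframe; the print format is illustrative):

#include <machine/stack.h>

/* Sketch: walk and print the kernel frame-pointer chain. */
static void
print_backtrace(struct unwind_state *frame)
{

	/* unwind_frame() returns -1 once fp leaves the kernel range. */
	while (unwind_frame(frame) == 0)
		printf("  pc %16lx  fp %16lx\n", frame->pc, frame->fp);
}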
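Finally, the fpu_kern interface in vfp.c is the piece the earlier forward reference points at: kernel code that wants to execute SIMD/FP instructions brackets them with fpu_kern_enter()/fpu_kern_leave() so user FP state is preserved. A usage sketch modeled on how in-kernel crypto drivers consume this interface (the surrounding function is hypothetical, and FPU_KERN_NORMAL follows the fpu_kern flag naming used elsewhere in FreeBSD):

#include <machine/vfp.h>

/* Sketch: borrow the SIMD unit from kernel context. */
static void
simd_work_example(void)
{
	struct fpu_kern_ctx *ctx;

	ctx = fpu_kern_alloc_ctx(0);	/* may sleep; pass FPU_KERN_NOWAIT otherwise */
	fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL);
	/* ... SIMD/FP instructions are safe to use here ... */
	fpu_kern_leave(curthread, ctx);
	fpu_kern_free_ctx(ctx);
}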