aboutsummaryrefslogtreecommitdiff
path: root/sys/arm64/arm64/disassem.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/arm64/arm64/disassem.c')
-rw-r--r--sys/arm64/arm64/disassem.c545
1 files changed, 545 insertions, 0 deletions
diff --git a/sys/arm64/arm64/disassem.c b/sys/arm64/arm64/disassem.c
new file mode 100644
index 000000000000..ce0bf7660b02
--- /dev/null
+++ b/sys/arm64/arm64/disassem.c
@@ -0,0 +1,545 @@
+/*-
+ * Copyright (c) 2016 Cavium
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+
+#include <sys/systm.h>
+#include <machine/disassem.h>
+#include <machine/armreg.h>
+#include <ddb/ddb.h>
+
+#define ARM64_MAX_TOKEN_LEN 8
+#define ARM64_MAX_TOKEN_CNT 10
+
+#define ARM_INSN_SIZE_OFFSET 30
+#define ARM_INSN_SIZE_MASK 0x3
+
+/* Special options for instruction printing */
+#define OP_SIGN_EXT (1UL << 0) /* Sign-extend immediate value */
+#define OP_LITERAL (1UL << 1) /* Use literal (memory offset) */
+#define OP_MULT_4 (1UL << 2) /* Multiply immediate by 4 */
+#define OP_SF32 (1UL << 3) /* Force 32-bit access */
+#define OP_SF_INV (1UL << 6) /* SF is inverted (1 means 32 bit access) */
+
+static const char *w_reg[] = {
+ "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
+ "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
+ "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
+ "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wSP",
+};
+
+static const char *x_reg[] = {
+ "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+ "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+ "x24", "x25", "x26", "x27", "x28", "x29", "LR", "SP",
+};
+
+static const char *shift_2[] = {
+ "LSL", "LSR", "ASR", "RSV"
+};
+
+/*
+ * Structure representing single token (operand) inside instruction.
+ * name - name of operand
+ * pos - position within the instruction (in bits)
+ * len - operand length (in bits)
+ */
+struct arm64_insn_token {
+ char name[ARM64_MAX_TOKEN_LEN];
+ int pos;
+ int len;
+};
+
+/*
+ * Define generic types for instruction printing.
+ */
+enum arm64_format_type {
+ TYPE_01, /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
+ OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */
+ TYPE_02, /* OP <RT>, [<RN>, #<imm>]{!}] SF32/64
+ OP <RT>, [<RN>], #<imm>{!} SF32/64
+ OP <RT>, <RN>, <RM> {, EXTEND AMOUNT } */
+ TYPE_03, /* OP <RT>, #imm SF32/64 */
+};
+
+/*
+ * Structure representing single parsed instruction format.
+ * name - opcode name
+ * format - opcode format in a human-readable way
+ * type - syntax type for printing
+ * special_ops - special options passed to a printer (if any)
+ * mask - bitmask for instruction matching
+ * pattern - pattern to look for
+ * tokens - array of tokens (operands) inside instruction
+ */
+struct arm64_insn {
+ char* name;
+ char* format;
+ enum arm64_format_type type;
+ uint64_t special_ops;
+ uint32_t mask;
+ uint32_t pattern;
+ struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT];
+};
+
+/*
+ * Specify instruction opcode format in a human-readable way. Use notation
+ * obtained from ARM Architecture Reference Manual for ARMv8-A.
+ *
+ * Format string description:
+ * Each group must be separated by "|". Group made of 0/1 is used to
+ * generate mask and pattern for instruction matching. Groups containing
+ * an operand token (in format NAME(length_bits)) are used to retrieve any
+ * operand data from the instruction. Names here must be meaningful
+ * and match the one described in the Manual.
+ *
+ * Token description:
+ * SF - "0" represents 32-bit access, "1" represents 64-bit access
+ * SHIFT - type of shift (instruction dependent)
+ * IMM - immediate value
+ * Rx - register number
+ * OPTION - command specific options
+ * SCALE - scaling of immediate value
+ */
+static struct arm64_insn arm64_i[] = {
+ { "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
+ TYPE_01, 0 },
+ { "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)",
+ TYPE_01, 0 },
+ { "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)",
+ TYPE_01, 0 },
+ { "ldr", "1|SF(1)|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
+ TYPE_02, OP_SIGN_EXT }, /* ldr immediate post/pre index */
+ { "ldr", "1|SF(1)|11100101|IMM(12)|RN(5)|RT(5)",
+ TYPE_02, 0 }, /* ldr immediate unsigned */
+ { "ldr", "1|SF(1)|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
+ TYPE_02, 0 }, /* ldr register */
+ { "ldr", "0|SF(1)|011000|IMM(19)|RT(5)",
+ TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */
+ { "ldrb", "00|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
+ TYPE_02, OP_SIGN_EXT | OP_SF32 }, /* ldrb immediate post/pre index */
+ { "ldrb", "00|11100101|IMM(12)|RN(5)|RT(5)",
+ TYPE_02, OP_SF32 }, /* ldrb immediate unsigned */
+ { "ldrb", "00|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
+ TYPE_02, OP_SF32 }, /* ldrb register */
+ { "ldrh", "01|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", TYPE_02,
+ OP_SIGN_EXT | OP_SF32 }, /* ldrh immediate post/pre index */
+ { "ldrh", "01|11100101|IMM(12)|RN(5)|RT(5)",
+ TYPE_02, OP_SF32 }, /* ldrh immediate unsigned */
+ { "ldrh", "01|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
+ TYPE_02, OP_SF32 }, /* ldrh register */
+ { "ldrsb", "001110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
+ TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsb immediate post/pre index */
+ { "ldrsb", "001110011|SF(1)|IMM(12)|RN(5)|RT(5)",\
+ TYPE_02, OP_SF_INV}, /* ldrsb immediate unsigned */
+ { "ldrsb", "001110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
+ TYPE_02, OP_SF_INV }, /* ldrsb register */
+ { "ldrsh", "011110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
+ TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsh immediate post/pre index */
+ { "ldrsh", "011110011|SF(1)|IMM(12)|RN(5)|RT(5)",
+ TYPE_02, OP_SF_INV}, /* ldrsh immediate unsigned */
+ { "ldrsh", "011110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
+ TYPE_02, OP_SF_INV }, /* ldrsh register */
+ { "ldrsw", "10111000100|IMM(9)|OPTION(2)|RN(5)|RT(5)",
+ TYPE_02, OP_SIGN_EXT }, /* ldrsw immediate post/pre index */
+ { "ldrsw", "1011100110|IMM(12)|RN(5)|RT(5)",
+ TYPE_02, 0 }, /* ldrsw immediate unsigned */
+ { "ldrsw", "10111000101|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
+ TYPE_02, 0 }, /* ldrsw register */
+ { "ldrsw", "10011000|IMM(19)|RT(5)",
+ TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */
+ { NULL, NULL }
+};
+
+static void
+arm64_disasm_generate_masks(struct arm64_insn *tab)
+{
+ uint32_t mask, val;
+ int a, i;
+ int len, ret;
+ int token = 0;
+ char *format;
+ int error;
+
+ while (tab->name != NULL) {
+ mask = 0;
+ val = 0;
+ format = tab->format;
+ token = 0;
+ error = 0;
+
+ /*
+ * For each entry analyze format strings from the
+ * left (i.e. from the MSB).
+ */
+ a = (INSN_SIZE * NBBY) - 1;
+ while (*format != '\0' && (a >= 0)) {
+ switch(*format) {
+ case '0':
+ /* Bit is 0, add to mask and pattern */
+ mask |= (1 << a);
+ a--;
+ format++;
+ break;
+ case '1':
+ /* Bit is 1, add to mask and pattern */
+ mask |= (1 << a);
+ val |= (1 << a);
+ a--;
+ format++;
+ break;
+ case '|':
+ /* skip */
+ format++;
+ break;
+ default:
+ /* Token found, copy the name */
+ memset(tab->tokens[token].name, 0,
+ sizeof(tab->tokens[token].name));
+ i = 0;
+ while (*format != '(') {
+ tab->tokens[token].name[i] = *format;
+ i++;
+ format++;
+ if (i >= ARM64_MAX_TOKEN_LEN) {
+ printf("ERROR: token too long in op %s\n",
+ tab->name);
+ error = 1;
+ break;
+ }
+ }
+ if (error != 0)
+ break;
+
+ /* Read the length value */
+ ret = sscanf(format, "(%d)", &len);
+ if (ret == 1) {
+ if (token >= ARM64_MAX_TOKEN_CNT) {
+ printf("ERROR: to many tokens in op %s\n",
+ tab->name);
+ error = 1;
+ break;
+ }
+
+ a -= len;
+ tab->tokens[token].pos = a + 1;
+ tab->tokens[token].len = len;
+ token++;
+ }
+
+ /* Skip to the end of the token */
+ while (*format != 0 && *format != '|')
+ format++;
+ }
+ }
+
+ /* Write mask and pattern to the instruction array */
+ tab->mask = mask;
+ tab->pattern = val;
+
+ /*
+ * If we got here, format string must be parsed and "a"
+ * should point to -1. If it's not, wrong number of bits
+ * in format string. Mark this as invalid and prevent
+ * from being matched.
+ */
+ if (*format != 0 || (a != -1) || (error != 0)) {
+ tab->mask = 0;
+ tab->pattern = 0xffffffff;
+ printf("ERROR: skipping instruction op %s\n",
+ tab->name);
+ }
+
+ tab++;
+ }
+}
+
+static int
+arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode,
+ const char *token, int *val)
+{
+ int i;
+
+ for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
+ if (strcmp(insn->tokens[i].name, token) == 0) {
+ *val = (opcode >> insn->tokens[i].pos &
+ ((1 << insn->tokens[i].len) - 1));
+ return (0);
+ }
+ }
+
+ return (EINVAL);
+}
+
+static int
+arm64_disasm_read_token_sign_ext(struct arm64_insn *insn, u_int opcode,
+ const char *token, int *val)
+{
+ int i;
+ int msk;
+
+ for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
+ if (strcmp(insn->tokens[i].name, token) == 0) {
+ msk = (1 << insn->tokens[i].len) - 1;
+ *val = ((opcode >> insn->tokens[i].pos) & msk);
+
+ /* If last bit is 1, sign-extend the value */
+ if (*val & (1 << (insn->tokens[i].len - 1)))
+ *val |= ~msk;
+
+ return (0);
+ }
+ }
+
+ return (EINVAL);
+}
+
+static const char *
+arm64_reg(int b64, int num)
+{
+
+ if (b64 != 0)
+ return (x_reg[num]);
+
+ return (w_reg[num]);
+}
+
+vm_offset_t
+disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt)
+{
+ struct arm64_insn *i_ptr = arm64_i;
+ uint32_t insn;
+ int matchp;
+ int ret;
+ int shift, rm, rt, rd, rn, imm, sf, idx, option, scale, amount;
+ int sign_ext;
+ int rm_absent;
+ /* Indicate if immediate should be outside or inside brackets */
+ int inside;
+ /* Print exclamation mark if pre-incremented */
+ int pre;
+
+ /* Initialize defaults, all are 0 except SF indicating 64bit access */
+ shift = rd = rm = rn = imm = idx = option = amount = scale = 0;
+ sign_ext = 0;
+ sf = 1;
+
+ matchp = 0;
+ insn = di->di_readword(loc);
+ while (i_ptr->name) {
+ /* If mask is 0 then the parser was not initialized yet */
+ if ((i_ptr->mask != 0) &&
+ ((insn & i_ptr->mask) == i_ptr->pattern)) {
+ matchp = 1;
+ break;
+ }
+ i_ptr++;
+ }
+ if (matchp == 0)
+ goto undefined;
+
+ /* Global options */
+ if (i_ptr->special_ops & OP_SF32)
+ sf = 0;
+
+ /* Global optional tokens */
+ arm64_disasm_read_token(i_ptr, insn, "SF", &sf);
+ if (i_ptr->special_ops & OP_SF_INV)
+ sf = 1 - sf;
+ if (arm64_disasm_read_token(i_ptr, insn, "SIGN", &sign_ext) == 0)
+ sign_ext = 1 - sign_ext;
+ if (i_ptr->special_ops & OP_SIGN_EXT)
+ sign_ext = 1;
+ if (sign_ext != 0)
+ arm64_disasm_read_token_sign_ext(i_ptr, insn, "IMM", &imm);
+ else
+ arm64_disasm_read_token(i_ptr, insn, "IMM", &imm);
+ if (i_ptr->special_ops & OP_MULT_4)
+ imm <<= 2;
+
+ /* Print opcode by type */
+ switch (i_ptr->type) {
+ case TYPE_01:
+ /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
+ OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */
+
+ /* Mandatory tokens */
+ ret = arm64_disasm_read_token(i_ptr, insn, "RD", &rd);
+ ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
+ if (ret != 0) {
+ printf("ERROR: Missing mandatory token for op %s type %d\n",
+ i_ptr->name, i_ptr->type);
+ goto undefined;
+ }
+
+ /* Optional tokens */
+ arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift);
+ rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
+
+ di->di_printf("%s\t%s, %s", i_ptr->name, arm64_reg(sf, rd),
+ arm64_reg(sf, rn));
+
+ /* If RM is present use it, otherwise use immediate notation */
+ if (rm_absent == 0) {
+ di->di_printf(", %s", arm64_reg(sf, rm));
+ if (imm != 0)
+ di->di_printf(", %s #%d", shift_2[shift], imm);
+ } else {
+ if (imm != 0 || shift != 0)
+ di->di_printf(", #0x%x", imm);
+ if (shift != 0)
+ di->di_printf(" LSL #12");
+ }
+ break;
+ case TYPE_02:
+ /* OP <RT>, [<RN>, #<imm>]{!}] SF32/64
+ OP <RT>, [<RN>], #<imm>{!} SF32/64
+ OP <RT>, <RN>, <RM> {, EXTEND AMOUNT } */
+
+ /* Mandatory tokens */
+ ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
+ ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
+ if (ret != 0) {
+ printf("ERROR: Missing mandatory token for op %s type %d\n",
+ i_ptr->name, i_ptr->type);
+ goto undefined;
+ }
+
+ /* Optional tokens */
+ arm64_disasm_read_token(i_ptr, insn, "OPTION", &option);
+ arm64_disasm_read_token(i_ptr, insn, "SCALE", &scale);
+ rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
+
+ if (rm_absent) {
+ /*
+ * In unsigned operation, shift immediate value
+ * and reset options to default.
+ */
+ if (sign_ext == 0) {
+ imm = imm << ((insn >> ARM_INSN_SIZE_OFFSET) &
+ ARM_INSN_SIZE_MASK);
+ option = 0;
+ }
+ switch (option) {
+ case 0x0:
+ pre = 0;
+ inside = 1;
+ break;
+ case 0x1:
+ pre = 0;
+ inside = 0;
+ break;
+ case 0x2:
+ default:
+ pre = 1;
+ inside = 1;
+ break;
+ }
+
+ di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt));
+ if (inside != 0) {
+ di->di_printf("[%s", arm64_reg(1, rn));
+ if (imm != 0)
+ di->di_printf(", #%d", imm);
+ di->di_printf("]");
+ } else {
+ di->di_printf("[%s]", arm64_reg(1, rn));
+ if (imm != 0)
+ di->di_printf(", #%d", imm);
+ }
+ if (pre != 0)
+ di->di_printf("!");
+ } else {
+ /* Last bit of option field determines 32/64 bit offset */
+ di->di_printf("%s\t%s, [%s, %s", i_ptr->name,
+ arm64_reg(sf, rt), arm64_reg(1, rn),
+ arm64_reg(option & 1, rm));
+
+ /* Calculate amount, it's op(31:30) */
+ amount = (insn >> ARM_INSN_SIZE_OFFSET) &
+ ARM_INSN_SIZE_MASK;
+
+ switch (option) {
+ case 0x2:
+ di->di_printf(", uxtw #%d", amount);
+ break;
+ case 0x3:
+ if (scale != 0)
+ di->di_printf(", lsl #%d", amount);
+ break;
+ case 0x6:
+ di->di_printf(", sxtw #%d", amount);
+ break;
+ case 0x7:
+ di->di_printf(", sxts #%d", amount);
+ break;
+ default:
+ di->di_printf(", RSVD");
+ break;
+ }
+ di->di_printf("]");
+ }
+
+ break;
+
+ case TYPE_03:
+ /* OP <RT>, #imm SF32/64 */
+
+ /* Mandatory tokens */
+ ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
+ if (ret != 0) {
+ printf("ERROR: Missing mandatory token for op %s type %d\n",
+ i_ptr->name, i_ptr->type);
+ goto undefined;
+ }
+
+ di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt));
+ if (i_ptr->special_ops & OP_LITERAL)
+ di->di_printf("0x%lx", loc + imm);
+ else
+ di->di_printf("#%d", imm);
+
+ break;
+ default:
+ goto undefined;
+ }
+
+ di->di_printf("\n");
+ return(loc + INSN_SIZE);
+
+undefined:
+ di->di_printf("undefined\t%08x\n", insn);
+ return(loc + INSN_SIZE);
+}
+
+/* Parse format strings at the very beginning */
+SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES,
+ SI_ORDER_FIRST, arm64_disasm_generate_masks, arm64_i);