lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379

/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===*
 *
 * This file is part of the X86 Disassembler.
 * It contains common definitions used by both the disassembler and the table
 *  generator.
 * Documentation for the disassembler can be found in X86Disassembler.h.
 *
 *===----------------------------------------------------------------------===*/

/*
 * This header file provides those definitions that need to be shared between
 * the decoder and the table generator in a C-friendly manner.
 */

#ifndef X86DISASSEMBLERDECODERCOMMON_H
#define X86DISASSEMBLERDECODERCOMMON_H

#include "llvm/Support/DataTypes.h"

#define INSTRUCTIONS_SYM  x86DisassemblerInstrSpecifiers
#define CONTEXTS_SYM      x86DisassemblerContexts
#define ONEBYTE_SYM       x86DisassemblerOneByteOpcodes
#define TWOBYTE_SYM       x86DisassemblerTwoByteOpcodes
#define THREEBYTE38_SYM   x86DisassemblerThreeByte38Opcodes
#define THREEBYTE3A_SYM   x86DisassemblerThreeByte3AOpcodes
#define THREEBYTEA6_SYM   x86DisassemblerThreeByteA6Opcodes
#define THREEBYTEA7_SYM   x86DisassemblerThreeByteA7Opcodes

#define INSTRUCTIONS_STR  "x86DisassemblerInstrSpecifiers"
#define CONTEXTS_STR      "x86DisassemblerContexts"
#define ONEBYTE_STR       "x86DisassemblerOneByteOpcodes"
#define TWOBYTE_STR       "x86DisassemblerTwoByteOpcodes"
#define THREEBYTE38_STR   "x86DisassemblerThreeByte38Opcodes"
#define THREEBYTE3A_STR   "x86DisassemblerThreeByte3AOpcodes"
#define THREEBYTEA6_STR   "x86DisassemblerThreeByteA6Opcodes"
#define THREEBYTEA7_STR   "x86DisassemblerThreeByteA7Opcodes"

/*
 * Attributes of an instruction that must be known before the opcode can be
 * processed correctly.  Most of these indicate the presence of particular
 * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
 */
#define ATTRIBUTE_BITS          \
  ENUM_ENTRY(ATTR_NONE,   0x00) \
  ENUM_ENTRY(ATTR_64BIT,  0x01) \
  ENUM_ENTRY(ATTR_XS,     0x02) \
  ENUM_ENTRY(ATTR_XD,     0x04) \
  ENUM_ENTRY(ATTR_REXW,   0x08) \
  ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
  ENUM_ENTRY(ATTR_VEX,    0x20) \
  ENUM_ENTRY(ATTR_VEXL,   0x40)

#define ENUM_ENTRY(n, v) n = v,
enum attributeBits {
  ATTRIBUTE_BITS
  ATTR_max
};
#undef ENUM_ENTRY

/*
 * Combinations of the above attributes that are relevant to instruction
 * decode.  Although other combinations are possible, they can be reduced to
 * these without affecting the ultimately decoded instruction.
 */

/*           Class name           Rank  Rationale for rank assignment         */
#define INSTRUCTION_CONTEXTS                                                   \
  ENUM_ENTRY(IC,                    0,  "says nothing about the instruction")  \
  ENUM_ENTRY(IC_64BIT,              1,  "says the instruction applies in "     \
                                        "64-bit mode but no more")             \
  ENUM_ENTRY(IC_OPSIZE,             3,  "requires an OPSIZE prefix, so "       \
                                        "operands change width")               \
  ENUM_ENTRY(IC_XD,                 2,  "may say something about the opcode "  \
                                        "but not the operands")                \
  ENUM_ENTRY(IC_XS,                 2,  "may say something about the opcode "  \
                                        "but not the operands")                \
  ENUM_ENTRY(IC_64BIT_REXW,         4,  "requires a REX.W prefix, so operands "\
                                        "change width; overrides IC_OPSIZE")   \
  ENUM_ENTRY(IC_64BIT_OPSIZE,       3,  "Just as meaningful as IC_OPSIZE")     \
  ENUM_ENTRY(IC_64BIT_XD,           5,  "XD instructions are SSE; REX.W is "   \
                                        "secondary")                           \
  ENUM_ENTRY(IC_64BIT_XS,           5,  "Just as meaningful as IC_64BIT_XD")   \
  ENUM_ENTRY(IC_64BIT_REXW_XS,      6,  "OPSIZE could mean a different "       \
                                        "opcode")                              \
  ENUM_ENTRY(IC_64BIT_REXW_XD,      6,  "Just as meaningful as "               \
                                        "IC_64BIT_REXW_XS")                    \
  ENUM_ENTRY(IC_64BIT_REXW_OPSIZE,  7,  "The Dynamic Duo!  Prefer over all "   \
                                        "else because this changes most "      \
                                        "operands' meaning")                   \
  ENUM_ENTRY(IC_VEX,                1,  "requires a VEX prefix")               \
  ENUM_ENTRY(IC_VEX_XS,             2,  "requires VEX and the XS prefix")      \
  ENUM_ENTRY(IC_VEX_XD,             2,  "requires VEX and the XD prefix")      \
  ENUM_ENTRY(IC_VEX_OPSIZE,         2,  "requires VEX and the OpSize prefix")  \
  ENUM_ENTRY(IC_VEX_W,              3,  "requires VEX and the W prefix")       \
  ENUM_ENTRY(IC_VEX_W_XS,           4,  "requires VEX, W, and XS prefix")      \
  ENUM_ENTRY(IC_VEX_W_XD,           4,  "requires VEX, W, and XD prefix")      \
  ENUM_ENTRY(IC_VEX_W_OPSIZE,       4,  "requires VEX, W, and OpSize")         \
  ENUM_ENTRY(IC_VEX_L,              3,  "requires VEX and the L prefix")       \
  ENUM_ENTRY(IC_VEX_L_XS,           4,  "requires VEX and the L and XS prefix")\
  ENUM_ENTRY(IC_VEX_L_XD,           4,  "requires VEX and the L and XS prefix")\
  ENUM_ENTRY(IC_VEX_L_OPSIZE,       4,  "requires VEX, L, and OpSize")


#define ENUM_ENTRY(n, r, d) n,    
typedef enum {
  INSTRUCTION_CONTEXTS
  IC_max
} InstructionContext;
#undef ENUM_ENTRY

/*
 * Opcode types, which determine which decode table to use, both in the Intel
 * manual and also for the decoder.
 */
typedef enum {
  ONEBYTE       = 0,
  TWOBYTE       = 1,
  THREEBYTE_38  = 2,
  THREEBYTE_3A  = 3,
  THREEBYTE_A6  = 4,
  THREEBYTE_A7  = 5
} OpcodeType;

/*
 * The following structs are used for the hierarchical decode table.  After
 * determining the instruction's class (i.e., which IC_* constant applies to
 * it), the decoder reads the opcode.  Some instructions require specific
 * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
 *
 * If a ModR/M byte is not required, "required" is left unset, and the values
 * for each instructionID are identical.
 */
 
typedef uint16_t InstrUID;

/*
 * ModRMDecisionType - describes the type of ModR/M decision, allowing the 
 * consumer to determine the number of entries in it.
 *
 * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
 *                  instruction is the same.
 * MODRM_SPLITRM  - If the ModR/M byte is between 0x00 and 0xbf, the opcode
 *                  corresponds to one instruction; otherwise, it corresponds to
 *                  a different instruction.
 * MODRM_FULL     - Potentially, each value of the ModR/M byte could correspond
 *                  to a different instruction.
 */

#define MODRMTYPES            \
  ENUM_ENTRY(MODRM_ONEENTRY)  \
  ENUM_ENTRY(MODRM_SPLITRM)   \
  ENUM_ENTRY(MODRM_FULL)

#define ENUM_ENTRY(n) n,    
typedef enum {
  MODRMTYPES
  MODRM_max
} ModRMDecisionType;
#undef ENUM_ENTRY

/*
 * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which 
 *  instruction each possible value of the ModR/M byte corresponds to.  Once
 *  this information is known, we have narrowed down to a single instruction.
 */
struct ModRMDecision {
  uint8_t     modrm_type;
  
  /* The macro below must be defined wherever this file is included. */
  INSTRUCTION_IDS
};

/*
 * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
 *   given a particular opcode.
 */
struct OpcodeDecision {
  struct ModRMDecision modRMDecisions[256];
};

/*
 * ContextDecision - Specifies which opcode->instruction tables to look at given
 *   a particular context (set of attributes).  Since there are many possible
 *   contexts, the decoder first uses CONTEXTS_SYM to determine which context
 *   applies given a specific set of attributes.  Hence there are only IC_max
 *   entries in this table, rather than 2^(ATTR_max).
 */
struct ContextDecision {
  struct OpcodeDecision opcodeDecisions[IC_max];
};

/* 
 * Physical encodings of instruction operands.
 */

#define ENCODINGS                                                              \
  ENUM_ENTRY(ENCODING_NONE,   "")                                              \
  ENUM_ENTRY(ENCODING_REG,    "Register operand in ModR/M byte.")              \
  ENUM_ENTRY(ENCODING_RM,     "R/M operand in ModR/M byte.")                   \
  ENUM_ENTRY(ENCODING_VVVV,   "Register operand in VEX.vvvv byte.")            \
  ENUM_ENTRY(ENCODING_CB,     "1-byte code offset (possible new CS value)")    \
  ENUM_ENTRY(ENCODING_CW,     "2-byte")                                        \
  ENUM_ENTRY(ENCODING_CD,     "4-byte")                                        \
  ENUM_ENTRY(ENCODING_CP,     "6-byte")                                        \
  ENUM_ENTRY(ENCODING_CO,     "8-byte")                                        \
  ENUM_ENTRY(ENCODING_CT,     "10-byte")                                       \
  ENUM_ENTRY(ENCODING_IB,     "1-byte immediate")                              \
  ENUM_ENTRY(ENCODING_IW,     "2-byte")                                        \
  ENUM_ENTRY(ENCODING_ID,     "4-byte")                                        \
  ENUM_ENTRY(ENCODING_IO,     "8-byte")                                        \
  ENUM_ENTRY(ENCODING_RB,     "(AL..DIL, R8L..R15L) Register code added to "   \
                              "the opcode byte")                               \
  ENUM_ENTRY(ENCODING_RW,     "(AX..DI, R8W..R15W)")                           \
  ENUM_ENTRY(ENCODING_RD,     "(EAX..EDI, R8D..R15D)")                         \
  ENUM_ENTRY(ENCODING_RO,     "(RAX..RDI, R8..R15)")                           \
  ENUM_ENTRY(ENCODING_I,      "Position on floating-point stack added to the " \
                              "opcode byte")                                   \
                                                                               \
  ENUM_ENTRY(ENCODING_Iv,     "Immediate of operand size")                     \
  ENUM_ENTRY(ENCODING_Ia,     "Immediate of address size")                     \
  ENUM_ENTRY(ENCODING_Rv,     "Register code of operand size added to the "    \
                              "opcode byte")                                   \
  ENUM_ENTRY(ENCODING_DUP,    "Duplicate of another operand; ID is encoded "   \
                              "in type")

#define ENUM_ENTRY(n, d) n,    
  typedef enum {
    ENCODINGS
    ENCODING_max
  } OperandEncoding;
#undef ENUM_ENTRY

/* 
 * Semantic interpretations of instruction operands.
 */

#define TYPES                                                                  \
  ENUM_ENTRY(TYPE_NONE,       "")                                              \
  ENUM_ENTRY(TYPE_REL8,       "1-byte immediate address")                      \
  ENUM_ENTRY(TYPE_REL16,      "2-byte")                                        \
  ENUM_ENTRY(TYPE_REL32,      "4-byte")                                        \
  ENUM_ENTRY(TYPE_REL64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_PTR1616,    "2+2-byte segment+offset address")               \
  ENUM_ENTRY(TYPE_PTR1632,    "2+4-byte")                                      \
  ENUM_ENTRY(TYPE_PTR1664,    "2+8-byte")                                      \
  ENUM_ENTRY(TYPE_R8,         "1-byte register operand")                       \
  ENUM_ENTRY(TYPE_R16,        "2-byte")                                        \
  ENUM_ENTRY(TYPE_R32,        "4-byte")                                        \
  ENUM_ENTRY(TYPE_R64,        "8-byte")                                        \
  ENUM_ENTRY(TYPE_IMM8,       "1-byte immediate operand")                      \
  ENUM_ENTRY(TYPE_IMM16,      "2-byte")                                        \
  ENUM_ENTRY(TYPE_IMM32,      "4-byte")                                        \
  ENUM_ENTRY(TYPE_IMM64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_IMM3,       "1-byte immediate operand between 0 and 7")      \
  ENUM_ENTRY(TYPE_RM8,        "1-byte register or memory operand")             \
  ENUM_ENTRY(TYPE_RM16,       "2-byte")                                        \
  ENUM_ENTRY(TYPE_RM32,       "4-byte")                                        \
  ENUM_ENTRY(TYPE_RM64,       "8-byte")                                        \
  ENUM_ENTRY(TYPE_M,          "Memory operand")                                \
  ENUM_ENTRY(TYPE_M8,         "1-byte")                                        \
  ENUM_ENTRY(TYPE_M16,        "2-byte")                                        \
  ENUM_ENTRY(TYPE_M32,        "4-byte")                                        \
  ENUM_ENTRY(TYPE_M64,        "8-byte")                                        \
  ENUM_ENTRY(TYPE_LEA,        "Effective address")                             \
  ENUM_ENTRY(TYPE_M128,       "16-byte (SSE/SSE2)")                            \
  ENUM_ENTRY(TYPE_M256,       "256-byte (AVX)")                                \
  ENUM_ENTRY(TYPE_M1616,      "2+2-byte segment+offset address")               \
  ENUM_ENTRY(TYPE_M1632,      "2+4-byte")                                      \
  ENUM_ENTRY(TYPE_M1664,      "2+8-byte")                                      \
  ENUM_ENTRY(TYPE_M16_32,     "2+4-byte two-part memory operand (LIDT, LGDT)") \
  ENUM_ENTRY(TYPE_M16_16,     "2+2-byte (BOUND)")                              \
  ENUM_ENTRY(TYPE_M32_32,     "4+4-byte (BOUND)")                              \
  ENUM_ENTRY(TYPE_M16_64,     "2+8-byte (LIDT, LGDT)")                         \
  ENUM_ENTRY(TYPE_MOFFS8,     "1-byte memory offset (relative to segment "     \
                              "base)")                                         \
  ENUM_ENTRY(TYPE_MOFFS16,    "2-byte")                                        \
  ENUM_ENTRY(TYPE_MOFFS32,    "4-byte")                                        \
  ENUM_ENTRY(TYPE_MOFFS64,    "8-byte")                                        \
  ENUM_ENTRY(TYPE_SREG,       "Byte with single bit set: 0 = ES, 1 = CS, "     \
                              "2 = SS, 3 = DS, 4 = FS, 5 = GS")                \
  ENUM_ENTRY(TYPE_M32FP,      "32-bit IEE754 memory floating-point operand")   \
  ENUM_ENTRY(TYPE_M64FP,      "64-bit")                                        \
  ENUM_ENTRY(TYPE_M80FP,      "80-bit extended")                               \
  ENUM_ENTRY(TYPE_M16INT,     "2-byte memory integer operand for use in "      \
                              "floating-point instructions")                   \
  ENUM_ENTRY(TYPE_M32INT,     "4-byte")                                        \
  ENUM_ENTRY(TYPE_M64INT,     "8-byte")                                        \
  ENUM_ENTRY(TYPE_ST,         "Position on the floating-point stack")          \
  ENUM_ENTRY(TYPE_MM,         "MMX register operand")                          \
  ENUM_ENTRY(TYPE_MM32,       "4-byte MMX register or memory operand")         \
  ENUM_ENTRY(TYPE_MM64,       "8-byte")                                        \
  ENUM_ENTRY(TYPE_XMM,        "XMM register operand")                          \
  ENUM_ENTRY(TYPE_XMM32,      "4-byte XMM register or memory operand")         \
  ENUM_ENTRY(TYPE_XMM64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_XMM128,     "16-byte")                                       \
  ENUM_ENTRY(TYPE_XMM256,     "32-byte")                                       \
  ENUM_ENTRY(TYPE_XMM0,       "Implicit use of XMM0")                          \
  ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
  ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
  ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand")                      \
                                                                               \
  ENUM_ENTRY(TYPE_Mv,         "Memory operand of operand size")                \
  ENUM_ENTRY(TYPE_Rv,         "Register operand of operand size")              \
  ENUM_ENTRY(TYPE_IMMv,       "Immediate operand of operand size")             \
  ENUM_ENTRY(TYPE_RELv,       "Immediate address of operand size")             \
  ENUM_ENTRY(TYPE_DUP0,       "Duplicate of operand 0")                        \
  ENUM_ENTRY(TYPE_DUP1,       "operand 1")                                     \
  ENUM_ENTRY(TYPE_DUP2,       "operand 2")                                     \
  ENUM_ENTRY(TYPE_DUP3,       "operand 3")                                     \
  ENUM_ENTRY(TYPE_DUP4,       "operand 4")                                     \
  ENUM_ENTRY(TYPE_M512,       "512-bit FPU/MMX/XMM/MXCSR state")

#define ENUM_ENTRY(n, d) n,    
typedef enum {
  TYPES
  TYPE_max
} OperandType;
#undef ENUM_ENTRY

/* 
 * OperandSpecifier - The specification for how to extract and interpret one
 *   operand.
 */
struct OperandSpecifier {
  OperandEncoding  encoding;
  OperandType      type;
};

/*
 * Indicates where the opcode modifier (if any) is to be found.  Extended
 * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
 */

#define MODIFIER_TYPES        \
  ENUM_ENTRY(MODIFIER_NONE)   \
  ENUM_ENTRY(MODIFIER_OPCODE) \
  ENUM_ENTRY(MODIFIER_MODRM)

#define ENUM_ENTRY(n) n,
typedef enum {
  MODIFIER_TYPES
  MODIFIER_max
} ModifierType;
#undef ENUM_ENTRY

#define X86_MAX_OPERANDS 5

/*
 * The specification for how to extract and interpret a full instruction and
 * its operands.
 */
struct InstructionSpecifier {
  ModifierType modifierType;
  uint8_t modifierBase;
  struct OperandSpecifier operands[X86_MAX_OPERANDS];
  
  /* The macro below must be defined wherever this file is included. */
  INSTRUCTION_SPECIFIER_FIELDS
};

/*
 * Decoding mode for the Intel disassembler.  16-bit, 32-bit, and 64-bit mode
 * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
 * respectively.
 */
typedef enum {
  MODE_16BIT,
  MODE_32BIT,
  MODE_64BIT
} DisassemblerMode;

#endif