1 | /* ----------------------------------------------------------------------- * |
2 | * |
3 | * Copyright 1996-2012 The NASM Authors - All Rights Reserved |
4 | * See the file AUTHORS included with the NASM distribution for |
5 | * the specific copyright holders. |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following |
9 | * conditions are met: |
10 | * |
11 | * * Redistributions of source code must retain the above copyright |
12 | * notice, this list of conditions and the following disclaimer. |
13 | * * Redistributions in binary form must reproduce the above |
14 | * copyright notice, this list of conditions and the following |
15 | * disclaimer in the documentation and/or other materials provided |
16 | * with the distribution. |
17 | * |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
19 | * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
20 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
21 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
22 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
23 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
25 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
26 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
27 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
29 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
30 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
31 | * |
32 | * ----------------------------------------------------------------------- */ |
33 | |
34 | /* |
35 | * disasm.c where all the _work_ gets done in the Netwide Disassembler |
36 | */ |
37 | |
38 | #include "compiler.h" |
39 | |
40 | #include <stdio.h> |
41 | #include <string.h> |
42 | #include <limits.h> |
43 | |
44 | #include "nasm.h" |
45 | #include "disasm.h" |
46 | #include "sync.h" |
47 | #include "insns.h" |
48 | #include "tables.h" |
49 | #include "regdis.h" |
50 | #include "disp8.h" |
51 | |
/*
 * Bounds guard used while pulling bytes out of the input buffer.
 *
 *   begin  - first byte of the buffer
 *   cur    - current read position inside the buffer
 *   total  - number of bytes in the buffer
 *   want   - number of bytes about to be consumed
 *   action - statement executed when the guard trips
 *
 * The guard trips once the bytes already consumed reach (total - want),
 * i.e. unless strictly more than `want' bytes are still available.
 */
#define fetch_safe(begin, cur, total, want, action)             \
    do {                                                        \
        if (!(((cur) - (begin)) < ((total) - (want))))          \
            action;                                             \
    } while (0)

/* Same guard, but leave the enclosing function with a result of 0. */
#define fetch_or_return(begin, cur, total, want)                \
    fetch_safe(begin, cur, total, want, return 0)
60 | |
/*
 * Bit flags kept in the `segment' field of each operand of an `insn'
 * while it is being disassembled.
 */
#define SEG_RELATIVE    (1 << 0)    /* offset is relative (branches, RIP-relative EA) */
#define SEG_32BIT       (1 << 1)    /* 32-bit segment / 32-bit relative offset */
#define SEG_RMREG       (1 << 2)    /* operand is a register, not memory */
#define SEG_DISP8       (1 << 3)    /* EA carries an 8-bit displacement */
#define SEG_DISP16      (1 << 4)    /* EA carries a 16-bit displacement */
#define SEG_DISP32      (1 << 5)    /* EA carries a 32-bit displacement */
#define SEG_NODISP      (1 << 6)    /* EA has no displacement */
#define SEG_SIGNED      (1 << 7)    /* immediate was a sign-extended byte */
#define SEG_64BIT       (1 << 8)    /* 64-bit segment */
74 | |
/*
 * Everything learned from the prefix bytes in front of the opcode.
 */
struct prefix_info {
    /* Effective sizes, in bits */
    uint8_t osize;          /* operand size */
    uint8_t asize;          /* address size */

    /* Legacy prefixes: the prefix byte that was seen, 0 when absent */
    uint8_t osp;            /* operand size override */
    uint8_t asp;            /* address size override */
    uint8_t rep;            /* REP/REPNE (0xF3/0xF2) */
    uint8_t seg;            /* segment override */
    uint8_t wait;           /* WAIT "prefix" */
    uint8_t lock;           /* LOCK */

    /* VEX-style prefixes */
    uint8_t vex[3];         /* raw VEX prefix bytes */
    uint8_t vex_c;          /* VEX "class" (VEX, XOP, ...) */
    uint8_t vex_m;          /* VEX.M field */
    uint8_t vex_v;          /* VEX.V register field, stored un-inverted */
    uint8_t vex_lp;         /* VEX.LP fields */

    uint32_t rex;           /* REX_* flag bits, also synthesized from VEX */
    uint8_t evex[3];        /* EVEX prefix bytes */
};
95 | |
/*
 * Little-endian readers for the instruction byte stream.
 *
 * The multi-byte readers assemble their result one byte at a time, so
 * they are valid for any alignment and on hosts of either endianness,
 * and never access the byte buffer through an incompatible pointer type.
 * The former X86_MEMORY fast path dereferenced the buffer as
 * uint16_t/uint32_t/uint64_t, which violates the C aliasing rules and
 * assumes unaligned loads are legal; the byte-wise form has no such
 * requirement and is now used on every host.
 */
#define getu8(x) (*(const uint8_t *)(x))

/* Read an unsigned 16-bit little-endian value from data[0..1] */
static uint16_t getu16(const uint8_t *data)
{
    return (uint16_t)(data[0] | ((unsigned)data[1] << 8));
}

/* Read an unsigned 32-bit little-endian value from data[0..3] */
static uint32_t getu32(const uint8_t *data)
{
    return (uint32_t)getu16(data) | ((uint32_t)getu16(data + 2) << 16);
}

/* Read an unsigned 64-bit little-endian value from data[0..7] */
static uint64_t getu64(const uint8_t *data)
{
    return (uint64_t)getu32(data) | ((uint64_t)getu32(data + 4) << 32);
}

/* Signed variants: same bytes, reinterpreted as two's complement */
#define gets8(x) ((int8_t)getu8(x))
#define gets16(x) ((int16_t)getu16(x))
#define gets32(x) ((int32_t)getu32(x))
#define gets64(x) ((int64_t)getu64(x))
121 | |
122 | /* Important: regval must already have been adjusted for rex extensions */ |
123 | static enum reg_enum whichreg(opflags_t regflags, int regval, int rex) |
124 | { |
125 | size_t i; |
126 | |
127 | static const struct { |
128 | opflags_t flags; |
129 | enum reg_enum reg; |
130 | } specific_registers[] = { |
131 | {REG_AL, R_AL}, |
132 | {REG_AX, R_AX}, |
133 | {REG_EAX, R_EAX}, |
134 | {REG_RAX, R_RAX}, |
135 | {REG_DL, R_DL}, |
136 | {REG_DX, R_DX}, |
137 | {REG_EDX, R_EDX}, |
138 | {REG_RDX, R_RDX}, |
139 | {REG_CL, R_CL}, |
140 | {REG_CX, R_CX}, |
141 | {REG_ECX, R_ECX}, |
142 | {REG_RCX, R_RCX}, |
143 | {FPU0, R_ST0}, |
144 | {XMM0, R_XMM0}, |
145 | {YMM0, R_YMM0}, |
146 | {ZMM0, R_ZMM0}, |
147 | {REG_ES, R_ES}, |
148 | {REG_CS, R_CS}, |
149 | {REG_SS, R_SS}, |
150 | {REG_DS, R_DS}, |
151 | {REG_FS, R_FS}, |
152 | {REG_GS, R_GS}, |
153 | {OPMASK0, R_K0}, |
154 | }; |
155 | |
156 | if (!(regflags & (REGISTER|REGMEM))) |
157 | return 0; /* Registers not permissible?! */ |
158 | |
159 | regflags |= REGISTER; |
160 | |
161 | for (i = 0; i < ARRAY_SIZE(specific_registers); i++) |
162 | if (!(specific_registers[i].flags & ~regflags)) |
163 | return specific_registers[i].reg; |
164 | |
165 | /* All the entries below look up regval in an 16-entry array */ |
166 | if (regval < 0 || regval > (rex & REX_EV ? 31 : 15)) |
167 | return 0; |
168 | |
169 | #define GET_REGISTER(__array, __index) \ |
170 | ((size_t)(__index) < (size_t)ARRAY_SIZE(__array) ? __array[(__index)] : 0) |
171 | |
172 | if (!(REG8 & ~regflags)) { |
173 | if (rex & (REX_P|REX_NH)) |
174 | return GET_REGISTER(nasm_rd_reg8_rex, regval); |
175 | else |
176 | return GET_REGISTER(nasm_rd_reg8, regval); |
177 | } |
178 | if (!(REG16 & ~regflags)) |
179 | return GET_REGISTER(nasm_rd_reg16, regval); |
180 | if (!(REG32 & ~regflags)) |
181 | return GET_REGISTER(nasm_rd_reg32, regval); |
182 | if (!(REG64 & ~regflags)) |
183 | return GET_REGISTER(nasm_rd_reg64, regval); |
184 | if (!(REG_SREG & ~regflags)) |
185 | return GET_REGISTER(nasm_rd_sreg, regval & 7); /* Ignore REX */ |
186 | if (!(REG_CREG & ~regflags)) |
187 | return GET_REGISTER(nasm_rd_creg, regval); |
188 | if (!(REG_DREG & ~regflags)) |
189 | return GET_REGISTER(nasm_rd_dreg, regval); |
190 | if (!(REG_TREG & ~regflags)) { |
191 | if (regval > 7) |
192 | return 0; /* TR registers are ill-defined with rex */ |
193 | return GET_REGISTER(nasm_rd_treg, regval); |
194 | } |
195 | if (!(FPUREG & ~regflags)) |
196 | return GET_REGISTER(nasm_rd_fpureg, regval & 7); /* Ignore REX */ |
197 | if (!(MMXREG & ~regflags)) |
198 | return GET_REGISTER(nasm_rd_mmxreg, regval & 7); /* Ignore REX */ |
199 | if (!(XMMREG & ~regflags)) |
200 | return GET_REGISTER(nasm_rd_xmmreg, regval); |
201 | if (!(YMMREG & ~regflags)) |
202 | return GET_REGISTER(nasm_rd_ymmreg, regval); |
203 | if (!(ZMMREG & ~regflags)) |
204 | return GET_REGISTER(nasm_rd_zmmreg, regval); |
205 | if (!(OPMASKREG & ~regflags)) |
206 | return GET_REGISTER(nasm_rd_opmaskreg, regval); |
207 | if (!(BNDREG & ~regflags)) |
208 | return GET_REGISTER(nasm_rd_bndreg, regval); |
209 | |
210 | #undef GET_REGISTER |
211 | return 0; |
212 | } |
213 | |
214 | static uint32_t append_evex_reg_deco(char *buf, uint32_t num, |
215 | decoflags_t deco, uint8_t *evex) |
216 | { |
217 | const char * const er_names[] = {"rn-sae" , "rd-sae" , "ru-sae" , "rz-sae" }; |
218 | uint32_t num_chars = 0; |
219 | |
220 | if ((deco & MASK) && (evex[2] & EVEX_P2AAA)) { |
221 | enum reg_enum opmasknum = nasm_rd_opmaskreg[evex[2] & EVEX_P2AAA]; |
222 | const char * regname = nasm_reg_names[opmasknum - EXPR_REG_START]; |
223 | |
224 | num_chars += snprintf(buf + num_chars, num - num_chars, |
225 | "{%s}" , regname); |
226 | |
227 | if ((deco & Z) && (evex[2] & EVEX_P2Z)) { |
228 | num_chars += snprintf(buf + num_chars, num - num_chars, |
229 | "{z}" ); |
230 | } |
231 | } |
232 | |
233 | if (evex[2] & EVEX_P2B) { |
234 | if (deco & ER) { |
235 | uint8_t er_type = (evex[2] & EVEX_P2LL) >> 5; |
236 | num_chars += snprintf(buf + num_chars, num - num_chars, |
237 | ",{%s}" , er_names[er_type]); |
238 | } else if (deco & SAE) { |
239 | num_chars += snprintf(buf + num_chars, num - num_chars, |
240 | ",{sae}" ); |
241 | } |
242 | } |
243 | |
244 | return num_chars; |
245 | } |
246 | |
247 | static uint32_t append_evex_mem_deco(char *buf, uint32_t num, opflags_t type, |
248 | decoflags_t deco, uint8_t *evex) |
249 | { |
250 | uint32_t num_chars = 0; |
251 | |
252 | if ((evex[2] & EVEX_P2B) && (deco & BRDCAST_MASK)) { |
253 | decoflags_t deco_brsize = deco & BRSIZE_MASK; |
254 | opflags_t template_opsize = (deco_brsize == BR_BITS32 ? BITS32 : BITS64); |
255 | uint8_t br_num = (type & SIZE_MASK) / BITS128 * |
256 | BITS64 / template_opsize * 2; |
257 | |
258 | num_chars += snprintf(buf + num_chars, num - num_chars, |
259 | "{1to%d}" , br_num); |
260 | } |
261 | |
262 | if ((deco & MASK) && (evex[2] & EVEX_P2AAA)) { |
263 | enum reg_enum opmasknum = nasm_rd_opmaskreg[evex[2] & EVEX_P2AAA]; |
264 | const char * regname = nasm_reg_names[opmasknum - EXPR_REG_START]; |
265 | |
266 | num_chars += snprintf(buf + num_chars, num - num_chars, |
267 | "{%s}" , regname); |
268 | |
269 | if ((deco & Z) && (evex[2] & EVEX_P2Z)) { |
270 | num_chars += snprintf(buf + num_chars, num - num_chars, |
271 | "{z}" ); |
272 | } |
273 | } |
274 | |
275 | |
276 | return num_chars; |
277 | } |
278 | |
279 | /* |
280 | * Process an effective address (ModRM) specification. |
281 | */ |
282 | static uint8_t *do_ea(uint8_t *data, int modrm, int asize, |
283 | int segsize, enum ea_type type, |
284 | operand *op, insn *ins) |
285 | { |
286 | int mod, rm, scale, index, base; |
287 | int rex; |
288 | uint8_t *evex; |
289 | uint8_t sib = 0; |
290 | bool is_evex = !!(ins->rex & REX_EV); |
291 | |
292 | mod = (modrm >> 6) & 03; |
293 | rm = modrm & 07; |
294 | |
295 | if (mod != 3 && asize != 16 && rm == 4) |
296 | sib = *data++; |
297 | |
298 | rex = ins->rex; |
299 | evex = ins->evex_p; |
300 | |
301 | if (mod == 3) { /* pure register version */ |
302 | op->basereg = rm+(rex & REX_B ? 8 : 0); |
303 | op->segment |= SEG_RMREG; |
304 | if (is_evex && segsize == 64) { |
305 | op->basereg += (evex[0] & EVEX_P0X ? 0 : 16); |
306 | } |
307 | return data; |
308 | } |
309 | |
310 | op->disp_size = 0; |
311 | op->eaflags = 0; |
312 | |
313 | if (asize == 16) { |
314 | /* |
315 | * <mod> specifies the displacement size (none, byte or |
316 | * word), and <rm> specifies the register combination. |
317 | * Exception: mod=0,rm=6 does not specify [BP] as one might |
318 | * expect, but instead specifies [disp16]. |
319 | */ |
320 | |
321 | if (type != EA_SCALAR) |
322 | return NULL; |
323 | |
324 | op->indexreg = op->basereg = -1; |
325 | op->scale = 1; /* always, in 16 bits */ |
326 | switch (rm) { |
327 | case 0: |
328 | op->basereg = R_BX; |
329 | op->indexreg = R_SI; |
330 | break; |
331 | case 1: |
332 | op->basereg = R_BX; |
333 | op->indexreg = R_DI; |
334 | break; |
335 | case 2: |
336 | op->basereg = R_BP; |
337 | op->indexreg = R_SI; |
338 | break; |
339 | case 3: |
340 | op->basereg = R_BP; |
341 | op->indexreg = R_DI; |
342 | break; |
343 | case 4: |
344 | op->basereg = R_SI; |
345 | break; |
346 | case 5: |
347 | op->basereg = R_DI; |
348 | break; |
349 | case 6: |
350 | op->basereg = R_BP; |
351 | break; |
352 | case 7: |
353 | op->basereg = R_BX; |
354 | break; |
355 | } |
356 | if (rm == 6 && mod == 0) { /* special case */ |
357 | op->basereg = -1; |
358 | if (segsize != 16) |
359 | op->disp_size = 16; |
360 | mod = 2; /* fake disp16 */ |
361 | } |
362 | switch (mod) { |
363 | case 0: |
364 | op->segment |= SEG_NODISP; |
365 | break; |
366 | case 1: |
367 | op->segment |= SEG_DISP8; |
368 | if (ins->evex_tuple != 0) { |
369 | op->offset = gets8(data) * get_disp8N(ins); |
370 | } else { |
371 | op->offset = gets8(data); |
372 | } |
373 | data++; |
374 | break; |
375 | case 2: |
376 | op->segment |= SEG_DISP16; |
377 | op->offset = *data++; |
378 | op->offset |= ((unsigned)*data++) << 8; |
379 | break; |
380 | } |
381 | return data; |
382 | } else { |
383 | /* |
384 | * Once again, <mod> specifies displacement size (this time |
385 | * none, byte or *dword*), while <rm> specifies the base |
386 | * register. Again, [EBP] is missing, replaced by a pure |
387 | * disp32 (this time that's mod=0,rm=*5*) in 32-bit mode, |
388 | * and RIP-relative addressing in 64-bit mode. |
389 | * |
390 | * However, rm=4 |
391 | * indicates not a single base register, but instead the |
392 | * presence of a SIB byte... |
393 | */ |
394 | int a64 = asize == 64; |
395 | |
396 | op->indexreg = -1; |
397 | |
398 | if (a64) |
399 | op->basereg = nasm_rd_reg64[rm | ((rex & REX_B) ? 8 : 0)]; |
400 | else |
401 | op->basereg = nasm_rd_reg32[rm | ((rex & REX_B) ? 8 : 0)]; |
402 | |
403 | if (rm == 5 && mod == 0) { |
404 | if (segsize == 64) { |
405 | op->eaflags |= EAF_REL; |
406 | op->segment |= SEG_RELATIVE; |
407 | } |
408 | |
409 | if (asize != 64) |
410 | op->disp_size = asize; |
411 | |
412 | op->basereg = -1; |
413 | mod = 2; /* fake disp32 */ |
414 | } |
415 | |
416 | |
417 | if (rm == 4) { /* process SIB */ |
418 | uint8_t vsib_hi = 0; |
419 | scale = (sib >> 6) & 03; |
420 | index = (sib >> 3) & 07; |
421 | base = sib & 07; |
422 | |
423 | op->scale = 1 << scale; |
424 | |
425 | if (segsize == 64) { |
426 | vsib_hi = (rex & REX_X ? 8 : 0) | |
427 | (evex[2] & EVEX_P2VP ? 0 : 16); |
428 | } |
429 | |
430 | if (type == EA_XMMVSIB) |
431 | op->indexreg = nasm_rd_xmmreg[index | vsib_hi]; |
432 | else if (type == EA_YMMVSIB) |
433 | op->indexreg = nasm_rd_ymmreg[index | vsib_hi]; |
434 | else if (type == EA_ZMMVSIB) |
435 | op->indexreg = nasm_rd_zmmreg[index | vsib_hi]; |
436 | else if (index == 4 && !(rex & REX_X)) |
437 | op->indexreg = -1; /* ESP/RSP cannot be an index */ |
438 | else if (a64) |
439 | op->indexreg = nasm_rd_reg64[index | ((rex & REX_X) ? 8 : 0)]; |
440 | else |
441 | op->indexreg = nasm_rd_reg32[index | ((rex & REX_X) ? 8 : 0)]; |
442 | |
443 | if (base == 5 && mod == 0) { |
444 | op->basereg = -1; |
445 | mod = 2; /* Fake disp32 */ |
446 | } else if (a64) |
447 | op->basereg = nasm_rd_reg64[base | ((rex & REX_B) ? 8 : 0)]; |
448 | else |
449 | op->basereg = nasm_rd_reg32[base | ((rex & REX_B) ? 8 : 0)]; |
450 | |
451 | if (segsize == 16) |
452 | op->disp_size = 32; |
453 | } else if (type != EA_SCALAR) { |
454 | /* Can't have VSIB without SIB */ |
455 | return NULL; |
456 | } |
457 | |
458 | switch (mod) { |
459 | case 0: |
460 | op->segment |= SEG_NODISP; |
461 | break; |
462 | case 1: |
463 | op->segment |= SEG_DISP8; |
464 | if (ins->evex_tuple != 0) { |
465 | op->offset = gets8(data) * get_disp8N(ins); |
466 | } else { |
467 | op->offset = gets8(data); |
468 | } |
469 | data++; |
470 | break; |
471 | case 2: |
472 | op->segment |= SEG_DISP32; |
473 | op->offset = gets32(data); |
474 | data += 4; |
475 | break; |
476 | } |
477 | return data; |
478 | } |
479 | } |
480 | |
481 | /* |
482 | * Determine whether the instruction template in t corresponds to the data |
483 | * stream in data. Return the number of bytes matched if so. |
484 | */ |
485 | #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3 |
486 | |
487 | static int matches(const struct itemplate *t, uint8_t *data, |
488 | const struct prefix_info *prefix, int segsize, insn *ins) |
489 | { |
490 | uint8_t *r = (uint8_t *)(t->code); |
491 | uint8_t *origdata = data; |
492 | bool a_used = false, o_used = false; |
493 | enum prefixes drep = 0; |
494 | enum prefixes dwait = 0; |
495 | uint8_t lock = prefix->lock; |
496 | int osize = prefix->osize; |
497 | int asize = prefix->asize; |
498 | int i, c; |
499 | int op1, op2; |
500 | struct operand *opx, *opy; |
501 | uint8_t opex = 0; |
502 | bool vex_ok = false; |
503 | int regmask = (segsize == 64) ? 15 : 7; |
504 | enum ea_type eat = EA_SCALAR; |
505 | |
506 | for (i = 0; i < MAX_OPERANDS; i++) { |
507 | ins->oprs[i].segment = ins->oprs[i].disp_size = |
508 | (segsize == 64 ? SEG_64BIT : segsize == 32 ? SEG_32BIT : 0); |
509 | } |
510 | ins->condition = -1; |
511 | ins->evex_tuple = 0; |
512 | ins->rex = prefix->rex; |
513 | memset(ins->prefixes, 0, sizeof ins->prefixes); |
514 | |
515 | if (itemp_has(t, (segsize == 64 ? IF_NOLONG : IF_LONG))) |
516 | return 0; |
517 | |
518 | if (prefix->rep == 0xF2) |
519 | drep = (itemp_has(t, IF_BND) ? P_BND : P_REPNE); |
520 | else if (prefix->rep == 0xF3) |
521 | drep = P_REP; |
522 | |
523 | dwait = prefix->wait ? P_WAIT : 0; |
524 | |
525 | while ((c = *r++) != 0) { |
526 | op1 = (c & 3) + ((opex & 1) << 2); |
527 | op2 = ((c >> 3) & 3) + ((opex & 2) << 1); |
528 | opx = &ins->oprs[op1]; |
529 | opy = &ins->oprs[op2]; |
530 | opex = 0; |
531 | |
532 | switch (c) { |
533 | case 01: |
534 | case 02: |
535 | case 03: |
536 | case 04: |
537 | while (c--) |
538 | if (*r++ != *data++) |
539 | return 0; |
540 | break; |
541 | |
542 | case 05: |
543 | case 06: |
544 | case 07: |
545 | opex = c; |
546 | break; |
547 | |
548 | case4(010): |
549 | { |
550 | int t = *r++, d = *data++; |
551 | if (d < t || d > t + 7) |
552 | return 0; |
553 | else { |
554 | opx->basereg = (d-t)+ |
555 | (ins->rex & REX_B ? 8 : 0); |
556 | opx->segment |= SEG_RMREG; |
557 | } |
558 | break; |
559 | } |
560 | |
561 | case4(014): |
562 | /* this is an separate index reg position of MIB operand (ICC) */ |
563 | /* Disassembler uses NASM's split EA form only */ |
564 | break; |
565 | |
566 | case4(0274): |
567 | opx->offset = (int8_t)*data++; |
568 | opx->segment |= SEG_SIGNED; |
569 | break; |
570 | |
571 | case4(020): |
572 | opx->offset = *data++; |
573 | break; |
574 | |
575 | case4(024): |
576 | opx->offset = *data++; |
577 | break; |
578 | |
579 | case4(030): |
580 | opx->offset = getu16(data); |
581 | data += 2; |
582 | break; |
583 | |
584 | case4(034): |
585 | if (osize == 32) { |
586 | opx->offset = getu32(data); |
587 | data += 4; |
588 | } else { |
589 | opx->offset = getu16(data); |
590 | data += 2; |
591 | } |
592 | if (segsize != asize) |
593 | opx->disp_size = asize; |
594 | break; |
595 | |
596 | case4(040): |
597 | opx->offset = getu32(data); |
598 | data += 4; |
599 | break; |
600 | |
601 | case4(0254): |
602 | opx->offset = gets32(data); |
603 | data += 4; |
604 | break; |
605 | |
606 | case4(044): |
607 | switch (asize) { |
608 | case 16: |
609 | opx->offset = getu16(data); |
610 | data += 2; |
611 | if (segsize != 16) |
612 | opx->disp_size = 16; |
613 | break; |
614 | case 32: |
615 | opx->offset = getu32(data); |
616 | data += 4; |
617 | if (segsize == 16) |
618 | opx->disp_size = 32; |
619 | break; |
620 | case 64: |
621 | opx->offset = getu64(data); |
622 | opx->disp_size = 64; |
623 | data += 8; |
624 | break; |
625 | } |
626 | break; |
627 | |
628 | case4(050): |
629 | opx->offset = gets8(data++); |
630 | opx->segment |= SEG_RELATIVE; |
631 | break; |
632 | |
633 | case4(054): |
634 | opx->offset = getu64(data); |
635 | data += 8; |
636 | break; |
637 | |
638 | case4(060): |
639 | opx->offset = gets16(data); |
640 | data += 2; |
641 | opx->segment |= SEG_RELATIVE; |
642 | opx->segment &= ~SEG_32BIT; |
643 | break; |
644 | |
645 | case4(064): /* rel */ |
646 | opx->segment |= SEG_RELATIVE; |
647 | /* In long mode rel is always 32 bits, sign extended. */ |
648 | if (segsize == 64 || osize == 32) { |
649 | opx->offset = gets32(data); |
650 | data += 4; |
651 | if (segsize != 64) |
652 | opx->segment |= SEG_32BIT; |
653 | opx->type = (opx->type & ~SIZE_MASK) |
654 | | (segsize == 64 ? BITS64 : BITS32); |
655 | } else { |
656 | opx->offset = gets16(data); |
657 | data += 2; |
658 | opx->segment &= ~SEG_32BIT; |
659 | opx->type = (opx->type & ~SIZE_MASK) | BITS16; |
660 | } |
661 | break; |
662 | |
663 | case4(070): |
664 | opx->offset = gets32(data); |
665 | data += 4; |
666 | opx->segment |= SEG_32BIT | SEG_RELATIVE; |
667 | break; |
668 | |
669 | case4(0100): |
670 | case4(0110): |
671 | case4(0120): |
672 | case4(0130): |
673 | { |
674 | int modrm = *data++; |
675 | opx->segment |= SEG_RMREG; |
676 | data = do_ea(data, modrm, asize, segsize, eat, opy, ins); |
677 | if (!data) |
678 | return 0; |
679 | opx->basereg = ((modrm >> 3) & 7) + (ins->rex & REX_R ? 8 : 0); |
680 | if ((ins->rex & REX_EV) && (segsize == 64)) |
681 | opx->basereg += (ins->evex_p[0] & EVEX_P0RP ? 0 : 16); |
682 | break; |
683 | } |
684 | |
685 | case 0172: |
686 | { |
687 | uint8_t ximm = *data++; |
688 | c = *r++; |
689 | ins->oprs[c >> 3].basereg = (ximm >> 4) & regmask; |
690 | ins->oprs[c >> 3].segment |= SEG_RMREG; |
691 | ins->oprs[c & 7].offset = ximm & 15; |
692 | } |
693 | break; |
694 | |
695 | case 0173: |
696 | { |
697 | uint8_t ximm = *data++; |
698 | c = *r++; |
699 | |
700 | if ((c ^ ximm) & 15) |
701 | return 0; |
702 | |
703 | ins->oprs[c >> 4].basereg = (ximm >> 4) & regmask; |
704 | ins->oprs[c >> 4].segment |= SEG_RMREG; |
705 | } |
706 | break; |
707 | |
708 | case4(0174): |
709 | { |
710 | uint8_t ximm = *data++; |
711 | |
712 | opx->basereg = (ximm >> 4) & regmask; |
713 | opx->segment |= SEG_RMREG; |
714 | } |
715 | break; |
716 | |
717 | case4(0200): |
718 | case4(0204): |
719 | case4(0210): |
720 | case4(0214): |
721 | case4(0220): |
722 | case4(0224): |
723 | case4(0230): |
724 | case4(0234): |
725 | { |
726 | int modrm = *data++; |
727 | if (((modrm >> 3) & 07) != (c & 07)) |
728 | return 0; /* spare field doesn't match up */ |
729 | data = do_ea(data, modrm, asize, segsize, eat, opy, ins); |
730 | if (!data) |
731 | return 0; |
732 | break; |
733 | } |
734 | |
735 | case4(0240): |
736 | case 0250: |
737 | { |
738 | uint8_t evexm = *r++; |
739 | uint8_t evexwlp = *r++; |
740 | uint8_t modrm, valid_mask; |
741 | ins->evex_tuple = *r++ - 0300; |
742 | modrm = *(origdata + 1); |
743 | |
744 | ins->rex |= REX_EV; |
745 | if ((prefix->rex & (REX_EV|REX_V|REX_P)) != REX_EV) |
746 | return 0; |
747 | |
748 | if ((evexm & 0x1f) != prefix->vex_m) |
749 | return 0; |
750 | |
751 | switch (evexwlp & 060) { |
752 | case 000: |
753 | if (prefix->rex & REX_W) |
754 | return 0; |
755 | break; |
756 | case 020: |
757 | if (!(prefix->rex & REX_W)) |
758 | return 0; |
759 | ins->rex |= REX_W; |
760 | break; |
761 | case 040: /* VEX.W is a don't care */ |
762 | ins->rex &= ~REX_W; |
763 | break; |
764 | case 060: |
765 | break; |
766 | } |
767 | |
768 | /* If EVEX.b is set with reg-reg op, |
769 | * EVEX.L'L contains embedded rounding control info |
770 | */ |
771 | if ((prefix->evex[2] & EVEX_P2B) && ((modrm >> 6) == 3)) { |
772 | valid_mask = 0x3; /* prefix only */ |
773 | } else { |
774 | valid_mask = 0xf; /* vector length and prefix */ |
775 | } |
776 | if ((evexwlp ^ prefix->vex_lp) & valid_mask) |
777 | return 0; |
778 | |
779 | if (c == 0250) { |
780 | if ((prefix->vex_v != 0) || |
781 | (!(prefix->evex[2] & EVEX_P2VP) && |
782 | ((eat < EA_XMMVSIB) || (eat > EA_ZMMVSIB)))) |
783 | return 0; |
784 | } else { |
785 | opx->segment |= SEG_RMREG; |
786 | opx->basereg = ((~prefix->evex[2] & EVEX_P2VP) << (4 - 3) ) | |
787 | prefix->vex_v; |
788 | } |
789 | vex_ok = true; |
790 | memcpy(ins->evex_p, prefix->evex, 3); |
791 | break; |
792 | } |
793 | |
794 | case4(0260): |
795 | case 0270: |
796 | { |
797 | int vexm = *r++; |
798 | int vexwlp = *r++; |
799 | |
800 | ins->rex |= REX_V; |
801 | if ((prefix->rex & (REX_V|REX_P)) != REX_V) |
802 | return 0; |
803 | |
804 | if ((vexm & 0x1f) != prefix->vex_m) |
805 | return 0; |
806 | |
807 | switch (vexwlp & 060) { |
808 | case 000: |
809 | if (prefix->rex & REX_W) |
810 | return 0; |
811 | break; |
812 | case 020: |
813 | if (!(prefix->rex & REX_W)) |
814 | return 0; |
815 | ins->rex &= ~REX_W; |
816 | break; |
817 | case 040: /* VEX.W is a don't care */ |
818 | ins->rex &= ~REX_W; |
819 | break; |
820 | case 060: |
821 | break; |
822 | } |
823 | |
824 | /* The 010 bit of vexwlp is set if VEX.L is ignored */ |
825 | if ((vexwlp ^ prefix->vex_lp) & ((vexwlp & 010) ? 03 : 07)) |
826 | return 0; |
827 | |
828 | if (c == 0270) { |
829 | if (prefix->vex_v != 0) |
830 | return 0; |
831 | } else { |
832 | opx->segment |= SEG_RMREG; |
833 | opx->basereg = prefix->vex_v; |
834 | } |
835 | vex_ok = true; |
836 | break; |
837 | } |
838 | |
839 | case 0271: |
840 | if (prefix->rep == 0xF3) |
841 | drep = P_XRELEASE; |
842 | break; |
843 | |
844 | case 0272: |
845 | if (prefix->rep == 0xF2) |
846 | drep = P_XACQUIRE; |
847 | else if (prefix->rep == 0xF3) |
848 | drep = P_XRELEASE; |
849 | break; |
850 | |
851 | case 0273: |
852 | if (prefix->lock == 0xF0) { |
853 | if (prefix->rep == 0xF2) |
854 | drep = P_XACQUIRE; |
855 | else if (prefix->rep == 0xF3) |
856 | drep = P_XRELEASE; |
857 | } |
858 | break; |
859 | |
860 | case 0310: |
861 | if (asize != 16) |
862 | return 0; |
863 | else |
864 | a_used = true; |
865 | break; |
866 | |
867 | case 0311: |
868 | if (asize != 32) |
869 | return 0; |
870 | else |
871 | a_used = true; |
872 | break; |
873 | |
874 | case 0312: |
875 | if (asize != segsize) |
876 | return 0; |
877 | else |
878 | a_used = true; |
879 | break; |
880 | |
881 | case 0313: |
882 | if (asize != 64) |
883 | return 0; |
884 | else |
885 | a_used = true; |
886 | break; |
887 | |
888 | case 0314: |
889 | if (prefix->rex & REX_B) |
890 | return 0; |
891 | break; |
892 | |
893 | case 0315: |
894 | if (prefix->rex & REX_X) |
895 | return 0; |
896 | break; |
897 | |
898 | case 0316: |
899 | if (prefix->rex & REX_R) |
900 | return 0; |
901 | break; |
902 | |
903 | case 0317: |
904 | if (prefix->rex & REX_W) |
905 | return 0; |
906 | break; |
907 | |
908 | case 0320: |
909 | if (osize != 16) |
910 | return 0; |
911 | else |
912 | o_used = true; |
913 | break; |
914 | |
915 | case 0321: |
916 | if (osize != 32) |
917 | return 0; |
918 | else |
919 | o_used = true; |
920 | break; |
921 | |
922 | case 0322: |
923 | if (osize != (segsize == 16 ? 16 : 32)) |
924 | return 0; |
925 | else |
926 | o_used = true; |
927 | break; |
928 | |
929 | case 0323: |
930 | ins->rex |= REX_W; /* 64-bit only instruction */ |
931 | osize = 64; |
932 | o_used = true; |
933 | break; |
934 | |
935 | case 0324: |
936 | if (osize != 64) |
937 | return 0; |
938 | o_used = true; |
939 | break; |
940 | |
941 | case 0325: |
942 | ins->rex |= REX_NH; |
943 | break; |
944 | |
945 | case 0330: |
946 | { |
947 | int t = *r++, d = *data++; |
948 | if (d < t || d > t + 15) |
949 | return 0; |
950 | else |
951 | ins->condition = d - t; |
952 | break; |
953 | } |
954 | |
955 | case 0326: |
956 | if (prefix->rep == 0xF3) |
957 | return 0; |
958 | break; |
959 | |
960 | case 0331: |
961 | if (prefix->rep) |
962 | return 0; |
963 | break; |
964 | |
965 | case 0332: |
966 | if (prefix->rep != 0xF2) |
967 | return 0; |
968 | drep = 0; |
969 | break; |
970 | |
971 | case 0333: |
972 | if (prefix->rep != 0xF3) |
973 | return 0; |
974 | drep = 0; |
975 | break; |
976 | |
977 | case 0334: |
978 | if (lock) { |
979 | ins->rex |= REX_R; |
980 | lock = 0; |
981 | } |
982 | break; |
983 | |
984 | case 0335: |
985 | if (drep == P_REP) |
986 | drep = P_REPE; |
987 | break; |
988 | |
989 | case 0336: |
990 | case 0337: |
991 | break; |
992 | |
993 | case 0340: |
994 | return 0; |
995 | |
996 | case 0341: |
997 | if (prefix->wait != 0x9B) |
998 | return 0; |
999 | dwait = 0; |
1000 | break; |
1001 | |
1002 | case 0360: |
1003 | if (prefix->osp || prefix->rep) |
1004 | return 0; |
1005 | break; |
1006 | |
1007 | case 0361: |
1008 | if (!prefix->osp || prefix->rep) |
1009 | return 0; |
1010 | o_used = true; |
1011 | break; |
1012 | |
1013 | case 0364: |
1014 | if (prefix->osp) |
1015 | return 0; |
1016 | break; |
1017 | |
1018 | case 0365: |
1019 | if (prefix->asp) |
1020 | return 0; |
1021 | break; |
1022 | |
1023 | case 0366: |
1024 | if (!prefix->osp) |
1025 | return 0; |
1026 | o_used = true; |
1027 | break; |
1028 | |
1029 | case 0367: |
1030 | if (!prefix->asp) |
1031 | return 0; |
1032 | a_used = true; |
1033 | break; |
1034 | |
1035 | case 0370: |
1036 | case 0371: |
1037 | break; |
1038 | |
1039 | case 0374: |
1040 | eat = EA_XMMVSIB; |
1041 | break; |
1042 | |
1043 | case 0375: |
1044 | eat = EA_YMMVSIB; |
1045 | break; |
1046 | |
1047 | case 0376: |
1048 | eat = EA_ZMMVSIB; |
1049 | break; |
1050 | |
1051 | default: |
1052 | return 0; /* Unknown code */ |
1053 | } |
1054 | } |
1055 | |
1056 | if (!vex_ok && (ins->rex & (REX_V | REX_EV))) |
1057 | return 0; |
1058 | |
1059 | /* REX cannot be combined with VEX */ |
1060 | if ((ins->rex & REX_V) && (prefix->rex & REX_P)) |
1061 | return 0; |
1062 | |
1063 | /* |
1064 | * Check for unused rep or a/o prefixes. |
1065 | */ |
1066 | for (i = 0; i < t->operands; i++) { |
1067 | if (ins->oprs[i].segment != SEG_RMREG) |
1068 | a_used = true; |
1069 | } |
1070 | |
1071 | if (lock) { |
1072 | if (ins->prefixes[PPS_LOCK]) |
1073 | return 0; |
1074 | ins->prefixes[PPS_LOCK] = P_LOCK; |
1075 | } |
1076 | if (drep) { |
1077 | if (ins->prefixes[PPS_REP]) |
1078 | return 0; |
1079 | ins->prefixes[PPS_REP] = drep; |
1080 | } |
1081 | ins->prefixes[PPS_WAIT] = dwait; |
1082 | if (!o_used) { |
1083 | if (osize != ((segsize == 16) ? 16 : 32)) { |
1084 | enum prefixes pfx = 0; |
1085 | |
1086 | switch (osize) { |
1087 | case 16: |
1088 | pfx = P_O16; |
1089 | break; |
1090 | case 32: |
1091 | pfx = P_O32; |
1092 | break; |
1093 | case 64: |
1094 | pfx = P_O64; |
1095 | break; |
1096 | } |
1097 | |
1098 | if (ins->prefixes[PPS_OSIZE]) |
1099 | return 0; |
1100 | ins->prefixes[PPS_OSIZE] = pfx; |
1101 | } |
1102 | } |
1103 | if (!a_used && asize != segsize) { |
1104 | if (ins->prefixes[PPS_ASIZE]) |
1105 | return 0; |
1106 | ins->prefixes[PPS_ASIZE] = asize == 16 ? P_A16 : P_A32; |
1107 | } |
1108 | |
1109 | /* Fix: check for redundant REX prefixes */ |
1110 | |
1111 | return data - origdata; |
1112 | } |
1113 | |
/* Condition-code suffixes, indexed by the 4-bit x86 condition number */
static const char * const condition_name[16] = {
    "o",        /* 0x0 */
    "no",       /* 0x1 */
    "c",        /* 0x2 */
    "nc",       /* 0x3 */
    "z",        /* 0x4 */
    "nz",       /* 0x5 */
    "na",       /* 0x6 */
    "a",        /* 0x7 */
    "s",        /* 0x8 */
    "ns",       /* 0x9 */
    "pe",       /* 0xa */
    "po",       /* 0xb */
    "l",        /* 0xc */
    "nl",       /* 0xd */
    "ng",       /* 0xe */
    "g"         /* 0xf */
};
1119 | |
1120 | int32_t disasm(uint8_t *data, int32_t data_size, char *output, int outbufsize, int segsize, |
1121 | int64_t offset, int autosync, iflag_t *prefer) |
1122 | { |
1123 | const struct itemplate * const *p, * const *best_p; |
1124 | const struct disasm_index *ix; |
1125 | uint8_t *dp; |
1126 | int length, best_length = 0; |
1127 | char *segover; |
1128 | int i, slen, colon, n; |
1129 | uint8_t *origdata; |
1130 | int works; |
1131 | insn tmp_ins, ins; |
1132 | iflag_t goodness, best; |
1133 | int best_pref; |
1134 | struct prefix_info prefix; |
1135 | bool end_prefix; |
1136 | bool is_evex; |
1137 | |
1138 | memset(&ins, 0, sizeof ins); |
1139 | |
1140 | /* |
1141 | * Scan for prefixes. |
1142 | */ |
1143 | memset(&prefix, 0, sizeof prefix); |
1144 | prefix.asize = segsize; |
1145 | prefix.osize = (segsize == 64) ? 32 : segsize; |
1146 | segover = NULL; |
1147 | origdata = data; |
1148 | |
1149 | ix = itable; |
1150 | |
1151 | end_prefix = false; |
1152 | while (!end_prefix) { |
1153 | switch (*data) { |
1154 | case 0xF2: |
1155 | case 0xF3: |
1156 | fetch_or_return(origdata, data, data_size, 1); |
1157 | prefix.rep = *data++; |
1158 | break; |
1159 | |
1160 | case 0x9B: |
1161 | fetch_or_return(origdata, data, data_size, 1); |
1162 | prefix.wait = *data++; |
1163 | break; |
1164 | |
1165 | case 0xF0: |
1166 | fetch_or_return(origdata, data, data_size, 1); |
1167 | prefix.lock = *data++; |
1168 | break; |
1169 | |
1170 | case 0x2E: |
1171 | fetch_or_return(origdata, data, data_size, 1); |
1172 | segover = "cs" , prefix.seg = *data++; |
1173 | break; |
1174 | case 0x36: |
1175 | fetch_or_return(origdata, data, data_size, 1); |
1176 | segover = "ss" , prefix.seg = *data++; |
1177 | break; |
1178 | case 0x3E: |
1179 | fetch_or_return(origdata, data, data_size, 1); |
1180 | segover = "ds" , prefix.seg = *data++; |
1181 | break; |
1182 | case 0x26: |
1183 | fetch_or_return(origdata, data, data_size, 1); |
1184 | segover = "es" , prefix.seg = *data++; |
1185 | break; |
1186 | case 0x64: |
1187 | fetch_or_return(origdata, data, data_size, 1); |
1188 | segover = "fs" , prefix.seg = *data++; |
1189 | break; |
1190 | case 0x65: |
1191 | fetch_or_return(origdata, data, data_size, 1); |
1192 | segover = "gs" , prefix.seg = *data++; |
1193 | break; |
1194 | |
1195 | case 0x66: |
1196 | fetch_or_return(origdata, data, data_size, 1); |
1197 | prefix.osize = (segsize == 16) ? 32 : 16; |
1198 | prefix.osp = *data++; |
1199 | break; |
1200 | case 0x67: |
1201 | fetch_or_return(origdata, data, data_size, 1); |
1202 | prefix.asize = (segsize == 32) ? 16 : 32; |
1203 | prefix.asp = *data++; |
1204 | break; |
1205 | |
1206 | case 0xC4: |
1207 | case 0xC5: |
1208 | if (segsize == 64 || (data[1] & 0xc0) == 0xc0) { |
1209 | fetch_or_return(origdata, data, data_size, 2); |
1210 | prefix.vex[0] = *data++; |
1211 | prefix.vex[1] = *data++; |
1212 | |
1213 | prefix.rex = REX_V; |
1214 | prefix.vex_c = RV_VEX; |
1215 | |
1216 | if (prefix.vex[0] == 0xc4) { |
1217 | fetch_or_return(origdata, data, data_size, 1); |
1218 | prefix.vex[2] = *data++; |
1219 | prefix.rex |= (~prefix.vex[1] >> 5) & 7; /* REX_RXB */ |
1220 | prefix.rex |= (prefix.vex[2] >> (7-3)) & REX_W; |
1221 | prefix.vex_m = prefix.vex[1] & 0x1f; |
1222 | prefix.vex_v = (~prefix.vex[2] >> 3) & 15; |
1223 | prefix.vex_lp = prefix.vex[2] & 7; |
1224 | } else { |
1225 | prefix.rex |= (~prefix.vex[1] >> (7-2)) & REX_R; |
1226 | prefix.vex_m = 1; |
1227 | prefix.vex_v = (~prefix.vex[1] >> 3) & 15; |
1228 | prefix.vex_lp = prefix.vex[1] & 7; |
1229 | } |
1230 | |
1231 | ix = itable_vex[RV_VEX][prefix.vex_m][prefix.vex_lp & 3]; |
1232 | } |
1233 | end_prefix = true; |
1234 | break; |
1235 | |
1236 | case 0x62: |
1237 | { |
1238 | if (segsize == 64 || ((data[1] & 0xc0) == 0xc0)) { |
1239 | fetch_or_return(origdata, data, data_size, 4); |
1240 | data++; /* 62h EVEX prefix */ |
1241 | prefix.evex[0] = *data++; |
1242 | prefix.evex[1] = *data++; |
1243 | prefix.evex[2] = *data++; |
1244 | |
1245 | prefix.rex = REX_EV; |
1246 | prefix.vex_c = RV_EVEX; |
1247 | prefix.rex |= (~prefix.evex[0] >> 5) & 7; /* REX_RXB */ |
1248 | prefix.rex |= (prefix.evex[1] >> (7-3)) & REX_W; |
1249 | prefix.vex_m = prefix.evex[0] & EVEX_P0MM; |
1250 | prefix.vex_v = (~prefix.evex[1] & EVEX_P1VVVV) >> 3; |
1251 | prefix.vex_lp = ((prefix.evex[2] & EVEX_P2LL) >> (5-2)) | |
1252 | (prefix.evex[1] & EVEX_P1PP); |
1253 | |
1254 | ix = itable_vex[prefix.vex_c][prefix.vex_m][prefix.vex_lp & 3]; |
1255 | } |
1256 | end_prefix = true; |
1257 | break; |
1258 | } |
1259 | |
1260 | case 0x8F: |
1261 | if ((data[1] & 030) != 0 && |
1262 | (segsize == 64 || (data[1] & 0xc0) == 0xc0)) { |
1263 | fetch_or_return(origdata, data, data_size, 3); |
1264 | prefix.vex[0] = *data++; |
1265 | prefix.vex[1] = *data++; |
1266 | prefix.vex[2] = *data++; |
1267 | |
1268 | prefix.rex = REX_V; |
1269 | prefix.vex_c = RV_XOP; |
1270 | |
1271 | prefix.rex |= (~prefix.vex[1] >> 5) & 7; /* REX_RXB */ |
1272 | prefix.rex |= (prefix.vex[2] >> (7-3)) & REX_W; |
1273 | prefix.vex_m = prefix.vex[1] & 0x1f; |
1274 | prefix.vex_v = (~prefix.vex[2] >> 3) & 15; |
1275 | prefix.vex_lp = prefix.vex[2] & 7; |
1276 | |
1277 | ix = itable_vex[RV_XOP][prefix.vex_m][prefix.vex_lp & 3]; |
1278 | } |
1279 | end_prefix = true; |
1280 | break; |
1281 | |
1282 | case REX_P + 0x0: |
1283 | case REX_P + 0x1: |
1284 | case REX_P + 0x2: |
1285 | case REX_P + 0x3: |
1286 | case REX_P + 0x4: |
1287 | case REX_P + 0x5: |
1288 | case REX_P + 0x6: |
1289 | case REX_P + 0x7: |
1290 | case REX_P + 0x8: |
1291 | case REX_P + 0x9: |
1292 | case REX_P + 0xA: |
1293 | case REX_P + 0xB: |
1294 | case REX_P + 0xC: |
1295 | case REX_P + 0xD: |
1296 | case REX_P + 0xE: |
1297 | case REX_P + 0xF: |
1298 | if (segsize == 64) { |
1299 | fetch_or_return(origdata, data, data_size, 1); |
1300 | prefix.rex = *data++; |
1301 | if (prefix.rex & REX_W) |
1302 | prefix.osize = 64; |
1303 | } |
1304 | end_prefix = true; |
1305 | break; |
1306 | |
1307 | default: |
1308 | end_prefix = true; |
1309 | break; |
1310 | } |
1311 | } |
1312 | |
1313 | iflag_set_all(&best); /* Worst possible */ |
1314 | best_p = NULL; |
1315 | best_pref = INT_MAX; |
1316 | |
1317 | if (!ix) |
1318 | return 0; /* No instruction table at all... */ |
1319 | |
1320 | dp = data; |
1321 | fetch_or_return(origdata, dp, data_size, 1); |
1322 | ix += *dp++; |
1323 | while (ix->n == -1) { |
1324 | fetch_or_return(origdata, dp, data_size, 1); |
1325 | ix = (const struct disasm_index *)ix->p + *dp++; |
1326 | } |
1327 | |
1328 | p = (const struct itemplate * const *)ix->p; |
1329 | for (n = ix->n; n; n--, p++) { |
1330 | if ((length = matches(*p, data, &prefix, segsize, &tmp_ins))) { |
1331 | works = true; |
1332 | /* |
1333 | * Final check to make sure the types of r/m match up. |
1334 | * XXX: Need to make sure this is actually correct. |
1335 | */ |
1336 | for (i = 0; i < (*p)->operands; i++) { |
1337 | if ( |
1338 | /* If it's a mem-only EA but we have a |
1339 | register, die. */ |
1340 | ((tmp_ins.oprs[i].segment & SEG_RMREG) && |
1341 | is_class(MEMORY, (*p)->opd[i])) || |
1342 | /* If it's a reg-only EA but we have a memory |
1343 | ref, die. */ |
1344 | (!(tmp_ins.oprs[i].segment & SEG_RMREG) && |
1345 | !(REG_EA & ~(*p)->opd[i]) && |
1346 | !((*p)->opd[i] & REG_SMASK)) || |
1347 | /* Register type mismatch (eg FS vs REG_DESS): |
1348 | die. */ |
1349 | ((((*p)->opd[i] & (REGISTER | FPUREG)) || |
1350 | (tmp_ins.oprs[i].segment & SEG_RMREG)) && |
1351 | !whichreg((*p)->opd[i], |
1352 | tmp_ins.oprs[i].basereg, tmp_ins.rex)) |
1353 | ) { |
1354 | works = false; |
1355 | break; |
1356 | } |
1357 | } |
1358 | |
1359 | /* |
1360 | * Note: we always prefer instructions which incorporate |
1361 | * prefixes in the instructions themselves. This is to allow |
1362 | * e.g. PAUSE to be preferred to REP NOP, and deal with |
1363 | * MMX/SSE instructions where prefixes are used to select |
1364 | * between MMX and SSE register sets or outright opcode |
1365 | * selection. |
1366 | */ |
1367 | if (works) { |
1368 | int i, nprefix; |
1369 | goodness = iflag_pfmask(*p); |
1370 | goodness = iflag_xor(&goodness, prefer); |
1371 | nprefix = 0; |
1372 | for (i = 0; i < MAXPREFIX; i++) |
1373 | if (tmp_ins.prefixes[i]) |
1374 | nprefix++; |
1375 | if (nprefix < best_pref || |
1376 | (nprefix == best_pref && |
1377 | iflag_cmp(&goodness, &best) < 0)) { |
1378 | /* This is the best one found so far */ |
1379 | best = goodness; |
1380 | best_p = p; |
1381 | best_pref = nprefix; |
1382 | best_length = length; |
1383 | ins = tmp_ins; |
1384 | } |
1385 | } |
1386 | } |
1387 | } |
1388 | |
1389 | if (!best_p) |
1390 | return 0; /* no instruction was matched */ |
1391 | |
1392 | /* Pick the best match */ |
1393 | p = best_p; |
1394 | length = best_length; |
1395 | |
1396 | slen = 0; |
1397 | |
1398 | /* TODO: snprintf returns the value that the string would have if |
1399 | * the buffer were long enough, and not the actual length of |
1400 | * the returned string, so each instance of using the return |
1401 | * value of snprintf should actually be checked to assure that |
1402 | * the return value is "sane." Maybe a macro wrapper could |
1403 | * be used for that purpose. |
1404 | */ |
1405 | for (i = 0; i < MAXPREFIX; i++) { |
1406 | const char *prefix = prefix_name(ins.prefixes[i]); |
1407 | if (prefix) |
1408 | slen += snprintf(output+slen, outbufsize-slen, "%s " , prefix); |
1409 | } |
1410 | |
1411 | i = (*p)->opcode; |
1412 | if (i >= FIRST_COND_OPCODE) |
1413 | slen += snprintf(output + slen, outbufsize - slen, "%s%s" , |
1414 | nasm_insn_names[i], condition_name[ins.condition]); |
1415 | else |
1416 | slen += snprintf(output + slen, outbufsize - slen, "%s" , |
1417 | nasm_insn_names[i]); |
1418 | |
1419 | colon = false; |
1420 | is_evex = !!(ins.rex & REX_EV); |
1421 | length += data - origdata; /* fix up for prefixes */ |
1422 | for (i = 0; i < (*p)->operands; i++) { |
1423 | opflags_t t = (*p)->opd[i]; |
1424 | decoflags_t deco = (*p)->deco[i]; |
1425 | const operand *o = &ins.oprs[i]; |
1426 | int64_t offs; |
1427 | |
1428 | output[slen++] = (colon ? ':' : i == 0 ? ' ' : ','); |
1429 | |
1430 | offs = o->offset; |
1431 | if (o->segment & SEG_RELATIVE) { |
1432 | offs += offset + length; |
1433 | /* |
1434 | * sort out wraparound |
1435 | */ |
1436 | if (!(o->segment & (SEG_32BIT|SEG_64BIT))) |
1437 | offs &= 0xffff; |
1438 | else if (segsize != 64) |
1439 | offs &= 0xffffffff; |
1440 | |
1441 | /* |
1442 | * add sync marker, if autosync is on |
1443 | */ |
1444 | if (autosync) |
1445 | add_sync(offs, 0L); |
1446 | } |
1447 | |
1448 | if (t & COLON) |
1449 | colon = true; |
1450 | else |
1451 | colon = false; |
1452 | |
1453 | if ((t & (REGISTER | FPUREG)) || |
1454 | (o->segment & SEG_RMREG)) { |
1455 | enum reg_enum reg; |
1456 | reg = whichreg(t, o->basereg, ins.rex); |
1457 | if (t & TO) |
1458 | slen += snprintf(output + slen, outbufsize - slen, "to " ); |
1459 | slen += snprintf(output + slen, outbufsize - slen, "%s" , |
1460 | nasm_reg_names[reg-EXPR_REG_START]); |
1461 | if (t & REGSET_MASK) |
1462 | slen += snprintf(output + slen, outbufsize - slen, "+%d" , |
1463 | (int)((t & REGSET_MASK) >> (REGSET_SHIFT-1))-1); |
1464 | if (is_evex && deco) |
1465 | slen += append_evex_reg_deco(output + slen, outbufsize - slen, |
1466 | deco, ins.evex_p); |
1467 | } else if (!(UNITY & ~t)) { |
1468 | output[slen++] = '1'; |
1469 | } else if (t & IMMEDIATE) { |
1470 | if (t & BITS8) { |
1471 | slen += |
1472 | snprintf(output + slen, outbufsize - slen, "byte " ); |
1473 | if (o->segment & SEG_SIGNED) { |
1474 | if (offs < 0) { |
1475 | offs *= -1; |
1476 | output[slen++] = '-'; |
1477 | } else |
1478 | output[slen++] = '+'; |
1479 | } |
1480 | } else if (t & BITS16) { |
1481 | slen += |
1482 | snprintf(output + slen, outbufsize - slen, "word " ); |
1483 | } else if (t & BITS32) { |
1484 | slen += |
1485 | snprintf(output + slen, outbufsize - slen, "dword " ); |
1486 | } else if (t & BITS64) { |
1487 | slen += |
1488 | snprintf(output + slen, outbufsize - slen, "qword " ); |
1489 | } else if (t & NEAR) { |
1490 | slen += |
1491 | snprintf(output + slen, outbufsize - slen, "near " ); |
1492 | } else if (t & SHORT) { |
1493 | slen += |
1494 | snprintf(output + slen, outbufsize - slen, "short " ); |
1495 | } |
1496 | slen += |
1497 | snprintf(output + slen, outbufsize - slen, "0x%" PRIx64"" , |
1498 | offs); |
1499 | } else if (!(MEM_OFFS & ~t)) { |
1500 | slen += |
1501 | snprintf(output + slen, outbufsize - slen, |
1502 | "[%s%s%s0x%" PRIx64"]" , |
1503 | (segover ? segover : "" ), |
1504 | (segover ? ":" : "" ), |
1505 | (o->disp_size == 64 ? "qword " : |
1506 | o->disp_size == 32 ? "dword " : |
1507 | o->disp_size == 16 ? "word " : "" ), offs); |
1508 | segover = NULL; |
1509 | } else if (is_class(REGMEM, t)) { |
1510 | int started = false; |
1511 | if (t & BITS8) |
1512 | slen += |
1513 | snprintf(output + slen, outbufsize - slen, "byte " ); |
1514 | if (t & BITS16) |
1515 | slen += |
1516 | snprintf(output + slen, outbufsize - slen, "word " ); |
1517 | if (t & BITS32) |
1518 | slen += |
1519 | snprintf(output + slen, outbufsize - slen, "dword " ); |
1520 | if (t & BITS64) |
1521 | slen += |
1522 | snprintf(output + slen, outbufsize - slen, "qword " ); |
1523 | if (t & BITS80) |
1524 | slen += |
1525 | snprintf(output + slen, outbufsize - slen, "tword " ); |
1526 | if ((ins.evex_p[2] & EVEX_P2B) && (deco & BRDCAST_MASK)) { |
1527 | /* when broadcasting, each element size should be used */ |
1528 | if (deco & BR_BITS32) |
1529 | slen += |
1530 | snprintf(output + slen, outbufsize - slen, "dword " ); |
1531 | else if (deco & BR_BITS64) |
1532 | slen += |
1533 | snprintf(output + slen, outbufsize - slen, "qword " ); |
1534 | } else { |
1535 | if (t & BITS128) |
1536 | slen += |
1537 | snprintf(output + slen, outbufsize - slen, "oword " ); |
1538 | if (t & BITS256) |
1539 | slen += |
1540 | snprintf(output + slen, outbufsize - slen, "yword " ); |
1541 | if (t & BITS512) |
1542 | slen += |
1543 | snprintf(output + slen, outbufsize - slen, "zword " ); |
1544 | } |
1545 | if (t & FAR) |
1546 | slen += snprintf(output + slen, outbufsize - slen, "far " ); |
1547 | if (t & NEAR) |
1548 | slen += |
1549 | snprintf(output + slen, outbufsize - slen, "near " ); |
1550 | output[slen++] = '['; |
1551 | if (o->disp_size) |
1552 | slen += snprintf(output + slen, outbufsize - slen, "%s" , |
1553 | (o->disp_size == 64 ? "qword " : |
1554 | o->disp_size == 32 ? "dword " : |
1555 | o->disp_size == 16 ? "word " : |
1556 | "" )); |
1557 | if (o->eaflags & EAF_REL) |
1558 | slen += snprintf(output + slen, outbufsize - slen, "rel " ); |
1559 | if (segover) { |
1560 | slen += |
1561 | snprintf(output + slen, outbufsize - slen, "%s:" , |
1562 | segover); |
1563 | segover = NULL; |
1564 | } |
1565 | if (o->basereg != -1) { |
1566 | slen += snprintf(output + slen, outbufsize - slen, "%s" , |
1567 | nasm_reg_names[(o->basereg-EXPR_REG_START)]); |
1568 | started = true; |
1569 | } |
1570 | if (o->indexreg != -1 && !itemp_has(*best_p, IF_MIB)) { |
1571 | if (started) |
1572 | output[slen++] = '+'; |
1573 | slen += snprintf(output + slen, outbufsize - slen, "%s" , |
1574 | nasm_reg_names[(o->indexreg-EXPR_REG_START)]); |
1575 | if (o->scale > 1) |
1576 | slen += |
1577 | snprintf(output + slen, outbufsize - slen, "*%d" , |
1578 | o->scale); |
1579 | started = true; |
1580 | } |
1581 | |
1582 | |
1583 | if (o->segment & SEG_DISP8) { |
1584 | if (is_evex) { |
1585 | const char *prefix; |
1586 | uint32_t offset = offs; |
1587 | if ((int32_t)offset < 0) { |
1588 | prefix = "-" ; |
1589 | offset = -offset; |
1590 | } else { |
1591 | prefix = "+" ; |
1592 | } |
1593 | slen += |
1594 | snprintf(output + slen, outbufsize - slen, "%s0x%" PRIx32"" , |
1595 | prefix, offset); |
1596 | } else { |
1597 | const char *prefix; |
1598 | uint8_t offset = offs; |
1599 | if ((int8_t)offset < 0) { |
1600 | prefix = "-" ; |
1601 | offset = -offset; |
1602 | } else { |
1603 | prefix = "+" ; |
1604 | } |
1605 | slen += |
1606 | snprintf(output + slen, outbufsize - slen, "%s0x%" PRIx8"" , |
1607 | prefix, offset); |
1608 | } |
1609 | } else if (o->segment & SEG_DISP16) { |
1610 | const char *prefix; |
1611 | uint16_t offset = offs; |
1612 | if ((int16_t)offset < 0 && started) { |
1613 | offset = -offset; |
1614 | prefix = "-" ; |
1615 | } else { |
1616 | prefix = started ? "+" : "" ; |
1617 | } |
1618 | slen += |
1619 | snprintf(output + slen, outbufsize - slen, |
1620 | "%s0x%" PRIx16"" , prefix, offset); |
1621 | } else if (o->segment & SEG_DISP32) { |
1622 | if (prefix.asize == 64) { |
1623 | const char *prefix; |
1624 | uint64_t offset = offs; |
1625 | if ((int32_t)offs < 0 && started) { |
1626 | offset = -offset; |
1627 | prefix = "-" ; |
1628 | } else { |
1629 | prefix = started ? "+" : "" ; |
1630 | } |
1631 | slen += |
1632 | snprintf(output + slen, outbufsize - slen, |
1633 | "%s0x%" PRIx64"" , prefix, offset); |
1634 | } else { |
1635 | const char *prefix; |
1636 | uint32_t offset = offs; |
1637 | if ((int32_t) offset < 0 && started) { |
1638 | offset = -offset; |
1639 | prefix = "-" ; |
1640 | } else { |
1641 | prefix = started ? "+" : "" ; |
1642 | } |
1643 | slen += |
1644 | snprintf(output + slen, outbufsize - slen, |
1645 | "%s0x%" PRIx32"" , prefix, offset); |
1646 | } |
1647 | } |
1648 | |
1649 | if (o->indexreg != -1 && itemp_has(*best_p, IF_MIB)) { |
1650 | output[slen++] = ','; |
1651 | slen += snprintf(output + slen, outbufsize - slen, "%s" , |
1652 | nasm_reg_names[(o->indexreg-EXPR_REG_START)]); |
1653 | if (o->scale > 1) |
1654 | slen += |
1655 | snprintf(output + slen, outbufsize - slen, "*%d" , |
1656 | o->scale); |
1657 | started = true; |
1658 | } |
1659 | |
1660 | output[slen++] = ']'; |
1661 | |
1662 | if (is_evex && deco) |
1663 | slen += append_evex_mem_deco(output + slen, outbufsize - slen, |
1664 | t, deco, ins.evex_p); |
1665 | } else { |
1666 | slen += |
1667 | snprintf(output + slen, outbufsize - slen, "<operand%d>" , |
1668 | i); |
1669 | } |
1670 | } |
1671 | output[slen] = '\0'; |
1672 | if (segover) { /* unused segment override */ |
1673 | char *p = output; |
1674 | int count = slen + 1; |
1675 | while (count--) |
1676 | p[count + 3] = p[count]; |
1677 | strncpy(output, segover, 2); |
1678 | output[2] = ' '; |
1679 | } |
1680 | return length; |
1681 | } |
1682 | |
1683 | /* |
1684 | * This is called when we don't have a complete instruction. If it |
1685 | * is a standalone *single-byte* prefix show it as such, otherwise |
1686 | * print it as a literal. |
1687 | */ |
1688 | int32_t eatbyte(uint8_t *data, char *output, int outbufsize, int segsize) |
1689 | { |
1690 | uint8_t byte = *data; |
1691 | const char *str = NULL; |
1692 | |
1693 | switch (byte) { |
1694 | case 0xF2: |
1695 | str = "repne" ; |
1696 | break; |
1697 | case 0xF3: |
1698 | str = "rep" ; |
1699 | break; |
1700 | case 0x9B: |
1701 | str = "wait" ; |
1702 | break; |
1703 | case 0xF0: |
1704 | str = "lock" ; |
1705 | break; |
1706 | case 0x2E: |
1707 | str = "cs" ; |
1708 | break; |
1709 | case 0x36: |
1710 | str = "ss" ; |
1711 | break; |
1712 | case 0x3E: |
1713 | str = "ds" ; |
1714 | break; |
1715 | case 0x26: |
1716 | str = "es" ; |
1717 | break; |
1718 | case 0x64: |
1719 | str = "fs" ; |
1720 | break; |
1721 | case 0x65: |
1722 | str = "gs" ; |
1723 | break; |
1724 | case 0x66: |
1725 | str = (segsize == 16) ? "o32" : "o16" ; |
1726 | break; |
1727 | case 0x67: |
1728 | str = (segsize == 32) ? "a16" : "a32" ; |
1729 | break; |
1730 | case REX_P + 0x0: |
1731 | case REX_P + 0x1: |
1732 | case REX_P + 0x2: |
1733 | case REX_P + 0x3: |
1734 | case REX_P + 0x4: |
1735 | case REX_P + 0x5: |
1736 | case REX_P + 0x6: |
1737 | case REX_P + 0x7: |
1738 | case REX_P + 0x8: |
1739 | case REX_P + 0x9: |
1740 | case REX_P + 0xA: |
1741 | case REX_P + 0xB: |
1742 | case REX_P + 0xC: |
1743 | case REX_P + 0xD: |
1744 | case REX_P + 0xE: |
1745 | case REX_P + 0xF: |
1746 | if (segsize == 64) { |
1747 | snprintf(output, outbufsize, "rex%s%s%s%s%s" , |
1748 | (byte == REX_P) ? "" : "." , |
1749 | (byte & REX_W) ? "w" : "" , |
1750 | (byte & REX_R) ? "r" : "" , |
1751 | (byte & REX_X) ? "x" : "" , |
1752 | (byte & REX_B) ? "b" : "" ); |
1753 | break; |
1754 | } |
1755 | /* else fall through */ |
1756 | default: |
1757 | snprintf(output, outbufsize, "db 0x%02x" , byte); |
1758 | break; |
1759 | } |
1760 | |
1761 | if (str) |
1762 | snprintf(output, outbufsize, "%s" , str); |
1763 | |
1764 | return 1; |
1765 | } |
1766 | |