1 | /* ----------------------------------------------------------------------- * |
2 | * |
3 | * Copyright 1996-2018 The NASM Authors - All Rights Reserved |
4 | * See the file AUTHORS included with the NASM distribution for |
5 | * the specific copyright holders. |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following |
9 | * conditions are met: |
10 | * |
11 | * * Redistributions of source code must retain the above copyright |
12 | * notice, this list of conditions and the following disclaimer. |
13 | * * Redistributions in binary form must reproduce the above |
14 | * copyright notice, this list of conditions and the following |
15 | * disclaimer in the documentation and/or other materials provided |
16 | * with the distribution. |
17 | * |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
19 | * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
20 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
21 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
22 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
23 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
25 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
26 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
27 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
29 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
30 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
31 | * |
32 | * ----------------------------------------------------------------------- */ |
33 | |
34 | /* |
35 | * parser.c source line parser for the Netwide Assembler |
36 | */ |
37 | |
38 | #include "compiler.h" |
39 | |
40 | #include <stdio.h> |
41 | #include <stdlib.h> |
42 | #include <stddef.h> |
43 | #include <string.h> |
44 | #include <ctype.h> |
45 | |
46 | #include "nasm.h" |
47 | #include "insns.h" |
48 | #include "nasmlib.h" |
49 | #include "error.h" |
50 | #include "stdscan.h" |
51 | #include "eval.h" |
52 | #include "parser.h" |
53 | #include "float.h" |
54 | #include "assemble.h" |
55 | #include "tables.h" |
56 | |
57 | |
/*
 * Forward declaration: is_comma_next() is static, so its definition
 * lives further down in this file.  parse_line() uses it to tell a
 * stand-alone string operand from a string inside an expression.
 */
static int is_comma_next(void);

/*
 * The token most recently produced by the scanner.  It is filled in by
 * stdscan() and evaluate() and read by all the parsing routines in
 * this file, so it always describes the "current" token.
 */
static struct tokenval tokval;
61 | |
62 | static int prefix_slot(int prefix) |
63 | { |
64 | switch (prefix) { |
65 | case P_WAIT: |
66 | return PPS_WAIT; |
67 | case R_CS: |
68 | case R_DS: |
69 | case R_SS: |
70 | case R_ES: |
71 | case R_FS: |
72 | case R_GS: |
73 | return PPS_SEG; |
74 | case P_LOCK: |
75 | return PPS_LOCK; |
76 | case P_REP: |
77 | case P_REPE: |
78 | case P_REPZ: |
79 | case P_REPNE: |
80 | case P_REPNZ: |
81 | case P_XACQUIRE: |
82 | case P_XRELEASE: |
83 | case P_BND: |
84 | case P_NOBND: |
85 | return PPS_REP; |
86 | case P_O16: |
87 | case P_O32: |
88 | case P_O64: |
89 | case P_OSP: |
90 | return PPS_OSIZE; |
91 | case P_A16: |
92 | case P_A32: |
93 | case P_A64: |
94 | case P_ASP: |
95 | return PPS_ASIZE; |
96 | case P_EVEX: |
97 | case P_VEX3: |
98 | case P_VEX2: |
99 | return PPS_VEX; |
100 | default: |
101 | nasm_panic(0, "Invalid value %d passed to prefix_slot()" , prefix); |
102 | return -1; |
103 | } |
104 | } |
105 | |
106 | static void process_size_override(insn *result, operand *op) |
107 | { |
108 | if (tasm_compatible_mode) { |
109 | switch (tokval.t_integer) { |
110 | /* For TASM compatibility a size override inside the |
111 | * brackets changes the size of the operand, not the |
112 | * address type of the operand as it does in standard |
113 | * NASM syntax. Hence: |
114 | * |
115 | * mov eax,[DWORD val] |
116 | * |
117 | * is valid syntax in TASM compatibility mode. Note that |
118 | * you lose the ability to override the default address |
119 | * type for the instruction, but we never use anything |
120 | * but 32-bit flat model addressing in our code. |
121 | */ |
122 | case S_BYTE: |
123 | op->type |= BITS8; |
124 | break; |
125 | case S_WORD: |
126 | op->type |= BITS16; |
127 | break; |
128 | case S_DWORD: |
129 | case S_LONG: |
130 | op->type |= BITS32; |
131 | break; |
132 | case S_QWORD: |
133 | op->type |= BITS64; |
134 | break; |
135 | case S_TWORD: |
136 | op->type |= BITS80; |
137 | break; |
138 | case S_OWORD: |
139 | op->type |= BITS128; |
140 | break; |
141 | default: |
142 | nasm_error(ERR_NONFATAL, |
143 | "invalid operand size specification" ); |
144 | break; |
145 | } |
146 | } else { |
147 | /* Standard NASM compatible syntax */ |
148 | switch (tokval.t_integer) { |
149 | case S_NOSPLIT: |
150 | op->eaflags |= EAF_TIMESTWO; |
151 | break; |
152 | case S_REL: |
153 | op->eaflags |= EAF_REL; |
154 | break; |
155 | case S_ABS: |
156 | op->eaflags |= EAF_ABS; |
157 | break; |
158 | case S_BYTE: |
159 | op->disp_size = 8; |
160 | op->eaflags |= EAF_BYTEOFFS; |
161 | break; |
162 | case P_A16: |
163 | case P_A32: |
164 | case P_A64: |
165 | if (result->prefixes[PPS_ASIZE] && |
166 | result->prefixes[PPS_ASIZE] != tokval.t_integer) |
167 | nasm_error(ERR_NONFATAL, |
168 | "conflicting address size specifications" ); |
169 | else |
170 | result->prefixes[PPS_ASIZE] = tokval.t_integer; |
171 | break; |
172 | case S_WORD: |
173 | op->disp_size = 16; |
174 | op->eaflags |= EAF_WORDOFFS; |
175 | break; |
176 | case S_DWORD: |
177 | case S_LONG: |
178 | op->disp_size = 32; |
179 | op->eaflags |= EAF_WORDOFFS; |
180 | break; |
181 | case S_QWORD: |
182 | op->disp_size = 64; |
183 | op->eaflags |= EAF_WORDOFFS; |
184 | break; |
185 | default: |
186 | nasm_error(ERR_NONFATAL, "invalid size specification in" |
187 | " effective address" ); |
188 | break; |
189 | } |
190 | } |
191 | } |
192 | |
193 | /* |
194 | * Brace decorators are are parsed here. opmask and zeroing |
195 | * decorators can be placed in any order. e.g. zmm1 {k2}{z} or zmm2 |
196 | * {z}{k3} decorator(s) are placed at the end of an operand. |
197 | */ |
198 | static bool parse_braces(decoflags_t *decoflags) |
199 | { |
200 | int i, j; |
201 | |
202 | i = tokval.t_type; |
203 | |
204 | while (true) { |
205 | switch (i) { |
206 | case TOKEN_OPMASK: |
207 | if (*decoflags & OPMASK_MASK) { |
208 | nasm_error(ERR_NONFATAL, |
209 | "opmask k%" PRIu64" is already set" , |
210 | *decoflags & OPMASK_MASK); |
211 | *decoflags &= ~OPMASK_MASK; |
212 | } |
213 | *decoflags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]); |
214 | break; |
215 | case TOKEN_DECORATOR: |
216 | j = tokval.t_integer; |
217 | switch (j) { |
218 | case BRC_Z: |
219 | *decoflags |= Z_MASK; |
220 | break; |
221 | case BRC_1TO2: |
222 | case BRC_1TO4: |
223 | case BRC_1TO8: |
224 | case BRC_1TO16: |
225 | *decoflags |= BRDCAST_MASK | VAL_BRNUM(j - BRC_1TO2); |
226 | break; |
227 | default: |
228 | nasm_error(ERR_NONFATAL, |
229 | "{%s} is not an expected decorator" , |
230 | tokval.t_charptr); |
231 | break; |
232 | } |
233 | break; |
234 | case ',': |
235 | case TOKEN_EOS: |
236 | return false; |
237 | default: |
238 | nasm_error(ERR_NONFATAL, |
239 | "only a series of valid decorators expected" ); |
240 | return true; |
241 | } |
242 | i = stdscan(NULL, &tokval); |
243 | } |
244 | } |
245 | |
246 | static int parse_mref(operand *op, const expr *e) |
247 | { |
248 | int b, i, s; /* basereg, indexreg, scale */ |
249 | int64_t o; /* offset */ |
250 | |
251 | b = i = -1; |
252 | o = s = 0; |
253 | op->segment = op->wrt = NO_SEG; |
254 | |
255 | if (e->type && e->type <= EXPR_REG_END) { /* this bit's a register */ |
256 | bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]); |
257 | |
258 | if (is_gpr && e->value == 1) |
259 | b = e->type; /* It can be basereg */ |
260 | else /* No, it has to be indexreg */ |
261 | i = e->type, s = e->value; |
262 | e++; |
263 | } |
264 | if (e->type && e->type <= EXPR_REG_END) { /* it's a 2nd register */ |
265 | bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]); |
266 | |
267 | if (b != -1) /* If the first was the base, ... */ |
268 | i = e->type, s = e->value; /* second has to be indexreg */ |
269 | |
270 | else if (!is_gpr || e->value != 1) { |
271 | /* If both want to be index */ |
272 | nasm_error(ERR_NONFATAL, |
273 | "invalid effective address: two index registers" ); |
274 | return -1; |
275 | } else |
276 | b = e->type; |
277 | e++; |
278 | } |
279 | |
280 | if (e->type) { /* is there an offset? */ |
281 | if (e->type <= EXPR_REG_END) { /* in fact, is there an error? */ |
282 | nasm_error(ERR_NONFATAL, |
283 | "invalid effective address: impossible register" ); |
284 | return -1; |
285 | } else { |
286 | if (e->type == EXPR_UNKNOWN) { |
287 | op->opflags |= OPFLAG_UNKNOWN; |
288 | o = 0; /* doesn't matter what */ |
289 | while (e->type) |
290 | e++; /* go to the end of the line */ |
291 | } else { |
292 | if (e->type == EXPR_SIMPLE) { |
293 | o = e->value; |
294 | e++; |
295 | } |
296 | if (e->type == EXPR_WRT) { |
297 | op->wrt = e->value; |
298 | e++; |
299 | } |
300 | /* |
301 | * Look for a segment base type. |
302 | */ |
303 | for (; e->type; e++) { |
304 | if (!e->value) |
305 | continue; |
306 | |
307 | if (e->type <= EXPR_REG_END) { |
308 | nasm_error(ERR_NONFATAL, |
309 | "invalid effective address: too many registers" ); |
310 | return -1; |
311 | } else if (e->type < EXPR_SEGBASE) { |
312 | nasm_error(ERR_NONFATAL, |
313 | "invalid effective address: bad subexpression type" ); |
314 | return -1; |
315 | } else if (e->value == 1) { |
316 | if (op->segment != NO_SEG) { |
317 | nasm_error(ERR_NONFATAL, |
318 | "invalid effective address: multiple base segments" ); |
319 | return -1; |
320 | } |
321 | op->segment = e->type - EXPR_SEGBASE; |
322 | } else if (e->value == -1 && |
323 | e->type == location.segment + EXPR_SEGBASE && |
324 | !(op->opflags & OPFLAG_RELATIVE)) { |
325 | op->opflags |= OPFLAG_RELATIVE; |
326 | } else { |
327 | nasm_error(ERR_NONFATAL, |
328 | "invalid effective address: impossible segment base multiplier" ); |
329 | return -1; |
330 | } |
331 | } |
332 | } |
333 | } |
334 | } |
335 | |
336 | nasm_assert(!e->type); /* We should be at the end */ |
337 | |
338 | op->basereg = b; |
339 | op->indexreg = i; |
340 | op->scale = s; |
341 | op->offset = o; |
342 | return 0; |
343 | } |
344 | |
345 | static void mref_set_optype(operand *op) |
346 | { |
347 | int b = op->basereg; |
348 | int i = op->indexreg; |
349 | int s = op->scale; |
350 | |
351 | /* It is memory, but it can match any r/m operand */ |
352 | op->type |= MEMORY_ANY; |
353 | |
354 | if (b == -1 && (i == -1 || s == 0)) { |
355 | int is_rel = globalbits == 64 && |
356 | !(op->eaflags & EAF_ABS) && |
357 | ((globalrel && |
358 | !(op->eaflags & EAF_FSGS)) || |
359 | (op->eaflags & EAF_REL)); |
360 | |
361 | op->type |= is_rel ? IP_REL : MEM_OFFS; |
362 | } |
363 | |
364 | if (i != -1) { |
365 | opflags_t iclass = nasm_reg_flags[i]; |
366 | |
367 | if (is_class(XMMREG,iclass)) |
368 | op->type |= XMEM; |
369 | else if (is_class(YMMREG,iclass)) |
370 | op->type |= YMEM; |
371 | else if (is_class(ZMMREG,iclass)) |
372 | op->type |= ZMEM; |
373 | } |
374 | } |
375 | |
376 | /* |
377 | * Convert an expression vector returned from evaluate() into an |
378 | * extop structure. Return zero on success. |
379 | */ |
380 | static int value_to_extop(expr * vect, extop *eop, int32_t myseg) |
381 | { |
382 | eop->type = EOT_DB_NUMBER; |
383 | eop->offset = 0; |
384 | eop->segment = eop->wrt = NO_SEG; |
385 | eop->relative = false; |
386 | |
387 | for (; vect->type; vect++) { |
388 | if (!vect->value) /* zero term, safe to ignore */ |
389 | continue; |
390 | |
391 | if (vect->type <= EXPR_REG_END) /* false if a register is present */ |
392 | return -1; |
393 | |
394 | if (vect->type == EXPR_UNKNOWN) /* something we can't resolve yet */ |
395 | return 0; |
396 | |
397 | if (vect->type == EXPR_SIMPLE) { |
398 | /* Simple number expression */ |
399 | eop->offset += vect->value; |
400 | continue; |
401 | } |
402 | if (eop->wrt == NO_SEG && !eop->relative && vect->type == EXPR_WRT) { |
403 | /* WRT term */ |
404 | eop->wrt = vect->value; |
405 | continue; |
406 | } |
407 | |
408 | if (!eop->relative && |
409 | vect->type == EXPR_SEGBASE + myseg && vect->value == -1) { |
410 | /* Expression of the form: foo - $ */ |
411 | eop->relative = true; |
412 | continue; |
413 | } |
414 | |
415 | if (eop->segment == NO_SEG && vect->type >= EXPR_SEGBASE && |
416 | vect->value == 1) { |
417 | eop->segment = vect->type - EXPR_SEGBASE; |
418 | continue; |
419 | } |
420 | |
421 | /* Otherwise, badness */ |
422 | return -1; |
423 | } |
424 | |
425 | /* We got to the end and it was all okay */ |
426 | return 0; |
427 | } |
428 | |
429 | insn *parse_line(int pass, char *buffer, insn *result) |
430 | { |
431 | bool insn_is_label = false; |
432 | struct eval_hints hints; |
433 | int opnum; |
434 | int critical; |
435 | bool first; |
436 | bool recover; |
437 | int i; |
438 | |
439 | nasm_static_assert(P_none == 0); |
440 | |
441 | restart_parse: |
442 | first = true; |
443 | result->forw_ref = false; |
444 | |
445 | stdscan_reset(); |
446 | stdscan_set(buffer); |
447 | i = stdscan(NULL, &tokval); |
448 | |
449 | memset(result->prefixes, P_none, sizeof(result->prefixes)); |
450 | result->times = 1; /* No TIMES either yet */ |
451 | result->label = NULL; /* Assume no label */ |
452 | result->eops = NULL; /* must do this, whatever happens */ |
453 | result->operands = 0; /* must initialize this */ |
454 | result->evex_rm = 0; /* Ensure EVEX rounding mode is reset */ |
455 | result->evex_brerop = -1; /* Reset EVEX broadcasting/ER op position */ |
456 | |
457 | /* Ignore blank lines */ |
458 | if (i == TOKEN_EOS) |
459 | goto fail; |
460 | |
461 | if (i != TOKEN_ID && |
462 | i != TOKEN_INSN && |
463 | i != TOKEN_PREFIX && |
464 | (i != TOKEN_REG || !IS_SREG(tokval.t_integer))) { |
465 | nasm_error(ERR_NONFATAL, |
466 | "label or instruction expected at start of line" ); |
467 | goto fail; |
468 | } |
469 | |
470 | if (i == TOKEN_ID || (insn_is_label && i == TOKEN_INSN)) { |
471 | /* there's a label here */ |
472 | first = false; |
473 | result->label = tokval.t_charptr; |
474 | i = stdscan(NULL, &tokval); |
475 | if (i == ':') { /* skip over the optional colon */ |
476 | i = stdscan(NULL, &tokval); |
477 | } else if (i == 0) { |
478 | nasm_error(ERR_WARNING | WARN_OL | ERR_PASS1, |
479 | "label alone on a line without a colon might be in error" ); |
480 | } |
481 | if (i != TOKEN_INSN || tokval.t_integer != I_EQU) { |
482 | /* |
483 | * FIXME: location.segment could be NO_SEG, in which case |
484 | * it is possible we should be passing 'absolute.segment'. Look into this. |
485 | * Work out whether that is *really* what we should be doing. |
486 | * Generally fix things. I think this is right as it is, but |
487 | * am still not certain. |
488 | */ |
489 | define_label(result->label, |
490 | in_absolute ? absolute.segment : location.segment, |
491 | location.offset, true); |
492 | } |
493 | } |
494 | |
495 | /* Just a label here */ |
496 | if (i == TOKEN_EOS) |
497 | goto fail; |
498 | |
499 | while (i == TOKEN_PREFIX || |
500 | (i == TOKEN_REG && IS_SREG(tokval.t_integer))) { |
501 | first = false; |
502 | |
503 | /* |
504 | * Handle special case: the TIMES prefix. |
505 | */ |
506 | if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) { |
507 | expr *value; |
508 | |
509 | i = stdscan(NULL, &tokval); |
510 | value = evaluate(stdscan, NULL, &tokval, NULL, pass0, NULL); |
511 | i = tokval.t_type; |
512 | if (!value) /* Error in evaluator */ |
513 | goto fail; |
514 | if (!is_simple(value)) { |
515 | nasm_error(ERR_NONFATAL, |
516 | "non-constant argument supplied to TIMES" ); |
517 | result->times = 1L; |
518 | } else { |
519 | result->times = value->value; |
520 | if (value->value < 0) { |
521 | nasm_error(ERR_NONFATAL|ERR_PASS2, "TIMES value %" PRId64" is negative" , value->value); |
522 | result->times = 0; |
523 | } |
524 | } |
525 | } else { |
526 | int slot = prefix_slot(tokval.t_integer); |
527 | if (result->prefixes[slot]) { |
528 | if (result->prefixes[slot] == tokval.t_integer) |
529 | nasm_error(ERR_WARNING | ERR_PASS1, |
530 | "instruction has redundant prefixes" ); |
531 | else |
532 | nasm_error(ERR_NONFATAL, |
533 | "instruction has conflicting prefixes" ); |
534 | } |
535 | result->prefixes[slot] = tokval.t_integer; |
536 | i = stdscan(NULL, &tokval); |
537 | } |
538 | } |
539 | |
540 | if (i != TOKEN_INSN) { |
541 | int j; |
542 | enum prefixes pfx; |
543 | |
544 | for (j = 0; j < MAXPREFIX; j++) { |
545 | if ((pfx = result->prefixes[j]) != P_none) |
546 | break; |
547 | } |
548 | |
549 | if (i == 0 && pfx != P_none) { |
550 | /* |
551 | * Instruction prefixes are present, but no actual |
552 | * instruction. This is allowed: at this point we |
553 | * invent a notional instruction of RESB 0. |
554 | */ |
555 | result->opcode = I_RESB; |
556 | result->operands = 1; |
557 | nasm_zero(result->oprs); |
558 | result->oprs[0].type = IMMEDIATE; |
559 | result->oprs[0].offset = 0L; |
560 | result->oprs[0].segment = result->oprs[0].wrt = NO_SEG; |
561 | return result; |
562 | } else { |
563 | nasm_error(ERR_NONFATAL, "parser: instruction expected" ); |
564 | goto fail; |
565 | } |
566 | } |
567 | |
568 | result->opcode = tokval.t_integer; |
569 | result->condition = tokval.t_inttwo; |
570 | |
571 | /* |
572 | * INCBIN cannot be satisfied with incorrectly |
573 | * evaluated operands, since the correct values _must_ be known |
574 | * on the first pass. Hence, even in pass one, we set the |
575 | * `critical' flag on calling evaluate(), so that it will bomb |
576 | * out on undefined symbols. |
577 | */ |
578 | if (result->opcode == I_INCBIN) { |
579 | critical = (pass0 < 2 ? 1 : 2); |
580 | |
581 | } else |
582 | critical = (pass == 2 ? 2 : 0); |
583 | |
584 | if (opcode_is_db(result->opcode) || result->opcode == I_INCBIN) { |
585 | extop *eop, **tail = &result->eops, **fixptr; |
586 | int oper_num = 0; |
587 | int32_t sign; |
588 | |
589 | result->eops_float = false; |
590 | |
591 | /* |
592 | * Begin to read the DB/DW/DD/DQ/DT/DO/DY/DZ/INCBIN operands. |
593 | */ |
594 | while (1) { |
595 | i = stdscan(NULL, &tokval); |
596 | if (i == TOKEN_EOS) |
597 | break; |
598 | else if (first && i == ':') { |
599 | insn_is_label = true; |
600 | goto restart_parse; |
601 | } |
602 | first = false; |
603 | fixptr = tail; |
604 | eop = *tail = nasm_malloc(sizeof(extop)); |
605 | tail = &eop->next; |
606 | eop->next = NULL; |
607 | eop->type = EOT_NOTHING; |
608 | oper_num++; |
609 | sign = +1; |
610 | |
611 | /* |
612 | * is_comma_next() here is to distinguish this from |
613 | * a string used as part of an expression... |
614 | */ |
615 | if (i == TOKEN_STR && is_comma_next()) { |
616 | eop->type = EOT_DB_STRING; |
617 | eop->stringval = tokval.t_charptr; |
618 | eop->stringlen = tokval.t_inttwo; |
619 | i = stdscan(NULL, &tokval); /* eat the comma */ |
620 | } else if (i == TOKEN_STRFUNC) { |
621 | bool parens = false; |
622 | const char *funcname = tokval.t_charptr; |
623 | enum strfunc func = tokval.t_integer; |
624 | i = stdscan(NULL, &tokval); |
625 | if (i == '(') { |
626 | parens = true; |
627 | i = stdscan(NULL, &tokval); |
628 | } |
629 | if (i != TOKEN_STR) { |
630 | nasm_error(ERR_NONFATAL, |
631 | "%s must be followed by a string constant" , |
632 | funcname); |
633 | eop->type = EOT_NOTHING; |
634 | } else { |
635 | eop->type = EOT_DB_STRING_FREE; |
636 | eop->stringlen = |
637 | string_transform(tokval.t_charptr, tokval.t_inttwo, |
638 | &eop->stringval, func); |
639 | if (eop->stringlen == (size_t)-1) { |
640 | nasm_error(ERR_NONFATAL, "invalid string for transform" ); |
641 | eop->type = EOT_NOTHING; |
642 | } |
643 | } |
644 | if (parens && i && i != ')') { |
645 | i = stdscan(NULL, &tokval); |
646 | if (i != ')') { |
647 | nasm_error(ERR_NONFATAL, "unterminated %s function" , |
648 | funcname); |
649 | } |
650 | } |
651 | if (i && i != ',') |
652 | i = stdscan(NULL, &tokval); |
653 | } else if (i == '-' || i == '+') { |
654 | char *save = stdscan_get(); |
655 | int token = i; |
656 | sign = (i == '-') ? -1 : 1; |
657 | i = stdscan(NULL, &tokval); |
658 | if (i != TOKEN_FLOAT) { |
659 | stdscan_set(save); |
660 | i = tokval.t_type = token; |
661 | goto is_expression; |
662 | } else { |
663 | goto is_float; |
664 | } |
665 | } else if (i == TOKEN_FLOAT) { |
666 | is_float: |
667 | eop->type = EOT_DB_STRING; |
668 | result->eops_float = true; |
669 | |
670 | eop->stringlen = db_bytes(result->opcode); |
671 | if (eop->stringlen > 16) { |
672 | nasm_error(ERR_NONFATAL, "floating-point constant" |
673 | " encountered in DY or DZ instruction" ); |
674 | eop->stringlen = 0; |
675 | } else if (eop->stringlen < 1) { |
676 | nasm_error(ERR_NONFATAL, "floating-point constant" |
677 | " encountered in unknown instruction" ); |
678 | /* |
679 | * fix suggested by Pedro Gimeno... original line was: |
680 | * eop->type = EOT_NOTHING; |
681 | */ |
682 | eop->stringlen = 0; |
683 | } |
684 | |
685 | eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen); |
686 | tail = &eop->next; |
687 | *fixptr = eop; |
688 | eop->stringval = (char *)eop + sizeof(extop); |
689 | if (!eop->stringlen || |
690 | !float_const(tokval.t_charptr, sign, |
691 | (uint8_t *)eop->stringval, eop->stringlen)) |
692 | eop->type = EOT_NOTHING; |
693 | i = stdscan(NULL, &tokval); /* eat the comma */ |
694 | } else { |
695 | /* anything else, assume it is an expression */ |
696 | expr *value; |
697 | |
698 | is_expression: |
699 | value = evaluate(stdscan, NULL, &tokval, NULL, |
700 | critical, NULL); |
701 | i = tokval.t_type; |
702 | if (!value) /* Error in evaluator */ |
703 | goto fail; |
704 | if (value_to_extop(value, eop, location.segment)) { |
705 | nasm_error(ERR_NONFATAL, |
706 | "operand %d: expression is not simple or relocatable" , |
707 | oper_num); |
708 | } |
709 | } |
710 | |
711 | /* |
712 | * We're about to call stdscan(), which will eat the |
713 | * comma that we're currently sitting on between |
714 | * arguments. However, we'd better check first that it |
715 | * _is_ a comma. |
716 | */ |
717 | if (i == TOKEN_EOS) /* also could be EOL */ |
718 | break; |
719 | if (i != ',') { |
720 | nasm_error(ERR_NONFATAL, "comma expected after operand %d" , |
721 | oper_num); |
722 | goto fail; |
723 | } |
724 | } |
725 | |
726 | if (result->opcode == I_INCBIN) { |
727 | /* |
728 | * Correct syntax for INCBIN is that there should be |
729 | * one string operand, followed by one or two numeric |
730 | * operands. |
731 | */ |
732 | if (!result->eops || result->eops->type != EOT_DB_STRING) |
733 | nasm_error(ERR_NONFATAL, "`incbin' expects a file name" ); |
734 | else if (result->eops->next && |
735 | result->eops->next->type != EOT_DB_NUMBER) |
736 | nasm_error(ERR_NONFATAL, "`incbin': second parameter is" |
737 | " non-numeric" ); |
738 | else if (result->eops->next && result->eops->next->next && |
739 | result->eops->next->next->type != EOT_DB_NUMBER) |
740 | nasm_error(ERR_NONFATAL, "`incbin': third parameter is" |
741 | " non-numeric" ); |
742 | else if (result->eops->next && result->eops->next->next && |
743 | result->eops->next->next->next) |
744 | nasm_error(ERR_NONFATAL, |
745 | "`incbin': more than three parameters" ); |
746 | else |
747 | return result; |
748 | /* |
749 | * If we reach here, one of the above errors happened. |
750 | * Throw the instruction away. |
751 | */ |
752 | goto fail; |
753 | } else /* DB ... */ if (oper_num == 0) |
754 | nasm_error(ERR_WARNING | ERR_PASS1, |
755 | "no operand for data declaration" ); |
756 | else |
757 | result->operands = oper_num; |
758 | |
759 | return result; |
760 | } |
761 | |
762 | /* |
763 | * Now we begin to parse the operands. There may be up to four |
764 | * of these, separated by commas, and terminated by a zero token. |
765 | */ |
766 | |
767 | for (opnum = 0; opnum < MAX_OPERANDS; opnum++) { |
768 | operand *op = &result->oprs[opnum]; |
769 | expr *value; /* used most of the time */ |
770 | bool mref; /* is this going to be a memory ref? */ |
771 | bool bracket; /* is it a [] mref, or a & mref? */ |
772 | bool mib; /* compound (mib) mref? */ |
773 | int setsize = 0; |
774 | decoflags_t brace_flags = 0; /* flags for decorators in braces */ |
775 | |
776 | op->disp_size = 0; /* have to zero this whatever */ |
777 | op->eaflags = 0; /* and this */ |
778 | op->opflags = 0; |
779 | op->decoflags = 0; |
780 | |
781 | i = stdscan(NULL, &tokval); |
782 | if (i == TOKEN_EOS) |
783 | break; /* end of operands: get out of here */ |
784 | else if (first && i == ':') { |
785 | insn_is_label = true; |
786 | goto restart_parse; |
787 | } |
788 | first = false; |
789 | op->type = 0; /* so far, no override */ |
790 | while (i == TOKEN_SPECIAL) { /* size specifiers */ |
791 | switch (tokval.t_integer) { |
792 | case S_BYTE: |
793 | if (!setsize) /* we want to use only the first */ |
794 | op->type |= BITS8; |
795 | setsize = 1; |
796 | break; |
797 | case S_WORD: |
798 | if (!setsize) |
799 | op->type |= BITS16; |
800 | setsize = 1; |
801 | break; |
802 | case S_DWORD: |
803 | case S_LONG: |
804 | if (!setsize) |
805 | op->type |= BITS32; |
806 | setsize = 1; |
807 | break; |
808 | case S_QWORD: |
809 | if (!setsize) |
810 | op->type |= BITS64; |
811 | setsize = 1; |
812 | break; |
813 | case S_TWORD: |
814 | if (!setsize) |
815 | op->type |= BITS80; |
816 | setsize = 1; |
817 | break; |
818 | case S_OWORD: |
819 | if (!setsize) |
820 | op->type |= BITS128; |
821 | setsize = 1; |
822 | break; |
823 | case S_YWORD: |
824 | if (!setsize) |
825 | op->type |= BITS256; |
826 | setsize = 1; |
827 | break; |
828 | case S_ZWORD: |
829 | if (!setsize) |
830 | op->type |= BITS512; |
831 | setsize = 1; |
832 | break; |
833 | case S_TO: |
834 | op->type |= TO; |
835 | break; |
836 | case S_STRICT: |
837 | op->type |= STRICT; |
838 | break; |
839 | case S_FAR: |
840 | op->type |= FAR; |
841 | break; |
842 | case S_NEAR: |
843 | op->type |= NEAR; |
844 | break; |
845 | case S_SHORT: |
846 | op->type |= SHORT; |
847 | break; |
848 | default: |
849 | nasm_error(ERR_NONFATAL, "invalid operand size specification" ); |
850 | } |
851 | i = stdscan(NULL, &tokval); |
852 | } |
853 | |
854 | if (i == '[' || i == '&') { /* memory reference */ |
855 | mref = true; |
856 | bracket = (i == '['); |
857 | i = stdscan(NULL, &tokval); /* then skip the colon */ |
858 | while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) { |
859 | process_size_override(result, op); |
860 | i = stdscan(NULL, &tokval); |
861 | } |
862 | /* when a comma follows an opening bracket - [ , eax*4] */ |
863 | if (i == ',') { |
864 | /* treat as if there is a zero displacement virtually */ |
865 | tokval.t_type = TOKEN_NUM; |
866 | tokval.t_integer = 0; |
867 | stdscan_set(stdscan_get() - 1); /* rewind the comma */ |
868 | } |
869 | } else { /* immediate operand, or register */ |
870 | mref = false; |
871 | bracket = false; /* placate optimisers */ |
872 | } |
873 | |
874 | if ((op->type & FAR) && !mref && |
875 | result->opcode != I_JMP && result->opcode != I_CALL) { |
876 | nasm_error(ERR_NONFATAL, "invalid use of FAR operand specifier" ); |
877 | } |
878 | |
879 | value = evaluate(stdscan, NULL, &tokval, |
880 | &op->opflags, critical, &hints); |
881 | i = tokval.t_type; |
882 | if (op->opflags & OPFLAG_FORWARD) { |
883 | result->forw_ref = true; |
884 | } |
885 | if (!value) /* Error in evaluator */ |
886 | goto fail; |
887 | if (i == ':' && mref) { /* it was seg:offset */ |
888 | /* |
889 | * Process the segment override. |
890 | */ |
891 | if (value[1].type != 0 || |
892 | value->value != 1 || |
893 | !IS_SREG(value->type)) |
894 | nasm_error(ERR_NONFATAL, "invalid segment override" ); |
895 | else if (result->prefixes[PPS_SEG]) |
896 | nasm_error(ERR_NONFATAL, |
897 | "instruction has conflicting segment overrides" ); |
898 | else { |
899 | result->prefixes[PPS_SEG] = value->type; |
900 | if (IS_FSGS(value->type)) |
901 | op->eaflags |= EAF_FSGS; |
902 | } |
903 | |
904 | i = stdscan(NULL, &tokval); /* then skip the colon */ |
905 | while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) { |
906 | process_size_override(result, op); |
907 | i = stdscan(NULL, &tokval); |
908 | } |
909 | value = evaluate(stdscan, NULL, &tokval, |
910 | &op->opflags, critical, &hints); |
911 | i = tokval.t_type; |
912 | if (op->opflags & OPFLAG_FORWARD) { |
913 | result->forw_ref = true; |
914 | } |
915 | /* and get the offset */ |
916 | if (!value) /* Error in evaluator */ |
917 | goto fail; |
918 | } |
919 | |
920 | mib = false; |
921 | if (mref && bracket && i == ',') { |
922 | /* [seg:base+offset,index*scale] syntax (mib) */ |
923 | |
924 | operand o1, o2; /* Partial operands */ |
925 | |
926 | if (parse_mref(&o1, value)) |
927 | goto fail; |
928 | |
929 | i = stdscan(NULL, &tokval); /* Eat comma */ |
930 | value = evaluate(stdscan, NULL, &tokval, &op->opflags, |
931 | critical, &hints); |
932 | i = tokval.t_type; |
933 | if (!value) |
934 | goto fail; |
935 | |
936 | if (parse_mref(&o2, value)) |
937 | goto fail; |
938 | |
939 | if (o2.basereg != -1 && o2.indexreg == -1) { |
940 | o2.indexreg = o2.basereg; |
941 | o2.scale = 1; |
942 | o2.basereg = -1; |
943 | } |
944 | |
945 | if (o1.indexreg != -1 || o2.basereg != -1 || o2.offset != 0 || |
946 | o2.segment != NO_SEG || o2.wrt != NO_SEG) { |
947 | nasm_error(ERR_NONFATAL, "invalid mib expression" ); |
948 | goto fail; |
949 | } |
950 | |
951 | op->basereg = o1.basereg; |
952 | op->indexreg = o2.indexreg; |
953 | op->scale = o2.scale; |
954 | op->offset = o1.offset; |
955 | op->segment = o1.segment; |
956 | op->wrt = o1.wrt; |
957 | |
958 | if (op->basereg != -1) { |
959 | op->hintbase = op->basereg; |
960 | op->hinttype = EAH_MAKEBASE; |
961 | } else if (op->indexreg != -1) { |
962 | op->hintbase = op->indexreg; |
963 | op->hinttype = EAH_NOTBASE; |
964 | } else { |
965 | op->hintbase = -1; |
966 | op->hinttype = EAH_NOHINT; |
967 | } |
968 | |
969 | mib = true; |
970 | } |
971 | |
972 | recover = false; |
973 | if (mref && bracket) { /* find ] at the end */ |
974 | if (i != ']') { |
975 | nasm_error(ERR_NONFATAL, "parser: expecting ]" ); |
976 | recover = true; |
977 | } else { /* we got the required ] */ |
978 | i = stdscan(NULL, &tokval); |
979 | if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) { |
980 | /* parse opmask (and zeroing) after an operand */ |
981 | recover = parse_braces(&brace_flags); |
982 | i = tokval.t_type; |
983 | } |
984 | if (i != 0 && i != ',') { |
985 | nasm_error(ERR_NONFATAL, "comma or end of line expected" ); |
986 | recover = true; |
987 | } |
988 | } |
989 | } else { /* immediate operand */ |
990 | if (i != 0 && i != ',' && i != ':' && |
991 | i != TOKEN_DECORATOR && i != TOKEN_OPMASK) { |
992 | nasm_error(ERR_NONFATAL, "comma, colon, decorator or end of " |
993 | "line expected after operand" ); |
994 | recover = true; |
995 | } else if (i == ':') { |
996 | op->type |= COLON; |
997 | } else if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) { |
998 | /* parse opmask (and zeroing) after an operand */ |
999 | recover = parse_braces(&brace_flags); |
1000 | } |
1001 | } |
1002 | if (recover) { |
1003 | do { /* error recovery */ |
1004 | i = stdscan(NULL, &tokval); |
1005 | } while (i != 0 && i != ','); |
1006 | } |
1007 | |
1008 | /* |
1009 | * now convert the exprs returned from evaluate() |
1010 | * into operand descriptions... |
1011 | */ |
1012 | op->decoflags |= brace_flags; |
1013 | |
1014 | if (mref) { /* it's a memory reference */ |
1015 | /* A mib reference was fully parsed already */ |
1016 | if (!mib) { |
1017 | if (parse_mref(op, value)) |
1018 | goto fail; |
1019 | op->hintbase = hints.base; |
1020 | op->hinttype = hints.type; |
1021 | } |
1022 | mref_set_optype(op); |
1023 | } else { /* it's not a memory reference */ |
1024 | if (is_just_unknown(value)) { /* it's immediate but unknown */ |
1025 | op->type |= IMMEDIATE; |
1026 | op->opflags |= OPFLAG_UNKNOWN; |
1027 | op->offset = 0; /* don't care */ |
1028 | op->segment = NO_SEG; /* don't care again */ |
1029 | op->wrt = NO_SEG; /* still don't care */ |
1030 | |
1031 | if(optimizing.level >= 0 && !(op->type & STRICT)) { |
1032 | /* Be optimistic */ |
1033 | op->type |= |
1034 | UNITY | SBYTEWORD | SBYTEDWORD | UDWORD | SDWORD; |
1035 | } |
1036 | } else if (is_reloc(value)) { /* it's immediate */ |
1037 | uint64_t n = reloc_value(value); |
1038 | |
1039 | op->type |= IMMEDIATE; |
1040 | op->offset = n; |
1041 | op->segment = reloc_seg(value); |
1042 | op->wrt = reloc_wrt(value); |
1043 | op->opflags |= is_self_relative(value) ? OPFLAG_RELATIVE : 0; |
1044 | |
1045 | if (is_simple(value)) { |
1046 | if (n == 1) |
1047 | op->type |= UNITY; |
1048 | if (optimizing.level >= 0 && !(op->type & STRICT)) { |
1049 | if ((uint32_t) (n + 128) <= 255) |
1050 | op->type |= SBYTEDWORD; |
1051 | if ((uint16_t) (n + 128) <= 255) |
1052 | op->type |= SBYTEWORD; |
1053 | if (n <= UINT64_C(0xFFFFFFFF)) |
1054 | op->type |= UDWORD; |
1055 | if (n + UINT64_C(0x80000000) <= UINT64_C(0xFFFFFFFF)) |
1056 | op->type |= SDWORD; |
1057 | } |
1058 | } |
1059 | } else if (value->type == EXPR_RDSAE) { |
1060 | /* |
1061 | * it's not an operand but a rounding or SAE decorator. |
1062 | * put the decorator information in the (opflag_t) type field |
1063 | * of previous operand. |
1064 | */ |
1065 | opnum--; op--; |
1066 | switch (value->value) { |
1067 | case BRC_RN: |
1068 | case BRC_RU: |
1069 | case BRC_RD: |
1070 | case BRC_RZ: |
1071 | case BRC_SAE: |
1072 | op->decoflags |= (value->value == BRC_SAE ? SAE : ER); |
1073 | result->evex_rm = value->value; |
1074 | break; |
1075 | default: |
1076 | nasm_error(ERR_NONFATAL, "invalid decorator" ); |
1077 | break; |
1078 | } |
1079 | } else { /* it's a register */ |
1080 | opflags_t rs; |
1081 | uint64_t regset_size = 0; |
1082 | |
1083 | if (value->type >= EXPR_SIMPLE || value->value != 1) { |
1084 | nasm_error(ERR_NONFATAL, "invalid operand type" ); |
1085 | goto fail; |
1086 | } |
1087 | |
1088 | /* |
1089 | * We do not allow any kind of expression, except for |
1090 | * reg+value in which case it is a register set. |
1091 | */ |
1092 | for (i = 1; value[i].type; i++) { |
1093 | if (!value[i].value) |
1094 | continue; |
1095 | |
1096 | switch (value[i].type) { |
1097 | case EXPR_SIMPLE: |
1098 | if (!regset_size) { |
1099 | regset_size = value[i].value + 1; |
1100 | break; |
1101 | } |
1102 | /* fallthrough */ |
1103 | default: |
1104 | nasm_error(ERR_NONFATAL, "invalid operand type" ); |
1105 | goto fail; |
1106 | } |
1107 | } |
1108 | |
1109 | if ((regset_size & (regset_size - 1)) || |
1110 | regset_size >= (UINT64_C(1) << REGSET_BITS)) { |
1111 | nasm_error(ERR_NONFATAL | ERR_PASS2, |
1112 | "invalid register set size" ); |
1113 | regset_size = 0; |
1114 | } |
1115 | |
1116 | /* clear overrides, except TO which applies to FPU regs */ |
1117 | if (op->type & ~TO) { |
1118 | /* |
1119 | * we want to produce a warning iff the specified size |
1120 | * is different from the register size |
1121 | */ |
1122 | rs = op->type & SIZE_MASK; |
1123 | } else { |
1124 | rs = 0; |
1125 | } |
1126 | |
1127 | /* |
1128 | * Make sure we're not out of nasm_reg_flags, still |
1129 | * probably this should be fixed when we're defining |
1130 | * the label. |
1131 | * |
1132 | * An easy trigger is |
1133 | * |
1134 | * e equ 0x80000000:0 |
1135 | * pshufw word e-0 |
1136 | * |
1137 | */ |
1138 | if (value->type < EXPR_REG_START || |
1139 | value->type > EXPR_REG_END) { |
1140 | nasm_error(ERR_NONFATAL, "invalid operand type" ); |
1141 | goto fail; |
1142 | } |
1143 | |
1144 | op->type &= TO; |
1145 | op->type |= REGISTER; |
1146 | op->type |= nasm_reg_flags[value->type]; |
1147 | op->type |= (regset_size >> 1) << REGSET_SHIFT; |
1148 | op->decoflags |= brace_flags; |
1149 | op->basereg = value->type; |
1150 | |
1151 | if (rs && (op->type & SIZE_MASK) != rs) |
1152 | nasm_error(ERR_WARNING | ERR_PASS1, |
1153 | "register size specification ignored" ); |
1154 | } |
1155 | } |
1156 | |
1157 | /* remember the position of operand having broadcasting/ER mode */ |
1158 | if (op->decoflags & (BRDCAST_MASK | ER | SAE)) |
1159 | result->evex_brerop = opnum; |
1160 | } |
1161 | |
1162 | result->operands = opnum; /* set operand count */ |
1163 | |
1164 | /* clear remaining operands */ |
1165 | while (opnum < MAX_OPERANDS) |
1166 | result->oprs[opnum++].type = 0; |
1167 | |
1168 | /* |
1169 | * Transform RESW, RESD, RESQ, REST, RESO, RESY, RESZ into RESB. |
1170 | */ |
1171 | if (opcode_is_resb(result->opcode)) { |
1172 | result->oprs[0].offset *= resb_bytes(result->opcode); |
1173 | result->oprs[0].offset *= result->times; |
1174 | result->times = 1; |
1175 | result->opcode = I_RESB; |
1176 | } |
1177 | |
1178 | return result; |
1179 | |
1180 | fail: |
1181 | result->opcode = I_none; |
1182 | return result; |
1183 | } |
1184 | |
1185 | static int is_comma_next(void) |
1186 | { |
1187 | struct tokenval tv; |
1188 | char *p; |
1189 | int i; |
1190 | |
1191 | p = stdscan_get(); |
1192 | i = stdscan(NULL, &tv); |
1193 | stdscan_set(p); |
1194 | |
1195 | return (i == ',' || i == ';' || !i); |
1196 | } |
1197 | |
1198 | void cleanup_insn(insn * i) |
1199 | { |
1200 | extop *e; |
1201 | |
1202 | while ((e = i->eops)) { |
1203 | i->eops = e->next; |
1204 | if (e->type == EOT_DB_STRING_FREE) |
1205 | nasm_free(e->stringval); |
1206 | nasm_free(e); |
1207 | } |
1208 | } |
1209 | |