1 | #include <Python.h> |
2 | #include "pycore_ast.h" // _PyAST_Validate(), |
3 | #include <errcode.h> |
4 | #include "tokenizer.h" |
5 | |
6 | #include "pegen.h" |
7 | #include "string_parser.h" |
8 | |
9 | PyObject * |
10 | (Parser *p, const char *s) |
11 | { |
12 | PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL); |
13 | if (res == NULL) { |
14 | return NULL; |
15 | } |
16 | if (_PyArena_AddPyObject(p->arena, res) < 0) { |
17 | Py_DECREF(res); |
18 | return NULL; |
19 | } |
20 | return res; |
21 | } |
22 | |
23 | arg_ty |
24 | (Parser *p, arg_ty a, Token *tc) |
25 | { |
26 | if (tc == NULL) { |
27 | return a; |
28 | } |
29 | const char *bytes = PyBytes_AsString(tc->bytes); |
30 | if (bytes == NULL) { |
31 | return NULL; |
32 | } |
33 | PyObject *tco = _PyPegen_new_type_comment(p, bytes); |
34 | if (tco == NULL) { |
35 | return NULL; |
36 | } |
37 | return _PyAST_arg(a->arg, a->annotation, tco, |
38 | a->lineno, a->col_offset, a->end_lineno, a->end_col_offset, |
39 | p->arena); |
40 | } |
41 | |
42 | static int |
43 | init_normalization(Parser *p) |
44 | { |
45 | if (p->normalize) { |
46 | return 1; |
47 | } |
48 | PyObject *m = PyImport_ImportModuleNoBlock("unicodedata" ); |
49 | if (!m) |
50 | { |
51 | return 0; |
52 | } |
53 | p->normalize = PyObject_GetAttrString(m, "normalize" ); |
54 | Py_DECREF(m); |
55 | if (!p->normalize) |
56 | { |
57 | return 0; |
58 | } |
59 | return 1; |
60 | } |
61 | |
62 | /* Checks if the NOTEQUAL token is valid given the current parser flags |
63 | 0 indicates success and nonzero indicates failure (an exception may be set) */ |
64 | int |
65 | _PyPegen_check_barry_as_flufl(Parser *p, Token* t) { |
66 | assert(t->bytes != NULL); |
67 | assert(t->type == NOTEQUAL); |
68 | |
69 | const char* tok_str = PyBytes_AS_STRING(t->bytes); |
70 | if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>" ) != 0) { |
71 | RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='" ); |
72 | return -1; |
73 | } |
74 | if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) { |
75 | return strcmp(tok_str, "!=" ); |
76 | } |
77 | return 0; |
78 | } |
79 | |
80 | int |
81 | _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) { |
82 | if (name->kind != Name_kind) { |
83 | return 0; |
84 | } |
85 | const char* candidates[2] = {"print" , "exec" }; |
86 | for (int i=0; i<2; i++) { |
87 | if (PyUnicode_CompareWithASCIIString(name->v.Name.id, candidates[i]) == 0) { |
88 | return 1; |
89 | } |
90 | } |
91 | return 0; |
92 | } |
93 | |
94 | PyObject * |
95 | _PyPegen_new_identifier(Parser *p, const char *n) |
96 | { |
97 | PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL); |
98 | if (!id) { |
99 | goto error; |
100 | } |
101 | /* PyUnicode_DecodeUTF8 should always return a ready string. */ |
102 | assert(PyUnicode_IS_READY(id)); |
103 | /* Check whether there are non-ASCII characters in the |
104 | identifier; if so, normalize to NFKC. */ |
105 | if (!PyUnicode_IS_ASCII(id)) |
106 | { |
107 | PyObject *id2; |
108 | if (!init_normalization(p)) |
109 | { |
110 | Py_DECREF(id); |
111 | goto error; |
112 | } |
113 | PyObject *form = PyUnicode_InternFromString("NFKC" ); |
114 | if (form == NULL) |
115 | { |
116 | Py_DECREF(id); |
117 | goto error; |
118 | } |
119 | PyObject *args[2] = {form, id}; |
120 | id2 = _PyObject_FastCall(p->normalize, args, 2); |
121 | Py_DECREF(id); |
122 | Py_DECREF(form); |
123 | if (!id2) { |
124 | goto error; |
125 | } |
126 | if (!PyUnicode_Check(id2)) |
127 | { |
128 | PyErr_Format(PyExc_TypeError, |
129 | "unicodedata.normalize() must return a string, not " |
130 | "%.200s" , |
131 | _PyType_Name(Py_TYPE(id2))); |
132 | Py_DECREF(id2); |
133 | goto error; |
134 | } |
135 | id = id2; |
136 | } |
137 | PyUnicode_InternInPlace(&id); |
138 | if (_PyArena_AddPyObject(p->arena, id) < 0) |
139 | { |
140 | Py_DECREF(id); |
141 | goto error; |
142 | } |
143 | return id; |
144 | |
145 | error: |
146 | p->error_indicator = 1; |
147 | return NULL; |
148 | } |
149 | |
150 | static PyObject * |
151 | _create_dummy_identifier(Parser *p) |
152 | { |
153 | return _PyPegen_new_identifier(p, "" ); |
154 | } |
155 | |
156 | static inline Py_ssize_t |
157 | byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset) |
158 | { |
159 | const char *str = PyUnicode_AsUTF8(line); |
160 | if (!str) { |
161 | return 0; |
162 | } |
163 | Py_ssize_t len = strlen(str); |
164 | if (col_offset > len + 1) { |
165 | col_offset = len + 1; |
166 | } |
167 | assert(col_offset >= 0); |
168 | PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace" ); |
169 | if (!text) { |
170 | return 0; |
171 | } |
172 | Py_ssize_t size = PyUnicode_GET_LENGTH(text); |
173 | Py_DECREF(text); |
174 | return size; |
175 | } |
176 | |
177 | const char * |
178 | _PyPegen_get_expr_name(expr_ty e) |
179 | { |
180 | assert(e != NULL); |
181 | switch (e->kind) { |
182 | case Attribute_kind: |
183 | return "attribute" ; |
184 | case Subscript_kind: |
185 | return "subscript" ; |
186 | case Starred_kind: |
187 | return "starred" ; |
188 | case Name_kind: |
189 | return "name" ; |
190 | case List_kind: |
191 | return "list" ; |
192 | case Tuple_kind: |
193 | return "tuple" ; |
194 | case Lambda_kind: |
195 | return "lambda" ; |
196 | case Call_kind: |
197 | return "function call" ; |
198 | case BoolOp_kind: |
199 | case BinOp_kind: |
200 | case UnaryOp_kind: |
201 | return "expression" ; |
202 | case GeneratorExp_kind: |
203 | return "generator expression" ; |
204 | case Yield_kind: |
205 | case YieldFrom_kind: |
206 | return "yield expression" ; |
207 | case Await_kind: |
208 | return "await expression" ; |
209 | case ListComp_kind: |
210 | return "list comprehension" ; |
211 | case SetComp_kind: |
212 | return "set comprehension" ; |
213 | case DictComp_kind: |
214 | return "dict comprehension" ; |
215 | case Dict_kind: |
216 | return "dict literal" ; |
217 | case Set_kind: |
218 | return "set display" ; |
219 | case JoinedStr_kind: |
220 | case FormattedValue_kind: |
221 | return "f-string expression" ; |
222 | case Constant_kind: { |
223 | PyObject *value = e->v.Constant.value; |
224 | if (value == Py_None) { |
225 | return "None" ; |
226 | } |
227 | if (value == Py_False) { |
228 | return "False" ; |
229 | } |
230 | if (value == Py_True) { |
231 | return "True" ; |
232 | } |
233 | if (value == Py_Ellipsis) { |
234 | return "ellipsis" ; |
235 | } |
236 | return "literal" ; |
237 | } |
238 | case Compare_kind: |
239 | return "comparison" ; |
240 | case IfExp_kind: |
241 | return "conditional expression" ; |
242 | case NamedExpr_kind: |
243 | return "named expression" ; |
244 | default: |
245 | PyErr_Format(PyExc_SystemError, |
246 | "unexpected expression in assignment %d (line %d)" , |
247 | e->kind, e->lineno); |
248 | return NULL; |
249 | } |
250 | } |
251 | |
252 | static int |
253 | raise_decode_error(Parser *p) |
254 | { |
255 | assert(PyErr_Occurred()); |
256 | const char *errtype = NULL; |
257 | if (PyErr_ExceptionMatches(PyExc_UnicodeError)) { |
258 | errtype = "unicode error" ; |
259 | } |
260 | else if (PyErr_ExceptionMatches(PyExc_ValueError)) { |
261 | errtype = "value error" ; |
262 | } |
263 | if (errtype) { |
264 | PyObject *type; |
265 | PyObject *value; |
266 | PyObject *tback; |
267 | PyObject *errstr; |
268 | PyErr_Fetch(&type, &value, &tback); |
269 | errstr = PyObject_Str(value); |
270 | if (errstr) { |
271 | RAISE_SYNTAX_ERROR("(%s) %U" , errtype, errstr); |
272 | Py_DECREF(errstr); |
273 | } |
274 | else { |
275 | PyErr_Clear(); |
276 | RAISE_SYNTAX_ERROR("(%s) unknown error" , errtype); |
277 | } |
278 | Py_XDECREF(type); |
279 | Py_XDECREF(value); |
280 | Py_XDECREF(tback); |
281 | } |
282 | |
283 | return -1; |
284 | } |
285 | |
286 | static inline void |
287 | raise_unclosed_parentheses_error(Parser *p) { |
288 | int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; |
289 | int error_col = p->tok->parencolstack[p->tok->level-1]; |
290 | RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, |
291 | error_lineno, error_col, error_lineno, -1, |
292 | "'%c' was never closed" , |
293 | p->tok->parenstack[p->tok->level-1]); |
294 | } |
295 | |
296 | static void |
297 | raise_tokenizer_init_error(PyObject *filename) |
298 | { |
299 | if (!(PyErr_ExceptionMatches(PyExc_LookupError) |
300 | || PyErr_ExceptionMatches(PyExc_SyntaxError) |
301 | || PyErr_ExceptionMatches(PyExc_ValueError) |
302 | || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) { |
303 | return; |
304 | } |
305 | PyObject *errstr = NULL; |
306 | PyObject *tuple = NULL; |
307 | PyObject *type; |
308 | PyObject *value; |
309 | PyObject *tback; |
310 | PyErr_Fetch(&type, &value, &tback); |
311 | errstr = PyObject_Str(value); |
312 | if (!errstr) { |
313 | goto error; |
314 | } |
315 | |
316 | PyObject *tmp = Py_BuildValue("(OiiO)" , filename, 0, -1, Py_None); |
317 | if (!tmp) { |
318 | goto error; |
319 | } |
320 | |
321 | tuple = PyTuple_Pack(2, errstr, tmp); |
322 | Py_DECREF(tmp); |
323 | if (!value) { |
324 | goto error; |
325 | } |
326 | PyErr_SetObject(PyExc_SyntaxError, tuple); |
327 | |
328 | error: |
329 | Py_XDECREF(type); |
330 | Py_XDECREF(value); |
331 | Py_XDECREF(tback); |
332 | Py_XDECREF(errstr); |
333 | Py_XDECREF(tuple); |
334 | } |
335 | |
336 | static int |
337 | tokenizer_error(Parser *p) |
338 | { |
339 | if (PyErr_Occurred()) { |
340 | return -1; |
341 | } |
342 | |
343 | const char *msg = NULL; |
344 | PyObject* errtype = PyExc_SyntaxError; |
345 | Py_ssize_t col_offset = -1; |
346 | switch (p->tok->done) { |
347 | case E_TOKEN: |
348 | msg = "invalid token" ; |
349 | break; |
350 | case E_EOF: |
351 | if (p->tok->level) { |
352 | raise_unclosed_parentheses_error(p); |
353 | } else { |
354 | RAISE_SYNTAX_ERROR("unexpected EOF while parsing" ); |
355 | } |
356 | return -1; |
357 | case E_DEDENT: |
358 | RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level" ); |
359 | return -1; |
360 | case E_INTR: |
361 | if (!PyErr_Occurred()) { |
362 | PyErr_SetNone(PyExc_KeyboardInterrupt); |
363 | } |
364 | return -1; |
365 | case E_NOMEM: |
366 | PyErr_NoMemory(); |
367 | return -1; |
368 | case E_TABSPACE: |
369 | errtype = PyExc_TabError; |
370 | msg = "inconsistent use of tabs and spaces in indentation" ; |
371 | break; |
372 | case E_TOODEEP: |
373 | errtype = PyExc_IndentationError; |
374 | msg = "too many levels of indentation" ; |
375 | break; |
376 | case E_LINECONT: { |
377 | col_offset = p->tok->cur - p->tok->buf - 1; |
378 | msg = "unexpected character after line continuation character" ; |
379 | break; |
380 | } |
381 | default: |
382 | msg = "unknown parsing error" ; |
383 | } |
384 | |
385 | RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, |
386 | col_offset >= 0 ? col_offset : 0, |
387 | p->tok->lineno, -1, msg); |
388 | return -1; |
389 | } |
390 | |
391 | void * |
392 | _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...) |
393 | { |
394 | if (p->fill == 0) { |
395 | va_list va; |
396 | va_start(va, errmsg); |
397 | _PyPegen_raise_error_known_location(p, errtype, 0, 0, 0, -1, errmsg, va); |
398 | va_end(va); |
399 | return NULL; |
400 | } |
401 | |
402 | Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1]; |
403 | Py_ssize_t col_offset; |
404 | Py_ssize_t end_col_offset = -1; |
405 | if (t->col_offset == -1) { |
406 | if (p->tok->cur == p->tok->buf) { |
407 | col_offset = 0; |
408 | } else { |
409 | const char* start = p->tok->buf ? p->tok->line_start : p->tok->buf; |
410 | col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int); |
411 | } |
412 | } else { |
413 | col_offset = t->col_offset + 1; |
414 | } |
415 | |
416 | if (t->end_col_offset != -1) { |
417 | end_col_offset = t->end_col_offset + 1; |
418 | } |
419 | |
420 | va_list va; |
421 | va_start(va, errmsg); |
422 | _PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va); |
423 | va_end(va); |
424 | |
425 | return NULL; |
426 | } |
427 | |
428 | static PyObject * |
429 | get_error_line(Parser *p, Py_ssize_t lineno) |
430 | { |
431 | /* If the file descriptor is interactive, the source lines of the current |
432 | * (multi-line) statement are stored in p->tok->interactive_src_start. |
433 | * If not, we're parsing from a string, which means that the whole source |
434 | * is stored in p->tok->str. */ |
435 | assert((p->tok->fp == NULL && p->tok->str != NULL) || p->tok->fp == stdin); |
436 | |
437 | char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str; |
438 | if (cur_line == NULL) { |
439 | assert(p->tok->fp_interactive); |
440 | // We can reach this point if the tokenizer buffers for interactive source have not been |
441 | // initialized because we failed to decode the original source with the given locale. |
442 | return PyUnicode_FromStringAndSize("" , 0); |
443 | } |
444 | const char* buf_end = p->tok->fp_interactive ? p->tok->interactive_src_end : p->tok->inp; |
445 | |
446 | Py_ssize_t relative_lineno = p->starting_lineno ? lineno - p->starting_lineno + 1 : lineno; |
447 | |
448 | for (int i = 0; i < relative_lineno - 1; i++) { |
449 | char *new_line = strchr(cur_line, '\n') + 1; |
450 | assert(new_line != NULL && new_line <= buf_end); |
451 | if (new_line == NULL || new_line > buf_end) { |
452 | break; |
453 | } |
454 | cur_line = new_line; |
455 | } |
456 | |
457 | char *next_newline; |
458 | if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line |
459 | next_newline = cur_line + strlen(cur_line); |
460 | } |
461 | return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace" ); |
462 | } |
463 | |
464 | void * |
465 | _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, |
466 | Py_ssize_t lineno, Py_ssize_t col_offset, |
467 | Py_ssize_t end_lineno, Py_ssize_t end_col_offset, |
468 | const char *errmsg, va_list va) |
469 | { |
470 | PyObject *value = NULL; |
471 | PyObject *errstr = NULL; |
472 | PyObject *error_line = NULL; |
473 | PyObject *tmp = NULL; |
474 | p->error_indicator = 1; |
475 | |
476 | if (end_lineno == CURRENT_POS) { |
477 | end_lineno = p->tok->lineno; |
478 | } |
479 | if (end_col_offset == CURRENT_POS) { |
480 | end_col_offset = p->tok->cur - p->tok->line_start; |
481 | } |
482 | |
483 | if (p->start_rule == Py_fstring_input) { |
484 | const char *fstring_msg = "f-string: " ; |
485 | Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg); |
486 | |
487 | char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character |
488 | if (!new_errmsg) { |
489 | return (void *) PyErr_NoMemory(); |
490 | } |
491 | |
492 | // Copy both strings into new buffer |
493 | memcpy(new_errmsg, fstring_msg, strlen(fstring_msg)); |
494 | memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg)); |
495 | new_errmsg[len] = 0; |
496 | errmsg = new_errmsg; |
497 | } |
498 | errstr = PyUnicode_FromFormatV(errmsg, va); |
499 | if (!errstr) { |
500 | goto error; |
501 | } |
502 | |
503 | if (p->tok->fp_interactive && p->tok->interactive_src_start != NULL) { |
504 | error_line = get_error_line(p, lineno); |
505 | } |
506 | else if (p->start_rule == Py_file_input) { |
507 | error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename, |
508 | (int) lineno, p->tok->encoding); |
509 | } |
510 | |
511 | if (!error_line) { |
512 | /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called, |
513 | then we need to find the error line from some other source, because |
514 | p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly |
515 | failed or we're parsing from a string or the REPL. There's a third edge case where |
516 | we're actually parsing from a file, which has an E_EOF SyntaxError and in that case |
517 | `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which |
518 | does not physically exist */ |
519 | assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF); |
520 | |
521 | if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) { |
522 | Py_ssize_t size = p->tok->inp - p->tok->buf; |
523 | error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace" ); |
524 | } |
525 | else if (p->tok->fp == NULL || p->tok->fp == stdin) { |
526 | error_line = get_error_line(p, lineno); |
527 | } |
528 | else { |
529 | error_line = PyUnicode_FromStringAndSize("" , 0); |
530 | } |
531 | if (!error_line) { |
532 | goto error; |
533 | } |
534 | } |
535 | |
536 | if (p->start_rule == Py_fstring_input) { |
537 | col_offset -= p->starting_col_offset; |
538 | end_col_offset -= p->starting_col_offset; |
539 | } |
540 | |
541 | Py_ssize_t col_number = col_offset; |
542 | Py_ssize_t end_col_number = end_col_offset; |
543 | |
544 | if (p->tok->encoding != NULL) { |
545 | col_number = byte_offset_to_character_offset(error_line, col_offset); |
546 | end_col_number = end_col_number > 0 ? |
547 | byte_offset_to_character_offset(error_line, end_col_offset) : |
548 | end_col_number; |
549 | } |
550 | tmp = Py_BuildValue("(OiiNii)" , p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number); |
551 | if (!tmp) { |
552 | goto error; |
553 | } |
554 | value = PyTuple_Pack(2, errstr, tmp); |
555 | Py_DECREF(tmp); |
556 | if (!value) { |
557 | goto error; |
558 | } |
559 | PyErr_SetObject(errtype, value); |
560 | |
561 | Py_DECREF(errstr); |
562 | Py_DECREF(value); |
563 | if (p->start_rule == Py_fstring_input) { |
564 | PyMem_Free((void *)errmsg); |
565 | } |
566 | return NULL; |
567 | |
568 | error: |
569 | Py_XDECREF(errstr); |
570 | Py_XDECREF(error_line); |
571 | if (p->start_rule == Py_fstring_input) { |
572 | PyMem_Free((void *)errmsg); |
573 | } |
574 | return NULL; |
575 | } |
576 | |
#if 0
/* Debug helper (currently compiled out): name of a token type, or
 * "<Huh?>" when the type is outside 0..N_TOKENS. */
static const char *
token_name(int type)
{
    int known = (type >= 0 && type <= N_TOKENS);
    return known ? _PyParser_TokenNames[type] : "<Huh?>";
}
#endif
587 | |
588 | // Here, mark is the start of the node, while p->mark is the end. |
589 | // If node==NULL, they should be the same. |
590 | int |
591 | _PyPegen_insert_memo(Parser *p, int mark, int type, void *node) |
592 | { |
593 | // Insert in front |
594 | Memo *m = _PyArena_Malloc(p->arena, sizeof(Memo)); |
595 | if (m == NULL) { |
596 | return -1; |
597 | } |
598 | m->type = type; |
599 | m->node = node; |
600 | m->mark = p->mark; |
601 | m->next = p->tokens[mark]->memo; |
602 | p->tokens[mark]->memo = m; |
603 | return 0; |
604 | } |
605 | |
606 | // Like _PyPegen_insert_memo(), but updates an existing node if found. |
607 | int |
608 | _PyPegen_update_memo(Parser *p, int mark, int type, void *node) |
609 | { |
610 | for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) { |
611 | if (m->type == type) { |
612 | // Update existing node. |
613 | m->node = node; |
614 | m->mark = p->mark; |
615 | return 0; |
616 | } |
617 | } |
618 | // Insert new node. |
619 | return _PyPegen_insert_memo(p, mark, type, node); |
620 | } |
621 | |
622 | // Return dummy NAME. |
623 | void * |
624 | _PyPegen_dummy_name(Parser *p, ...) |
625 | { |
626 | static void *cache = NULL; |
627 | |
628 | if (cache != NULL) { |
629 | return cache; |
630 | } |
631 | |
632 | PyObject *id = _create_dummy_identifier(p); |
633 | if (!id) { |
634 | return NULL; |
635 | } |
636 | cache = _PyAST_Name(id, Load, 1, 0, 1, 0, p->arena); |
637 | return cache; |
638 | } |
639 | |
640 | static int |
641 | _get_keyword_or_name_type(Parser *p, const char *name, int name_len) |
642 | { |
643 | assert(name_len > 0); |
644 | if (name_len >= p->n_keyword_lists || |
645 | p->keywords[name_len] == NULL || |
646 | p->keywords[name_len]->type == -1) { |
647 | return NAME; |
648 | } |
649 | for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) { |
650 | if (strncmp(k->str, name, name_len) == 0) { |
651 | return k->type; |
652 | } |
653 | } |
654 | return NAME; |
655 | } |
656 | |
657 | static int |
658 | (growable_comment_array *arr, size_t initial_size) { |
659 | assert(initial_size > 0); |
660 | arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items)); |
661 | arr->size = initial_size; |
662 | arr->num_items = 0; |
663 | |
664 | return arr->items != NULL; |
665 | } |
666 | |
667 | static int |
668 | (growable_comment_array *arr, int lineno, char *) { |
669 | if (arr->num_items >= arr->size) { |
670 | size_t new_size = arr->size * 2; |
671 | void *new_items_array = PyMem_Realloc(arr->items, new_size * sizeof(*arr->items)); |
672 | if (!new_items_array) { |
673 | return 0; |
674 | } |
675 | arr->items = new_items_array; |
676 | arr->size = new_size; |
677 | } |
678 | |
679 | arr->items[arr->num_items].lineno = lineno; |
680 | arr->items[arr->num_items].comment = comment; // Take ownership |
681 | arr->num_items++; |
682 | return 1; |
683 | } |
684 | |
685 | static void |
686 | (growable_comment_array *arr) { |
687 | for (unsigned i = 0; i < arr->num_items; i++) { |
688 | PyMem_Free(arr->items[i].comment); |
689 | } |
690 | PyMem_Free(arr->items); |
691 | } |
692 | |
693 | static int |
694 | initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) { |
695 | assert(token != NULL); |
696 | |
697 | token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type; |
698 | token->bytes = PyBytes_FromStringAndSize(start, end - start); |
699 | if (token->bytes == NULL) { |
700 | return -1; |
701 | } |
702 | |
703 | if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) { |
704 | Py_DECREF(token->bytes); |
705 | return -1; |
706 | } |
707 | |
708 | token->level = p->tok->level; |
709 | |
710 | const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start; |
711 | int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno; |
712 | int end_lineno = p->tok->lineno; |
713 | |
714 | int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1; |
715 | int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1; |
716 | |
717 | token->lineno = lineno; |
718 | token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + col_offset : col_offset; |
719 | token->end_lineno = end_lineno; |
720 | token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + end_col_offset : end_col_offset; |
721 | |
722 | p->fill += 1; |
723 | |
724 | if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) { |
725 | return raise_decode_error(p); |
726 | } |
727 | |
728 | return (token_type == ERRORTOKEN ? tokenizer_error(p) : 0); |
729 | } |
730 | |
731 | static int |
732 | _resize_tokens_array(Parser *p) { |
733 | int newsize = p->size * 2; |
734 | Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *)); |
735 | if (new_tokens == NULL) { |
736 | PyErr_NoMemory(); |
737 | return -1; |
738 | } |
739 | p->tokens = new_tokens; |
740 | |
741 | for (int i = p->size; i < newsize; i++) { |
742 | p->tokens[i] = PyMem_Calloc(1, sizeof(Token)); |
743 | if (p->tokens[i] == NULL) { |
744 | p->size = i; // Needed, in order to cleanup correctly after parser fails |
745 | PyErr_NoMemory(); |
746 | return -1; |
747 | } |
748 | } |
749 | p->size = newsize; |
750 | return 0; |
751 | } |
752 | |
753 | int |
754 | _PyPegen_fill_token(Parser *p) |
755 | { |
756 | const char *start; |
757 | const char *end; |
758 | int type = PyTokenizer_Get(p->tok, &start, &end); |
759 | |
760 | // Record and skip '# type: ignore' comments |
761 | while (type == TYPE_IGNORE) { |
762 | Py_ssize_t len = end - start; |
763 | char *tag = PyMem_Malloc(len + 1); |
764 | if (tag == NULL) { |
765 | PyErr_NoMemory(); |
766 | return -1; |
767 | } |
768 | strncpy(tag, start, len); |
769 | tag[len] = '\0'; |
770 | // Ownership of tag passes to the growable array |
771 | if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) { |
772 | PyErr_NoMemory(); |
773 | return -1; |
774 | } |
775 | type = PyTokenizer_Get(p->tok, &start, &end); |
776 | } |
777 | |
778 | // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing |
779 | if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) { |
780 | type = NEWLINE; /* Add an extra newline */ |
781 | p->parsing_started = 0; |
782 | |
783 | if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) { |
784 | p->tok->pendin = -p->tok->indent; |
785 | p->tok->indent = 0; |
786 | } |
787 | } |
788 | else { |
789 | p->parsing_started = 1; |
790 | } |
791 | |
792 | // Check if we are at the limit of the token array capacity and resize if needed |
793 | if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) { |
794 | return -1; |
795 | } |
796 | |
797 | Token *t = p->tokens[p->fill]; |
798 | return initialize_token(p, t, start, end, type); |
799 | } |
800 | |
801 | |
802 | #if defined(Py_DEBUG) |
803 | // Instrumentation to count the effectiveness of memoization. |
804 | // The array counts the number of tokens skipped by memoization, |
805 | // indexed by type. |
806 | |
807 | #define NSTATISTICS 2000 |
808 | static long memo_statistics[NSTATISTICS]; |
809 | |
810 | void |
811 | _PyPegen_clear_memo_statistics() |
812 | { |
813 | for (int i = 0; i < NSTATISTICS; i++) { |
814 | memo_statistics[i] = 0; |
815 | } |
816 | } |
817 | |
818 | PyObject * |
819 | _PyPegen_get_memo_statistics() |
820 | { |
821 | PyObject *ret = PyList_New(NSTATISTICS); |
822 | if (ret == NULL) { |
823 | return NULL; |
824 | } |
825 | for (int i = 0; i < NSTATISTICS; i++) { |
826 | PyObject *value = PyLong_FromLong(memo_statistics[i]); |
827 | if (value == NULL) { |
828 | Py_DECREF(ret); |
829 | return NULL; |
830 | } |
831 | // PyList_SetItem borrows a reference to value. |
832 | if (PyList_SetItem(ret, i, value) < 0) { |
833 | Py_DECREF(ret); |
834 | return NULL; |
835 | } |
836 | } |
837 | return ret; |
838 | } |
839 | #endif |
840 | |
841 | int // bool |
842 | _PyPegen_is_memoized(Parser *p, int type, void *pres) |
843 | { |
844 | if (p->mark == p->fill) { |
845 | if (_PyPegen_fill_token(p) < 0) { |
846 | p->error_indicator = 1; |
847 | return -1; |
848 | } |
849 | } |
850 | |
851 | Token *t = p->tokens[p->mark]; |
852 | |
853 | for (Memo *m = t->memo; m != NULL; m = m->next) { |
854 | if (m->type == type) { |
855 | #if defined(PY_DEBUG) |
856 | if (0 <= type && type < NSTATISTICS) { |
857 | long count = m->mark - p->mark; |
858 | // A memoized negative result counts for one. |
859 | if (count <= 0) { |
860 | count = 1; |
861 | } |
862 | memo_statistics[type] += count; |
863 | } |
864 | #endif |
865 | p->mark = m->mark; |
866 | *(void **)(pres) = m->node; |
867 | return 1; |
868 | } |
869 | } |
870 | return 0; |
871 | } |
872 | |
873 | int |
874 | _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p) |
875 | { |
876 | int mark = p->mark; |
877 | void *res = func(p); |
878 | p->mark = mark; |
879 | return (res != NULL) == positive; |
880 | } |
881 | |
882 | int |
883 | _PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg) |
884 | { |
885 | int mark = p->mark; |
886 | void *res = func(p, arg); |
887 | p->mark = mark; |
888 | return (res != NULL) == positive; |
889 | } |
890 | |
891 | int |
892 | _PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg) |
893 | { |
894 | int mark = p->mark; |
895 | void *res = func(p, arg); |
896 | p->mark = mark; |
897 | return (res != NULL) == positive; |
898 | } |
899 | |
900 | int |
901 | _PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p) |
902 | { |
903 | int mark = p->mark; |
904 | void *res = (void*)func(p); |
905 | p->mark = mark; |
906 | return (res != NULL) == positive; |
907 | } |
908 | |
909 | Token * |
910 | _PyPegen_expect_token(Parser *p, int type) |
911 | { |
912 | if (p->mark == p->fill) { |
913 | if (_PyPegen_fill_token(p) < 0) { |
914 | p->error_indicator = 1; |
915 | return NULL; |
916 | } |
917 | } |
918 | Token *t = p->tokens[p->mark]; |
919 | if (t->type != type) { |
920 | return NULL; |
921 | } |
922 | p->mark += 1; |
923 | return t; |
924 | } |
925 | |
926 | Token * |
927 | _PyPegen_expect_forced_token(Parser *p, int type, const char* expected) { |
928 | |
929 | if (p->error_indicator == 1) { |
930 | return NULL; |
931 | } |
932 | |
933 | if (p->mark == p->fill) { |
934 | if (_PyPegen_fill_token(p) < 0) { |
935 | p->error_indicator = 1; |
936 | return NULL; |
937 | } |
938 | } |
939 | Token *t = p->tokens[p->mark]; |
940 | if (t->type != type) { |
941 | RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "expected '%s'" , expected); |
942 | return NULL; |
943 | } |
944 | p->mark += 1; |
945 | return t; |
946 | } |
947 | |
948 | expr_ty |
949 | _PyPegen_expect_soft_keyword(Parser *p, const char *keyword) |
950 | { |
951 | if (p->mark == p->fill) { |
952 | if (_PyPegen_fill_token(p) < 0) { |
953 | p->error_indicator = 1; |
954 | return NULL; |
955 | } |
956 | } |
957 | Token *t = p->tokens[p->mark]; |
958 | if (t->type != NAME) { |
959 | return NULL; |
960 | } |
961 | const char *s = PyBytes_AsString(t->bytes); |
962 | if (!s) { |
963 | p->error_indicator = 1; |
964 | return NULL; |
965 | } |
966 | if (strcmp(s, keyword) != 0) { |
967 | return NULL; |
968 | } |
969 | return _PyPegen_name_token(p); |
970 | } |
971 | |
972 | Token * |
973 | _PyPegen_get_last_nonnwhitespace_token(Parser *p) |
974 | { |
975 | assert(p->mark >= 0); |
976 | Token *token = NULL; |
977 | for (int m = p->mark - 1; m >= 0; m--) { |
978 | token = p->tokens[m]; |
979 | if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) { |
980 | break; |
981 | } |
982 | } |
983 | return token; |
984 | } |
985 | |
986 | static expr_ty |
987 | _PyPegen_name_from_token(Parser *p, Token* t) |
988 | { |
989 | if (t == NULL) { |
990 | return NULL; |
991 | } |
992 | const char *s = PyBytes_AsString(t->bytes); |
993 | if (!s) { |
994 | p->error_indicator = 1; |
995 | return NULL; |
996 | } |
997 | PyObject *id = _PyPegen_new_identifier(p, s); |
998 | if (id == NULL) { |
999 | p->error_indicator = 1; |
1000 | return NULL; |
1001 | } |
1002 | return _PyAST_Name(id, Load, t->lineno, t->col_offset, t->end_lineno, |
1003 | t->end_col_offset, p->arena); |
1004 | } |
1005 | |
1006 | |
1007 | expr_ty |
1008 | _PyPegen_name_token(Parser *p) |
1009 | { |
1010 | Token *t = _PyPegen_expect_token(p, NAME); |
1011 | return _PyPegen_name_from_token(p, t); |
1012 | } |
1013 | |
1014 | void * |
1015 | _PyPegen_string_token(Parser *p) |
1016 | { |
1017 | return _PyPegen_expect_token(p, STRING); |
1018 | } |
1019 | |
1020 | |
1021 | expr_ty _PyPegen_soft_keyword_token(Parser *p) { |
1022 | Token *t = _PyPegen_expect_token(p, NAME); |
1023 | if (t == NULL) { |
1024 | return NULL; |
1025 | } |
1026 | char *the_token; |
1027 | Py_ssize_t size; |
1028 | PyBytes_AsStringAndSize(t->bytes, &the_token, &size); |
1029 | for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) { |
1030 | if (strncmp(*keyword, the_token, size) == 0) { |
1031 | return _PyPegen_name_from_token(p, t); |
1032 | } |
1033 | } |
1034 | return NULL; |
1035 | } |
1036 | |
/* Converts the text of a numeric literal into a Python object.
   Depending on the text, the result is built with PyLong_FromLong /
   PyLong_FromString (integers), PyComplex_FromCComplex (text ending in
   'j' or 'J') or PyFloat_FromDouble (everything else).
   Returns NULL when the floating-point conversion reports an error. */
static PyObject *
parsenumber_raw(const char *s)
{
    const char *end;
    long x;
    double dx;
    Py_complex compl;
    int imflag;

    assert(s != NULL);
    /* Reset errno so that a failure reported by the integer conversions
       below can be told apart from a stale value. */
    errno = 0;
    /* Look at the last character to detect an imaginary suffix.
       NOTE(review): assumes s is non-empty -- confirm callers guarantee it. */
    end = s + strlen(s) - 1;
    imflag = *end == 'j' || *end == 'J';
    if (s[0] == '0') {
        /* Text starting with '0' is converted as unsigned; if the value does
           not fit in a non-negative long, let PyLong_FromString handle it. */
        x = (long)PyOS_strtoul(s, (char **)&end, 0);
        if (x < 0 && errno == 0) {
            return PyLong_FromString(s, (char **)0, 0);
        }
    }
    else {
        x = PyOS_strtol(s, (char **)&end, 0);
    }
    /* The integer conversion consumed the whole string: it is an integer. */
    if (*end == '\0') {
        if (errno != 0) {
            /* The C conversion flagged a problem (presumably overflow);
               retry with PyLong_FromString. */
            return PyLong_FromString(s, (char **)0, 0);
        }
        return PyLong_FromLong(x);
    }
    /* XXX Huge floats may silently fail */
    if (imflag) {
        /* Imaginary literal: real part is zero, the text gives the
           imaginary part. */
        compl.real = 0.;
        compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
        if (compl.imag == -1.0 && PyErr_Occurred()) {
            return NULL;
        }
        return PyComplex_FromCComplex(compl);
    }
    dx = PyOS_string_to_double(s, NULL, NULL);
    /* -1.0 is also a valid value, so an error is only reported when an
       exception is actually pending. */
    if (dx == -1.0 && PyErr_Occurred()) {
        return NULL;
    }
    return PyFloat_FromDouble(dx);
}
1080 | |
1081 | static PyObject * |
1082 | parsenumber(const char *s) |
1083 | { |
1084 | char *dup; |
1085 | char *end; |
1086 | PyObject *res = NULL; |
1087 | |
1088 | assert(s != NULL); |
1089 | |
1090 | if (strchr(s, '_') == NULL) { |
1091 | return parsenumber_raw(s); |
1092 | } |
1093 | /* Create a duplicate without underscores. */ |
1094 | dup = PyMem_Malloc(strlen(s) + 1); |
1095 | if (dup == NULL) { |
1096 | return PyErr_NoMemory(); |
1097 | } |
1098 | end = dup; |
1099 | for (; *s; s++) { |
1100 | if (*s != '_') { |
1101 | *end++ = *s; |
1102 | } |
1103 | } |
1104 | *end = '\0'; |
1105 | res = parsenumber_raw(dup); |
1106 | PyMem_Free(dup); |
1107 | return res; |
1108 | } |
1109 | |
1110 | expr_ty |
1111 | _PyPegen_number_token(Parser *p) |
1112 | { |
1113 | Token *t = _PyPegen_expect_token(p, NUMBER); |
1114 | if (t == NULL) { |
1115 | return NULL; |
1116 | } |
1117 | |
1118 | const char *num_raw = PyBytes_AsString(t->bytes); |
1119 | if (num_raw == NULL) { |
1120 | p->error_indicator = 1; |
1121 | return NULL; |
1122 | } |
1123 | |
1124 | if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) { |
1125 | p->error_indicator = 1; |
1126 | return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported " |
1127 | "in Python 3.6 and greater" ); |
1128 | } |
1129 | |
1130 | PyObject *c = parsenumber(num_raw); |
1131 | |
1132 | if (c == NULL) { |
1133 | p->error_indicator = 1; |
1134 | return NULL; |
1135 | } |
1136 | |
1137 | if (_PyArena_AddPyObject(p->arena, c) < 0) { |
1138 | Py_DECREF(c); |
1139 | p->error_indicator = 1; |
1140 | return NULL; |
1141 | } |
1142 | |
1143 | return _PyAST_Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno, |
1144 | t->end_col_offset, p->arena); |
1145 | } |
1146 | |
1147 | /* Check that the source for a single input statement really is a single |
1148 | statement by looking at what is left in the buffer after parsing. |
1149 | Trailing whitespace and comments are OK. */ |
1150 | static int // bool |
1151 | bad_single_statement(Parser *p) |
1152 | { |
1153 | char *cur = p->tok->cur; |
1154 | char c = *cur; |
1155 | |
1156 | for (;;) { |
1157 | while (c == ' ' || c == '\t' || c == '\n' || c == '\014') { |
1158 | c = *++cur; |
1159 | } |
1160 | |
1161 | if (!c) { |
1162 | return 0; |
1163 | } |
1164 | |
1165 | if (c != '#') { |
1166 | return 1; |
1167 | } |
1168 | |
1169 | /* Suck up comment. */ |
1170 | while (c && c != '\n') { |
1171 | c = *++cur; |
1172 | } |
1173 | } |
1174 | } |
1175 | |
1176 | void |
1177 | _PyPegen_Parser_Free(Parser *p) |
1178 | { |
1179 | Py_XDECREF(p->normalize); |
1180 | for (int i = 0; i < p->size; i++) { |
1181 | PyMem_Free(p->tokens[i]); |
1182 | } |
1183 | PyMem_Free(p->tokens); |
1184 | growable_comment_array_deallocate(&p->type_ignore_comments); |
1185 | PyMem_Free(p); |
1186 | } |
1187 | |
1188 | static int |
1189 | compute_parser_flags(PyCompilerFlags *flags) |
1190 | { |
1191 | int parser_flags = 0; |
1192 | if (!flags) { |
1193 | return 0; |
1194 | } |
1195 | if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) { |
1196 | parser_flags |= PyPARSE_DONT_IMPLY_DEDENT; |
1197 | } |
1198 | if (flags->cf_flags & PyCF_IGNORE_COOKIE) { |
1199 | parser_flags |= PyPARSE_IGNORE_COOKIE; |
1200 | } |
1201 | if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) { |
1202 | parser_flags |= PyPARSE_BARRY_AS_BDFL; |
1203 | } |
1204 | if (flags->cf_flags & PyCF_TYPE_COMMENTS) { |
1205 | parser_flags |= PyPARSE_TYPE_COMMENTS; |
1206 | } |
1207 | if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) { |
1208 | parser_flags |= PyPARSE_ASYNC_HACKS; |
1209 | } |
1210 | if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) { |
1211 | parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT; |
1212 | } |
1213 | return parser_flags; |
1214 | } |
1215 | |
1216 | Parser * |
1217 | _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags, |
1218 | int feature_version, int *errcode, PyArena *arena) |
1219 | { |
1220 | Parser *p = PyMem_Malloc(sizeof(Parser)); |
1221 | if (p == NULL) { |
1222 | return (Parser *) PyErr_NoMemory(); |
1223 | } |
1224 | assert(tok != NULL); |
1225 | tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > 0; |
1226 | tok->async_hacks = (flags & PyPARSE_ASYNC_HACKS) > 0; |
1227 | p->tok = tok; |
1228 | p->keywords = NULL; |
1229 | p->n_keyword_lists = -1; |
1230 | p->soft_keywords = NULL; |
1231 | p->tokens = PyMem_Malloc(sizeof(Token *)); |
1232 | if (!p->tokens) { |
1233 | PyMem_Free(p); |
1234 | return (Parser *) PyErr_NoMemory(); |
1235 | } |
1236 | p->tokens[0] = PyMem_Calloc(1, sizeof(Token)); |
1237 | if (!p->tokens) { |
1238 | PyMem_Free(p->tokens); |
1239 | PyMem_Free(p); |
1240 | return (Parser *) PyErr_NoMemory(); |
1241 | } |
1242 | if (!growable_comment_array_init(&p->type_ignore_comments, 10)) { |
1243 | PyMem_Free(p->tokens[0]); |
1244 | PyMem_Free(p->tokens); |
1245 | PyMem_Free(p); |
1246 | return (Parser *) PyErr_NoMemory(); |
1247 | } |
1248 | |
1249 | p->mark = 0; |
1250 | p->fill = 0; |
1251 | p->size = 1; |
1252 | |
1253 | p->errcode = errcode; |
1254 | p->arena = arena; |
1255 | p->start_rule = start_rule; |
1256 | p->parsing_started = 0; |
1257 | p->normalize = NULL; |
1258 | p->error_indicator = 0; |
1259 | |
1260 | p->starting_lineno = 0; |
1261 | p->starting_col_offset = 0; |
1262 | p->flags = flags; |
1263 | p->feature_version = feature_version; |
1264 | p->known_err_token = NULL; |
1265 | p->level = 0; |
1266 | p->call_invalid_rules = 0; |
1267 | return p; |
1268 | } |
1269 | |
1270 | static void |
1271 | reset_parser_state(Parser *p) |
1272 | { |
1273 | for (int i = 0; i < p->fill; i++) { |
1274 | p->tokens[i]->memo = NULL; |
1275 | } |
1276 | p->mark = 0; |
1277 | p->call_invalid_rules = 1; |
1278 | // Don't try to get extra tokens in interactive mode when trying to |
1279 | // raise specialized errors in the second pass. |
1280 | p->tok->interactive_underflow = IUNDERFLOW_STOP; |
1281 | } |
1282 | |
/* Runs the tokenizer over the remaining input looking for an error token
   caused by unclosed parentheses that should replace the exception that is
   currently pending.
   Returns 0 when nothing was replaced (including the interactive case,
   where the scan is skipped) and -1 when an unclosed-parentheses error was
   raised. The previously pending exception is restored unless a new one is
   set when the function exits. */
static int
_PyPegen_check_tokenizer_errors(Parser *p) {
    // Tokenize the whole input to see if there are any tokenization
    // errors such as mismatching parentheses. These will get priority
    // over generic syntax errors only if the line number of the error is
    // before the one that we had for the generic error.

    // We don't want to tokenize to the end for interactive input
    if (p->tok->prompt != NULL) {
        return 0;
    }

    // Set the pending exception aside so that tokenizing can proceed; it is
    // either restored or released at `exit`.
    PyObject *type, *value, *traceback;
    PyErr_Fetch(&type, &value, &traceback);

    // Line of the error we already have: the known error token if there is
    // one, otherwise the last token that was read.
    // NOTE(review): the fallback indexes p->tokens[p->fill - 1], which
    // assumes p->fill > 0 -- confirm callers guarantee it.
    Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
    Py_ssize_t current_err_line = current_token->lineno;

    int ret = 0;

    for (;;) {
        const char *start;
        const char *end;
        switch (PyTokenizer_Get(p->tok, &start, &end)) {
            case ERRORTOKEN:
                // Only parentheses still open at the error are of interest,
                // and only if they were opened on an earlier line than the
                // error we already have.
                if (p->tok->level != 0) {
                    int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
                    if (current_err_line > error_lineno) {
                        raise_unclosed_parentheses_error(p);
                        ret = -1;
                        goto exit;
                    }
                }
                break;
            case ENDMARKER:
                break;
            default:
                // Any other token: keep scanning.
                continue;
        }
        // Reached only from ERRORTOKEN / ENDMARKER: stop scanning.
        break;
    }


exit:
    if (PyErr_Occurred()) {
        // A new exception is set: it wins, so release the saved one.
        Py_XDECREF(value);
        Py_XDECREF(type);
        Py_XDECREF(traceback);
    } else {
        PyErr_Restore(type, value, traceback);
    }
    return ret;
}
1336 | |
1337 | |
1338 | static inline int |
1339 | _is_end_of_source(Parser *p) { |
1340 | int err = p->tok->done; |
1341 | return err == E_EOF || err == E_EOFS || err == E_EOLS; |
1342 | } |
1343 | |
/* Runs the generated parser and returns its result.
   When the first pass fails, a second pass is made with the "invalid_*"
   rules enabled (see reset_parser_state) so that a more specific error can
   be raised; if that pass raises nothing, a generic error is raised here.
   Returns NULL with an exception set on failure. */
void *
_PyPegen_run_parser(Parser *p)
{
    void *res = _PyPegen_parse(p);
    assert(p->level == 0);
    if (res == NULL) {
        // Incomplete input is reported with a dedicated message, replacing
        // whatever error the first pass produced.
        if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) &&  _is_end_of_source(p)) {
            PyErr_Clear();
            return RAISE_SYNTAX_ERROR("incomplete input" );
        }
        // Errors other than SyntaxError are propagated untouched.
        if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
            return NULL;
        }
       // Make a second parser pass. In this pass we activate heavier and slower checks
        // to produce better error messages and more complete diagnostics. Extra "invalid_*"
        // rules will be active during parsing.
        // NOTE(review): this reads p->tokens[p->fill - 1] before the
        // `p->fill == 0` case is checked further down -- confirm p->fill
        // cannot be 0 here.
        Token *last_token = p->tokens[p->fill - 1];
        reset_parser_state(p);
        _PyPegen_parse(p);
        if (PyErr_Occurred()) {
            // Prioritize tokenizer errors to custom syntax errors raised
            // on the second phase only if the errors come from the parser.
            int is_tok_ok = (p->tok->done == E_DONE || p->tok->done == E_OK);
            if (is_tok_ok && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
                _PyPegen_check_tokenizer_errors(p);
            }
            return NULL;
        }
        // The second pass raised nothing: fall back to a generic error.
        if (p->fill == 0) {
            RAISE_SYNTAX_ERROR("error at start before reading any input" );
        }
        else if (last_token->type == ERRORTOKEN && p->tok->done == E_EOF) {
            if (p->tok->level) {
                raise_unclosed_parentheses_error(p);
            } else {
                RAISE_SYNTAX_ERROR("unexpected EOF while parsing" );
            }
        }
        else {
            if (p->tokens[p->fill-1]->type == INDENT) {
                RAISE_INDENTATION_ERROR("unexpected indent" );
            }
            else if (p->tokens[p->fill-1]->type == DEDENT) {
                RAISE_INDENTATION_ERROR("unexpected unindent" );
            }
            else {
                // Use the last token we found on the first pass to avoid reporting
                // incorrect locations for generic syntax errors just because we reached
                // further away when trying to find specific syntax errors in the second
                // pass.
                RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax" );
                // _PyPegen_check_tokenizer_errors will override the existing
                // generic SyntaxError we just raised if errors are found.
                _PyPegen_check_tokenizer_errors(p);
            }
        }
        return NULL;
    }

    // For single-statement input, anything other than whitespace/comments
    // left in the buffer is an error.
    if (p->start_rule == Py_single_input && bad_single_statement(p)) {
        p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
        return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement" );
    }

    // test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate()
#if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN)
    if (p->start_rule == Py_single_input ||
        p->start_rule == Py_file_input ||
        p->start_rule == Py_eval_input)
    {
        if (!_PyAST_Validate(res)) {
            return NULL;
        }
    }
#endif
    return res;
}
1421 | |
1422 | mod_ty |
1423 | _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob, |
1424 | const char *enc, const char *ps1, const char *ps2, |
1425 | PyCompilerFlags *flags, int *errcode, PyArena *arena) |
1426 | { |
1427 | struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2); |
1428 | if (tok == NULL) { |
1429 | if (PyErr_Occurred()) { |
1430 | raise_tokenizer_init_error(filename_ob); |
1431 | return NULL; |
1432 | } |
1433 | return NULL; |
1434 | } |
1435 | if (!tok->fp || ps1 != NULL || ps2 != NULL || |
1436 | PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>" ) == 0) { |
1437 | tok->fp_interactive = 1; |
1438 | } |
1439 | // This transfers the ownership to the tokenizer |
1440 | tok->filename = filename_ob; |
1441 | Py_INCREF(filename_ob); |
1442 | |
1443 | // From here on we need to clean up even if there's an error |
1444 | mod_ty result = NULL; |
1445 | |
1446 | int parser_flags = compute_parser_flags(flags); |
1447 | Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION, |
1448 | errcode, arena); |
1449 | if (p == NULL) { |
1450 | goto error; |
1451 | } |
1452 | |
1453 | result = _PyPegen_run_parser(p); |
1454 | _PyPegen_Parser_Free(p); |
1455 | |
1456 | error: |
1457 | PyTokenizer_Free(tok); |
1458 | return result; |
1459 | } |
1460 | |
1461 | mod_ty |
1462 | _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob, |
1463 | PyCompilerFlags *flags, PyArena *arena) |
1464 | { |
1465 | int exec_input = start_rule == Py_file_input; |
1466 | |
1467 | struct tok_state *tok; |
1468 | if (flags != NULL && flags->cf_flags & PyCF_IGNORE_COOKIE) { |
1469 | tok = PyTokenizer_FromUTF8(str, exec_input); |
1470 | } else { |
1471 | tok = PyTokenizer_FromString(str, exec_input); |
1472 | } |
1473 | if (tok == NULL) { |
1474 | if (PyErr_Occurred()) { |
1475 | raise_tokenizer_init_error(filename_ob); |
1476 | } |
1477 | return NULL; |
1478 | } |
1479 | // This transfers the ownership to the tokenizer |
1480 | tok->filename = filename_ob; |
1481 | Py_INCREF(filename_ob); |
1482 | |
1483 | // We need to clear up from here on |
1484 | mod_ty result = NULL; |
1485 | |
1486 | int parser_flags = compute_parser_flags(flags); |
1487 | int feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ? |
1488 | flags->cf_feature_version : PY_MINOR_VERSION; |
1489 | Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version, |
1490 | NULL, arena); |
1491 | if (p == NULL) { |
1492 | goto error; |
1493 | } |
1494 | |
1495 | result = _PyPegen_run_parser(p); |
1496 | _PyPegen_Parser_Free(p); |
1497 | |
1498 | error: |
1499 | PyTokenizer_Free(tok); |
1500 | return result; |
1501 | } |
1502 | |
1503 | asdl_stmt_seq* |
1504 | _PyPegen_interactive_exit(Parser *p) |
1505 | { |
1506 | if (p->errcode) { |
1507 | *(p->errcode) = E_EOF; |
1508 | } |
1509 | return NULL; |
1510 | } |
1511 | |
1512 | /* Creates a single-element asdl_seq* that contains a */ |
1513 | asdl_seq * |
1514 | _PyPegen_singleton_seq(Parser *p, void *a) |
1515 | { |
1516 | assert(a != NULL); |
1517 | asdl_seq *seq = (asdl_seq*)_Py_asdl_generic_seq_new(1, p->arena); |
1518 | if (!seq) { |
1519 | return NULL; |
1520 | } |
1521 | asdl_seq_SET_UNTYPED(seq, 0, a); |
1522 | return seq; |
1523 | } |
1524 | |
1525 | /* Creates a copy of seq and prepends a to it */ |
1526 | asdl_seq * |
1527 | _PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq) |
1528 | { |
1529 | assert(a != NULL); |
1530 | if (!seq) { |
1531 | return _PyPegen_singleton_seq(p, a); |
1532 | } |
1533 | |
1534 | asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena); |
1535 | if (!new_seq) { |
1536 | return NULL; |
1537 | } |
1538 | |
1539 | asdl_seq_SET_UNTYPED(new_seq, 0, a); |
1540 | for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) { |
1541 | asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i - 1)); |
1542 | } |
1543 | return new_seq; |
1544 | } |
1545 | |
1546 | /* Creates a copy of seq and appends a to it */ |
1547 | asdl_seq * |
1548 | _PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a) |
1549 | { |
1550 | assert(a != NULL); |
1551 | if (!seq) { |
1552 | return _PyPegen_singleton_seq(p, a); |
1553 | } |
1554 | |
1555 | asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena); |
1556 | if (!new_seq) { |
1557 | return NULL; |
1558 | } |
1559 | |
1560 | for (Py_ssize_t i = 0, l = asdl_seq_LEN(new_seq); i + 1 < l; i++) { |
1561 | asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i)); |
1562 | } |
1563 | asdl_seq_SET_UNTYPED(new_seq, asdl_seq_LEN(new_seq) - 1, a); |
1564 | return new_seq; |
1565 | } |
1566 | |
1567 | static Py_ssize_t |
1568 | _get_flattened_seq_size(asdl_seq *seqs) |
1569 | { |
1570 | Py_ssize_t size = 0; |
1571 | for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) { |
1572 | asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i); |
1573 | size += asdl_seq_LEN(inner_seq); |
1574 | } |
1575 | return size; |
1576 | } |
1577 | |
1578 | /* Flattens an asdl_seq* of asdl_seq*s */ |
1579 | asdl_seq * |
1580 | _PyPegen_seq_flatten(Parser *p, asdl_seq *seqs) |
1581 | { |
1582 | Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs); |
1583 | assert(flattened_seq_size > 0); |
1584 | |
1585 | asdl_seq *flattened_seq = (asdl_seq*)_Py_asdl_generic_seq_new(flattened_seq_size, p->arena); |
1586 | if (!flattened_seq) { |
1587 | return NULL; |
1588 | } |
1589 | |
1590 | int flattened_seq_idx = 0; |
1591 | for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) { |
1592 | asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i); |
1593 | for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) { |
1594 | asdl_seq_SET_UNTYPED(flattened_seq, flattened_seq_idx++, asdl_seq_GET_UNTYPED(inner_seq, j)); |
1595 | } |
1596 | } |
1597 | assert(flattened_seq_idx == flattened_seq_size); |
1598 | |
1599 | return flattened_seq; |
1600 | } |
1601 | |
1602 | void * |
1603 | _PyPegen_seq_last_item(asdl_seq *seq) |
1604 | { |
1605 | Py_ssize_t len = asdl_seq_LEN(seq); |
1606 | return asdl_seq_GET_UNTYPED(seq, len - 1); |
1607 | } |
1608 | |
1609 | void * |
1610 | _PyPegen_seq_first_item(asdl_seq *seq) |
1611 | { |
1612 | return asdl_seq_GET_UNTYPED(seq, 0); |
1613 | } |
1614 | |
1615 | |
1616 | /* Creates a new name of the form <first_name>.<second_name> */ |
1617 | expr_ty |
1618 | _PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name) |
1619 | { |
1620 | assert(first_name != NULL && second_name != NULL); |
1621 | PyObject *first_identifier = first_name->v.Name.id; |
1622 | PyObject *second_identifier = second_name->v.Name.id; |
1623 | |
1624 | if (PyUnicode_READY(first_identifier) == -1) { |
1625 | return NULL; |
1626 | } |
1627 | if (PyUnicode_READY(second_identifier) == -1) { |
1628 | return NULL; |
1629 | } |
1630 | const char *first_str = PyUnicode_AsUTF8(first_identifier); |
1631 | if (!first_str) { |
1632 | return NULL; |
1633 | } |
1634 | const char *second_str = PyUnicode_AsUTF8(second_identifier); |
1635 | if (!second_str) { |
1636 | return NULL; |
1637 | } |
1638 | Py_ssize_t len = strlen(first_str) + strlen(second_str) + 1; // +1 for the dot |
1639 | |
1640 | PyObject *str = PyBytes_FromStringAndSize(NULL, len); |
1641 | if (!str) { |
1642 | return NULL; |
1643 | } |
1644 | |
1645 | char *s = PyBytes_AS_STRING(str); |
1646 | if (!s) { |
1647 | return NULL; |
1648 | } |
1649 | |
1650 | strcpy(s, first_str); |
1651 | s += strlen(first_str); |
1652 | *s++ = '.'; |
1653 | strcpy(s, second_str); |
1654 | s += strlen(second_str); |
1655 | *s = '\0'; |
1656 | |
1657 | PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL); |
1658 | Py_DECREF(str); |
1659 | if (!uni) { |
1660 | return NULL; |
1661 | } |
1662 | PyUnicode_InternInPlace(&uni); |
1663 | if (_PyArena_AddPyObject(p->arena, uni) < 0) { |
1664 | Py_DECREF(uni); |
1665 | return NULL; |
1666 | } |
1667 | |
1668 | return _PyAST_Name(uni, Load, EXTRA_EXPR(first_name, second_name)); |
1669 | } |
1670 | |
1671 | /* Counts the total number of dots in seq's tokens */ |
1672 | int |
1673 | _PyPegen_seq_count_dots(asdl_seq *seq) |
1674 | { |
1675 | int number_of_dots = 0; |
1676 | for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) { |
1677 | Token *current_expr = asdl_seq_GET_UNTYPED(seq, i); |
1678 | switch (current_expr->type) { |
1679 | case ELLIPSIS: |
1680 | number_of_dots += 3; |
1681 | break; |
1682 | case DOT: |
1683 | number_of_dots += 1; |
1684 | break; |
1685 | default: |
1686 | Py_UNREACHABLE(); |
1687 | } |
1688 | } |
1689 | |
1690 | return number_of_dots; |
1691 | } |
1692 | |
1693 | /* Creates an alias with '*' as the identifier name */ |
1694 | alias_ty |
1695 | _PyPegen_alias_for_star(Parser *p, int lineno, int col_offset, int end_lineno, |
1696 | int end_col_offset, PyArena *arena) { |
1697 | PyObject *str = PyUnicode_InternFromString("*" ); |
1698 | if (!str) { |
1699 | return NULL; |
1700 | } |
1701 | if (_PyArena_AddPyObject(p->arena, str) < 0) { |
1702 | Py_DECREF(str); |
1703 | return NULL; |
1704 | } |
1705 | return _PyAST_alias(str, NULL, lineno, col_offset, end_lineno, end_col_offset, arena); |
1706 | } |
1707 | |
1708 | /* Creates a new asdl_seq* with the identifiers of all the names in seq */ |
1709 | asdl_identifier_seq * |
1710 | _PyPegen_map_names_to_ids(Parser *p, asdl_expr_seq *seq) |
1711 | { |
1712 | Py_ssize_t len = asdl_seq_LEN(seq); |
1713 | assert(len > 0); |
1714 | |
1715 | asdl_identifier_seq *new_seq = _Py_asdl_identifier_seq_new(len, p->arena); |
1716 | if (!new_seq) { |
1717 | return NULL; |
1718 | } |
1719 | for (Py_ssize_t i = 0; i < len; i++) { |
1720 | expr_ty e = asdl_seq_GET(seq, i); |
1721 | asdl_seq_SET(new_seq, i, e->v.Name.id); |
1722 | } |
1723 | return new_seq; |
1724 | } |
1725 | |
1726 | /* Constructs a CmpopExprPair */ |
1727 | CmpopExprPair * |
1728 | _PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr) |
1729 | { |
1730 | assert(expr != NULL); |
1731 | CmpopExprPair *a = _PyArena_Malloc(p->arena, sizeof(CmpopExprPair)); |
1732 | if (!a) { |
1733 | return NULL; |
1734 | } |
1735 | a->cmpop = cmpop; |
1736 | a->expr = expr; |
1737 | return a; |
1738 | } |
1739 | |
1740 | asdl_int_seq * |
1741 | _PyPegen_get_cmpops(Parser *p, asdl_seq *seq) |
1742 | { |
1743 | Py_ssize_t len = asdl_seq_LEN(seq); |
1744 | assert(len > 0); |
1745 | |
1746 | asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena); |
1747 | if (!new_seq) { |
1748 | return NULL; |
1749 | } |
1750 | for (Py_ssize_t i = 0; i < len; i++) { |
1751 | CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i); |
1752 | asdl_seq_SET(new_seq, i, pair->cmpop); |
1753 | } |
1754 | return new_seq; |
1755 | } |
1756 | |
1757 | asdl_expr_seq * |
1758 | _PyPegen_get_exprs(Parser *p, asdl_seq *seq) |
1759 | { |
1760 | Py_ssize_t len = asdl_seq_LEN(seq); |
1761 | assert(len > 0); |
1762 | |
1763 | asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); |
1764 | if (!new_seq) { |
1765 | return NULL; |
1766 | } |
1767 | for (Py_ssize_t i = 0; i < len; i++) { |
1768 | CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i); |
1769 | asdl_seq_SET(new_seq, i, pair->expr); |
1770 | } |
1771 | return new_seq; |
1772 | } |
1773 | |
1774 | /* Creates an asdl_seq* where all the elements have been changed to have ctx as context */ |
1775 | static asdl_expr_seq * |
1776 | _set_seq_context(Parser *p, asdl_expr_seq *seq, expr_context_ty ctx) |
1777 | { |
1778 | Py_ssize_t len = asdl_seq_LEN(seq); |
1779 | if (len == 0) { |
1780 | return NULL; |
1781 | } |
1782 | |
1783 | asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); |
1784 | if (!new_seq) { |
1785 | return NULL; |
1786 | } |
1787 | for (Py_ssize_t i = 0; i < len; i++) { |
1788 | expr_ty e = asdl_seq_GET(seq, i); |
1789 | asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx)); |
1790 | } |
1791 | return new_seq; |
1792 | } |
1793 | |
1794 | static expr_ty |
1795 | _set_name_context(Parser *p, expr_ty e, expr_context_ty ctx) |
1796 | { |
1797 | return _PyAST_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e)); |
1798 | } |
1799 | |
1800 | static expr_ty |
1801 | _set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx) |
1802 | { |
1803 | return _PyAST_Tuple( |
1804 | _set_seq_context(p, e->v.Tuple.elts, ctx), |
1805 | ctx, |
1806 | EXTRA_EXPR(e, e)); |
1807 | } |
1808 | |
1809 | static expr_ty |
1810 | _set_list_context(Parser *p, expr_ty e, expr_context_ty ctx) |
1811 | { |
1812 | return _PyAST_List( |
1813 | _set_seq_context(p, e->v.List.elts, ctx), |
1814 | ctx, |
1815 | EXTRA_EXPR(e, e)); |
1816 | } |
1817 | |
1818 | static expr_ty |
1819 | _set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx) |
1820 | { |
1821 | return _PyAST_Subscript(e->v.Subscript.value, e->v.Subscript.slice, |
1822 | ctx, EXTRA_EXPR(e, e)); |
1823 | } |
1824 | |
1825 | static expr_ty |
1826 | _set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx) |
1827 | { |
1828 | return _PyAST_Attribute(e->v.Attribute.value, e->v.Attribute.attr, |
1829 | ctx, EXTRA_EXPR(e, e)); |
1830 | } |
1831 | |
1832 | static expr_ty |
1833 | _set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx) |
1834 | { |
1835 | return _PyAST_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx), |
1836 | ctx, EXTRA_EXPR(e, e)); |
1837 | } |
1838 | |
1839 | /* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */ |
1840 | expr_ty |
1841 | _PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx) |
1842 | { |
1843 | assert(expr != NULL); |
1844 | |
1845 | expr_ty new = NULL; |
1846 | switch (expr->kind) { |
1847 | case Name_kind: |
1848 | new = _set_name_context(p, expr, ctx); |
1849 | break; |
1850 | case Tuple_kind: |
1851 | new = _set_tuple_context(p, expr, ctx); |
1852 | break; |
1853 | case List_kind: |
1854 | new = _set_list_context(p, expr, ctx); |
1855 | break; |
1856 | case Subscript_kind: |
1857 | new = _set_subscript_context(p, expr, ctx); |
1858 | break; |
1859 | case Attribute_kind: |
1860 | new = _set_attribute_context(p, expr, ctx); |
1861 | break; |
1862 | case Starred_kind: |
1863 | new = _set_starred_context(p, expr, ctx); |
1864 | break; |
1865 | default: |
1866 | new = expr; |
1867 | } |
1868 | return new; |
1869 | } |
1870 | |
1871 | /* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */ |
1872 | KeyValuePair * |
1873 | _PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value) |
1874 | { |
1875 | KeyValuePair *a = _PyArena_Malloc(p->arena, sizeof(KeyValuePair)); |
1876 | if (!a) { |
1877 | return NULL; |
1878 | } |
1879 | a->key = key; |
1880 | a->value = value; |
1881 | return a; |
1882 | } |
1883 | |
1884 | /* Extracts all keys from an asdl_seq* of KeyValuePair*'s */ |
1885 | asdl_expr_seq * |
1886 | _PyPegen_get_keys(Parser *p, asdl_seq *seq) |
1887 | { |
1888 | Py_ssize_t len = asdl_seq_LEN(seq); |
1889 | asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); |
1890 | if (!new_seq) { |
1891 | return NULL; |
1892 | } |
1893 | for (Py_ssize_t i = 0; i < len; i++) { |
1894 | KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i); |
1895 | asdl_seq_SET(new_seq, i, pair->key); |
1896 | } |
1897 | return new_seq; |
1898 | } |
1899 | |
1900 | /* Extracts all values from an asdl_seq* of KeyValuePair*'s */ |
1901 | asdl_expr_seq * |
1902 | _PyPegen_get_values(Parser *p, asdl_seq *seq) |
1903 | { |
1904 | Py_ssize_t len = asdl_seq_LEN(seq); |
1905 | asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); |
1906 | if (!new_seq) { |
1907 | return NULL; |
1908 | } |
1909 | for (Py_ssize_t i = 0; i < len; i++) { |
1910 | KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i); |
1911 | asdl_seq_SET(new_seq, i, pair->value); |
1912 | } |
1913 | return new_seq; |
1914 | } |
1915 | |
1916 | /* Constructs a KeyPatternPair that is used when parsing mapping & class patterns */ |
1917 | KeyPatternPair * |
1918 | _PyPegen_key_pattern_pair(Parser *p, expr_ty key, pattern_ty pattern) |
1919 | { |
1920 | KeyPatternPair *a = _PyArena_Malloc(p->arena, sizeof(KeyPatternPair)); |
1921 | if (!a) { |
1922 | return NULL; |
1923 | } |
1924 | a->key = key; |
1925 | a->pattern = pattern; |
1926 | return a; |
1927 | } |
1928 | |
1929 | /* Extracts all keys from an asdl_seq* of KeyPatternPair*'s */ |
1930 | asdl_expr_seq * |
1931 | _PyPegen_get_pattern_keys(Parser *p, asdl_seq *seq) |
1932 | { |
1933 | Py_ssize_t len = asdl_seq_LEN(seq); |
1934 | asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); |
1935 | if (!new_seq) { |
1936 | return NULL; |
1937 | } |
1938 | for (Py_ssize_t i = 0; i < len; i++) { |
1939 | KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i); |
1940 | asdl_seq_SET(new_seq, i, pair->key); |
1941 | } |
1942 | return new_seq; |
1943 | } |
1944 | |
1945 | /* Extracts all patterns from an asdl_seq* of KeyPatternPair*'s */ |
1946 | asdl_pattern_seq * |
1947 | _PyPegen_get_patterns(Parser *p, asdl_seq *seq) |
1948 | { |
1949 | Py_ssize_t len = asdl_seq_LEN(seq); |
1950 | asdl_pattern_seq *new_seq = _Py_asdl_pattern_seq_new(len, p->arena); |
1951 | if (!new_seq) { |
1952 | return NULL; |
1953 | } |
1954 | for (Py_ssize_t i = 0; i < len; i++) { |
1955 | KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i); |
1956 | asdl_seq_SET(new_seq, i, pair->pattern); |
1957 | } |
1958 | return new_seq; |
1959 | } |
1960 | |
1961 | /* Constructs a NameDefaultPair */ |
1962 | NameDefaultPair * |
1963 | _PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value, Token *tc) |
1964 | { |
1965 | NameDefaultPair *a = _PyArena_Malloc(p->arena, sizeof(NameDefaultPair)); |
1966 | if (!a) { |
1967 | return NULL; |
1968 | } |
1969 | a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc); |
1970 | a->value = value; |
1971 | return a; |
1972 | } |
1973 | |
1974 | /* Constructs a SlashWithDefault */ |
1975 | SlashWithDefault * |
1976 | _PyPegen_slash_with_default(Parser *p, asdl_arg_seq *plain_names, asdl_seq *names_with_defaults) |
1977 | { |
1978 | SlashWithDefault *a = _PyArena_Malloc(p->arena, sizeof(SlashWithDefault)); |
1979 | if (!a) { |
1980 | return NULL; |
1981 | } |
1982 | a->plain_names = plain_names; |
1983 | a->names_with_defaults = names_with_defaults; |
1984 | return a; |
1985 | } |
1986 | |
1987 | /* Constructs a StarEtc */ |
1988 | StarEtc * |
1989 | _PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg) |
1990 | { |
1991 | StarEtc *a = _PyArena_Malloc(p->arena, sizeof(StarEtc)); |
1992 | if (!a) { |
1993 | return NULL; |
1994 | } |
1995 | a->vararg = vararg; |
1996 | a->kwonlyargs = kwonlyargs; |
1997 | a->kwarg = kwarg; |
1998 | return a; |
1999 | } |
2000 | |
2001 | asdl_seq * |
2002 | _PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b) |
2003 | { |
2004 | Py_ssize_t first_len = asdl_seq_LEN(a); |
2005 | Py_ssize_t second_len = asdl_seq_LEN(b); |
2006 | asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(first_len + second_len, p->arena); |
2007 | if (!new_seq) { |
2008 | return NULL; |
2009 | } |
2010 | |
2011 | int k = 0; |
2012 | for (Py_ssize_t i = 0; i < first_len; i++) { |
2013 | asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(a, i)); |
2014 | } |
2015 | for (Py_ssize_t i = 0; i < second_len; i++) { |
2016 | asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(b, i)); |
2017 | } |
2018 | |
2019 | return new_seq; |
2020 | } |
2021 | |
2022 | static asdl_arg_seq* |
2023 | _get_names(Parser *p, asdl_seq *names_with_defaults) |
2024 | { |
2025 | Py_ssize_t len = asdl_seq_LEN(names_with_defaults); |
2026 | asdl_arg_seq *seq = _Py_asdl_arg_seq_new(len, p->arena); |
2027 | if (!seq) { |
2028 | return NULL; |
2029 | } |
2030 | for (Py_ssize_t i = 0; i < len; i++) { |
2031 | NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i); |
2032 | asdl_seq_SET(seq, i, pair->arg); |
2033 | } |
2034 | return seq; |
2035 | } |
2036 | |
2037 | static asdl_expr_seq * |
2038 | _get_defaults(Parser *p, asdl_seq *names_with_defaults) |
2039 | { |
2040 | Py_ssize_t len = asdl_seq_LEN(names_with_defaults); |
2041 | asdl_expr_seq *seq = _Py_asdl_expr_seq_new(len, p->arena); |
2042 | if (!seq) { |
2043 | return NULL; |
2044 | } |
2045 | for (Py_ssize_t i = 0; i < len; i++) { |
2046 | NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i); |
2047 | asdl_seq_SET(seq, i, pair->value); |
2048 | } |
2049 | return seq; |
2050 | } |
2051 | |
2052 | static int |
2053 | _make_posonlyargs(Parser *p, |
2054 | asdl_arg_seq *slash_without_default, |
2055 | SlashWithDefault *slash_with_default, |
2056 | asdl_arg_seq **posonlyargs) { |
2057 | if (slash_without_default != NULL) { |
2058 | *posonlyargs = slash_without_default; |
2059 | } |
2060 | else if (slash_with_default != NULL) { |
2061 | asdl_arg_seq *slash_with_default_names = |
2062 | _get_names(p, slash_with_default->names_with_defaults); |
2063 | if (!slash_with_default_names) { |
2064 | return -1; |
2065 | } |
2066 | *posonlyargs = (asdl_arg_seq*)_PyPegen_join_sequences( |
2067 | p, |
2068 | (asdl_seq*)slash_with_default->plain_names, |
2069 | (asdl_seq*)slash_with_default_names); |
2070 | } |
2071 | else { |
2072 | *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena); |
2073 | } |
2074 | return *posonlyargs == NULL ? -1 : 0; |
2075 | } |
2076 | |
2077 | static int |
2078 | _make_posargs(Parser *p, |
2079 | asdl_arg_seq *plain_names, |
2080 | asdl_seq *names_with_default, |
2081 | asdl_arg_seq **posargs) { |
2082 | if (plain_names != NULL && names_with_default != NULL) { |
2083 | asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default); |
2084 | if (!names_with_default_names) { |
2085 | return -1; |
2086 | } |
2087 | *posargs = (asdl_arg_seq*)_PyPegen_join_sequences( |
2088 | p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names); |
2089 | } |
2090 | else if (plain_names == NULL && names_with_default != NULL) { |
2091 | *posargs = _get_names(p, names_with_default); |
2092 | } |
2093 | else if (plain_names != NULL && names_with_default == NULL) { |
2094 | *posargs = plain_names; |
2095 | } |
2096 | else { |
2097 | *posargs = _Py_asdl_arg_seq_new(0, p->arena); |
2098 | } |
2099 | return *posargs == NULL ? -1 : 0; |
2100 | } |
2101 | |
2102 | static int |
2103 | _make_posdefaults(Parser *p, |
2104 | SlashWithDefault *slash_with_default, |
2105 | asdl_seq *names_with_default, |
2106 | asdl_expr_seq **posdefaults) { |
2107 | if (slash_with_default != NULL && names_with_default != NULL) { |
2108 | asdl_expr_seq *slash_with_default_values = |
2109 | _get_defaults(p, slash_with_default->names_with_defaults); |
2110 | if (!slash_with_default_values) { |
2111 | return -1; |
2112 | } |
2113 | asdl_expr_seq *names_with_default_values = _get_defaults(p, names_with_default); |
2114 | if (!names_with_default_values) { |
2115 | return -1; |
2116 | } |
2117 | *posdefaults = (asdl_expr_seq*)_PyPegen_join_sequences( |
2118 | p, |
2119 | (asdl_seq*)slash_with_default_values, |
2120 | (asdl_seq*)names_with_default_values); |
2121 | } |
2122 | else if (slash_with_default == NULL && names_with_default != NULL) { |
2123 | *posdefaults = _get_defaults(p, names_with_default); |
2124 | } |
2125 | else if (slash_with_default != NULL && names_with_default == NULL) { |
2126 | *posdefaults = _get_defaults(p, slash_with_default->names_with_defaults); |
2127 | } |
2128 | else { |
2129 | *posdefaults = _Py_asdl_expr_seq_new(0, p->arena); |
2130 | } |
2131 | return *posdefaults == NULL ? -1 : 0; |
2132 | } |
2133 | |
2134 | static int |
2135 | _make_kwargs(Parser *p, StarEtc *star_etc, |
2136 | asdl_arg_seq **kwonlyargs, |
2137 | asdl_expr_seq **kwdefaults) { |
2138 | if (star_etc != NULL && star_etc->kwonlyargs != NULL) { |
2139 | *kwonlyargs = _get_names(p, star_etc->kwonlyargs); |
2140 | } |
2141 | else { |
2142 | *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena); |
2143 | } |
2144 | |
2145 | if (*kwonlyargs == NULL) { |
2146 | return -1; |
2147 | } |
2148 | |
2149 | if (star_etc != NULL && star_etc->kwonlyargs != NULL) { |
2150 | *kwdefaults = _get_defaults(p, star_etc->kwonlyargs); |
2151 | } |
2152 | else { |
2153 | *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena); |
2154 | } |
2155 | |
2156 | if (*kwdefaults == NULL) { |
2157 | return -1; |
2158 | } |
2159 | |
2160 | return 0; |
2161 | } |
2162 | |
2163 | /* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */ |
2164 | arguments_ty |
2165 | _PyPegen_make_arguments(Parser *p, asdl_arg_seq *slash_without_default, |
2166 | SlashWithDefault *slash_with_default, asdl_arg_seq *plain_names, |
2167 | asdl_seq *names_with_default, StarEtc *star_etc) |
2168 | { |
2169 | asdl_arg_seq *posonlyargs; |
2170 | if (_make_posonlyargs(p, slash_without_default, slash_with_default, &posonlyargs) == -1) { |
2171 | return NULL; |
2172 | } |
2173 | |
2174 | asdl_arg_seq *posargs; |
2175 | if (_make_posargs(p, plain_names, names_with_default, &posargs) == -1) { |
2176 | return NULL; |
2177 | } |
2178 | |
2179 | asdl_expr_seq *posdefaults; |
2180 | if (_make_posdefaults(p,slash_with_default, names_with_default, &posdefaults) == -1) { |
2181 | return NULL; |
2182 | } |
2183 | |
2184 | arg_ty vararg = NULL; |
2185 | if (star_etc != NULL && star_etc->vararg != NULL) { |
2186 | vararg = star_etc->vararg; |
2187 | } |
2188 | |
2189 | asdl_arg_seq *kwonlyargs; |
2190 | asdl_expr_seq *kwdefaults; |
2191 | if (_make_kwargs(p, star_etc, &kwonlyargs, &kwdefaults) == -1) { |
2192 | return NULL; |
2193 | } |
2194 | |
2195 | arg_ty kwarg = NULL; |
2196 | if (star_etc != NULL && star_etc->kwarg != NULL) { |
2197 | kwarg = star_etc->kwarg; |
2198 | } |
2199 | |
2200 | return _PyAST_arguments(posonlyargs, posargs, vararg, kwonlyargs, |
2201 | kwdefaults, kwarg, posdefaults, p->arena); |
2202 | } |
2203 | |
2204 | |
2205 | /* Constructs an empty arguments_ty object, that gets used when a function accepts no |
2206 | * arguments. */ |
2207 | arguments_ty |
2208 | _PyPegen_empty_arguments(Parser *p) |
2209 | { |
2210 | asdl_arg_seq *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena); |
2211 | if (!posonlyargs) { |
2212 | return NULL; |
2213 | } |
2214 | asdl_arg_seq *posargs = _Py_asdl_arg_seq_new(0, p->arena); |
2215 | if (!posargs) { |
2216 | return NULL; |
2217 | } |
2218 | asdl_expr_seq *posdefaults = _Py_asdl_expr_seq_new(0, p->arena); |
2219 | if (!posdefaults) { |
2220 | return NULL; |
2221 | } |
2222 | asdl_arg_seq *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena); |
2223 | if (!kwonlyargs) { |
2224 | return NULL; |
2225 | } |
2226 | asdl_expr_seq *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena); |
2227 | if (!kwdefaults) { |
2228 | return NULL; |
2229 | } |
2230 | |
2231 | return _PyAST_arguments(posonlyargs, posargs, NULL, kwonlyargs, |
2232 | kwdefaults, NULL, posdefaults, p->arena); |
2233 | } |
2234 | |
2235 | /* Encapsulates the value of an operator_ty into an AugOperator struct */ |
2236 | AugOperator * |
2237 | _PyPegen_augoperator(Parser *p, operator_ty kind) |
2238 | { |
2239 | AugOperator *a = _PyArena_Malloc(p->arena, sizeof(AugOperator)); |
2240 | if (!a) { |
2241 | return NULL; |
2242 | } |
2243 | a->kind = kind; |
2244 | return a; |
2245 | } |
2246 | |
2247 | /* Construct a FunctionDef equivalent to function_def, but with decorators */ |
2248 | stmt_ty |
2249 | _PyPegen_function_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty function_def) |
2250 | { |
2251 | assert(function_def != NULL); |
2252 | if (function_def->kind == AsyncFunctionDef_kind) { |
2253 | return _PyAST_AsyncFunctionDef( |
2254 | function_def->v.FunctionDef.name, function_def->v.FunctionDef.args, |
2255 | function_def->v.FunctionDef.body, decorators, function_def->v.FunctionDef.returns, |
2256 | function_def->v.FunctionDef.type_comment, function_def->lineno, |
2257 | function_def->col_offset, function_def->end_lineno, function_def->end_col_offset, |
2258 | p->arena); |
2259 | } |
2260 | |
2261 | return _PyAST_FunctionDef( |
2262 | function_def->v.FunctionDef.name, function_def->v.FunctionDef.args, |
2263 | function_def->v.FunctionDef.body, decorators, |
2264 | function_def->v.FunctionDef.returns, |
2265 | function_def->v.FunctionDef.type_comment, function_def->lineno, |
2266 | function_def->col_offset, function_def->end_lineno, |
2267 | function_def->end_col_offset, p->arena); |
2268 | } |
2269 | |
2270 | /* Construct a ClassDef equivalent to class_def, but with decorators */ |
2271 | stmt_ty |
2272 | _PyPegen_class_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty class_def) |
2273 | { |
2274 | assert(class_def != NULL); |
2275 | return _PyAST_ClassDef( |
2276 | class_def->v.ClassDef.name, class_def->v.ClassDef.bases, |
2277 | class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators, |
2278 | class_def->lineno, class_def->col_offset, class_def->end_lineno, |
2279 | class_def->end_col_offset, p->arena); |
2280 | } |
2281 | |
2282 | /* Construct a KeywordOrStarred */ |
2283 | KeywordOrStarred * |
2284 | _PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword) |
2285 | { |
2286 | KeywordOrStarred *a = _PyArena_Malloc(p->arena, sizeof(KeywordOrStarred)); |
2287 | if (!a) { |
2288 | return NULL; |
2289 | } |
2290 | a->element = element; |
2291 | a->is_keyword = is_keyword; |
2292 | return a; |
2293 | } |
2294 | |
2295 | /* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */ |
2296 | static int |
2297 | _seq_number_of_starred_exprs(asdl_seq *seq) |
2298 | { |
2299 | int n = 0; |
2300 | for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) { |
2301 | KeywordOrStarred *k = asdl_seq_GET_UNTYPED(seq, i); |
2302 | if (!k->is_keyword) { |
2303 | n++; |
2304 | } |
2305 | } |
2306 | return n; |
2307 | } |
2308 | |
2309 | /* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */ |
2310 | asdl_expr_seq * |
2311 | (Parser *p, asdl_seq *kwargs) |
2312 | { |
2313 | int new_len = _seq_number_of_starred_exprs(kwargs); |
2314 | if (new_len == 0) { |
2315 | return NULL; |
2316 | } |
2317 | asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(new_len, p->arena); |
2318 | if (!new_seq) { |
2319 | return NULL; |
2320 | } |
2321 | |
2322 | int idx = 0; |
2323 | for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) { |
2324 | KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i); |
2325 | if (!k->is_keyword) { |
2326 | asdl_seq_SET(new_seq, idx++, k->element); |
2327 | } |
2328 | } |
2329 | return new_seq; |
2330 | } |
2331 | |
2332 | /* Return a new asdl_seq* with only the keywords in kwargs */ |
2333 | asdl_keyword_seq* |
2334 | _PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs) |
2335 | { |
2336 | Py_ssize_t len = asdl_seq_LEN(kwargs); |
2337 | Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs); |
2338 | if (new_len == 0) { |
2339 | return NULL; |
2340 | } |
2341 | asdl_keyword_seq *new_seq = _Py_asdl_keyword_seq_new(new_len, p->arena); |
2342 | if (!new_seq) { |
2343 | return NULL; |
2344 | } |
2345 | |
2346 | int idx = 0; |
2347 | for (Py_ssize_t i = 0; i < len; i++) { |
2348 | KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i); |
2349 | if (k->is_keyword) { |
2350 | asdl_seq_SET(new_seq, idx++, k->element); |
2351 | } |
2352 | } |
2353 | return new_seq; |
2354 | } |
2355 | |
2356 | expr_ty |
2357 | _PyPegen_concatenate_strings(Parser *p, asdl_seq *strings) |
2358 | { |
2359 | Py_ssize_t len = asdl_seq_LEN(strings); |
2360 | assert(len > 0); |
2361 | |
2362 | Token *first = asdl_seq_GET_UNTYPED(strings, 0); |
2363 | Token *last = asdl_seq_GET_UNTYPED(strings, len - 1); |
2364 | |
2365 | int bytesmode = 0; |
2366 | PyObject *bytes_str = NULL; |
2367 | |
2368 | FstringParser state; |
2369 | _PyPegen_FstringParser_Init(&state); |
2370 | |
2371 | for (Py_ssize_t i = 0; i < len; i++) { |
2372 | Token *t = asdl_seq_GET_UNTYPED(strings, i); |
2373 | |
2374 | int this_bytesmode; |
2375 | int this_rawmode; |
2376 | PyObject *s; |
2377 | const char *fstr; |
2378 | Py_ssize_t fstrlen = -1; |
2379 | |
2380 | if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != 0) { |
2381 | goto error; |
2382 | } |
2383 | |
2384 | /* Check that we are not mixing bytes with unicode. */ |
2385 | if (i != 0 && bytesmode != this_bytesmode) { |
2386 | RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals" ); |
2387 | Py_XDECREF(s); |
2388 | goto error; |
2389 | } |
2390 | bytesmode = this_bytesmode; |
2391 | |
2392 | if (fstr != NULL) { |
2393 | assert(s == NULL && !bytesmode); |
2394 | |
2395 | int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen, |
2396 | this_rawmode, 0, first, t, last); |
2397 | if (result < 0) { |
2398 | goto error; |
2399 | } |
2400 | } |
2401 | else { |
2402 | /* String or byte string. */ |
2403 | assert(s != NULL && fstr == NULL); |
2404 | assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s)); |
2405 | |
2406 | if (bytesmode) { |
2407 | if (i == 0) { |
2408 | bytes_str = s; |
2409 | } |
2410 | else { |
2411 | PyBytes_ConcatAndDel(&bytes_str, s); |
2412 | if (!bytes_str) { |
2413 | goto error; |
2414 | } |
2415 | } |
2416 | } |
2417 | else { |
2418 | /* This is a regular string. Concatenate it. */ |
2419 | if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) { |
2420 | goto error; |
2421 | } |
2422 | } |
2423 | } |
2424 | } |
2425 | |
2426 | if (bytesmode) { |
2427 | if (_PyArena_AddPyObject(p->arena, bytes_str) < 0) { |
2428 | goto error; |
2429 | } |
2430 | return _PyAST_Constant(bytes_str, NULL, first->lineno, |
2431 | first->col_offset, last->end_lineno, |
2432 | last->end_col_offset, p->arena); |
2433 | } |
2434 | |
2435 | return _PyPegen_FstringParser_Finish(p, &state, first, last); |
2436 | |
2437 | error: |
2438 | Py_XDECREF(bytes_str); |
2439 | _PyPegen_FstringParser_Dealloc(&state); |
2440 | if (PyErr_Occurred()) { |
2441 | raise_decode_error(p); |
2442 | } |
2443 | return NULL; |
2444 | } |
2445 | |
2446 | expr_ty |
2447 | _PyPegen_ensure_imaginary(Parser *p, expr_ty exp) |
2448 | { |
2449 | if (exp->kind != Constant_kind || !PyComplex_CheckExact(exp->v.Constant.value)) { |
2450 | RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "imaginary number required in complex literal" ); |
2451 | return NULL; |
2452 | } |
2453 | return exp; |
2454 | } |
2455 | |
2456 | expr_ty |
2457 | _PyPegen_ensure_real(Parser *p, expr_ty exp) |
2458 | { |
2459 | if (exp->kind != Constant_kind || PyComplex_CheckExact(exp->v.Constant.value)) { |
2460 | RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "real number required in complex literal" ); |
2461 | return NULL; |
2462 | } |
2463 | return exp; |
2464 | } |
2465 | |
2466 | mod_ty |
2467 | _PyPegen_make_module(Parser *p, asdl_stmt_seq *a) { |
2468 | asdl_type_ignore_seq *type_ignores = NULL; |
2469 | Py_ssize_t num = p->type_ignore_comments.num_items; |
2470 | if (num > 0) { |
2471 | // Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena |
2472 | type_ignores = _Py_asdl_type_ignore_seq_new(num, p->arena); |
2473 | if (type_ignores == NULL) { |
2474 | return NULL; |
2475 | } |
2476 | for (int i = 0; i < num; i++) { |
2477 | PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment); |
2478 | if (tag == NULL) { |
2479 | return NULL; |
2480 | } |
2481 | type_ignore_ty ti = _PyAST_TypeIgnore(p->type_ignore_comments.items[i].lineno, |
2482 | tag, p->arena); |
2483 | if (ti == NULL) { |
2484 | return NULL; |
2485 | } |
2486 | asdl_seq_SET(type_ignores, i, ti); |
2487 | } |
2488 | } |
2489 | return _PyAST_Module(a, type_ignores, p->arena); |
2490 | } |
2491 | |
2492 | // Error reporting helpers |
2493 | |
2494 | expr_ty |
2495 | _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type) |
2496 | { |
2497 | if (e == NULL) { |
2498 | return NULL; |
2499 | } |
2500 | |
2501 | #define VISIT_CONTAINER(CONTAINER, TYPE) do { \ |
2502 | Py_ssize_t len = asdl_seq_LEN((CONTAINER)->v.TYPE.elts);\ |
2503 | for (Py_ssize_t i = 0; i < len; i++) {\ |
2504 | expr_ty other = asdl_seq_GET((CONTAINER)->v.TYPE.elts, i);\ |
2505 | expr_ty child = _PyPegen_get_invalid_target(other, targets_type);\ |
2506 | if (child != NULL) {\ |
2507 | return child;\ |
2508 | }\ |
2509 | }\ |
2510 | } while (0) |
2511 | |
2512 | // We only need to visit List and Tuple nodes recursively as those |
2513 | // are the only ones that can contain valid names in targets when |
2514 | // they are parsed as expressions. Any other kind of expression |
2515 | // that is a container (like Sets or Dicts) is directly invalid and |
2516 | // we don't need to visit it recursively. |
2517 | |
2518 | switch (e->kind) { |
2519 | case List_kind: |
2520 | VISIT_CONTAINER(e, List); |
2521 | return NULL; |
2522 | case Tuple_kind: |
2523 | VISIT_CONTAINER(e, Tuple); |
2524 | return NULL; |
2525 | case Starred_kind: |
2526 | if (targets_type == DEL_TARGETS) { |
2527 | return e; |
2528 | } |
2529 | return _PyPegen_get_invalid_target(e->v.Starred.value, targets_type); |
2530 | case Compare_kind: |
2531 | // This is needed, because the `a in b` in `for a in b` gets parsed |
2532 | // as a comparison, and so we need to search the left side of the comparison |
2533 | // for invalid targets. |
2534 | if (targets_type == FOR_TARGETS) { |
2535 | cmpop_ty cmpop = (cmpop_ty) asdl_seq_GET(e->v.Compare.ops, 0); |
2536 | if (cmpop == In) { |
2537 | return _PyPegen_get_invalid_target(e->v.Compare.left, targets_type); |
2538 | } |
2539 | return NULL; |
2540 | } |
2541 | return e; |
2542 | case Name_kind: |
2543 | case Subscript_kind: |
2544 | case Attribute_kind: |
2545 | return NULL; |
2546 | default: |
2547 | return e; |
2548 | } |
2549 | } |
2550 | |
2551 | void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) { |
2552 | int kwarg_unpacking = 0; |
2553 | for (Py_ssize_t i = 0, l = asdl_seq_LEN(e->v.Call.keywords); i < l; i++) { |
2554 | keyword_ty keyword = asdl_seq_GET(e->v.Call.keywords, i); |
2555 | if (!keyword->arg) { |
2556 | kwarg_unpacking = 1; |
2557 | } |
2558 | } |
2559 | |
2560 | const char *msg = NULL; |
2561 | if (kwarg_unpacking) { |
2562 | msg = "positional argument follows keyword argument unpacking" ; |
2563 | } else { |
2564 | msg = "positional argument follows keyword argument" ; |
2565 | } |
2566 | |
2567 | return RAISE_SYNTAX_ERROR(msg); |
2568 | } |
2569 | |
2570 | |
2571 | expr_ty |
2572 | _PyPegen_get_last_comprehension_item(comprehension_ty comprehension) { |
2573 | if (comprehension->ifs == NULL || asdl_seq_LEN(comprehension->ifs) == 0) { |
2574 | return comprehension->iter; |
2575 | } |
2576 | return PyPegen_last_item(comprehension->ifs, expr_ty); |
2577 | } |
2578 | |
2579 | void * |
2580 | _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions) |
2581 | { |
2582 | /* The rule that calls this function is 'args for_if_clauses'. |
2583 | For the input f(L, x for x in y), L and x are in args and |
2584 | the for is parsed as a for_if_clause. We have to check if |
2585 | len <= 1, so that input like dict((a, b) for a, b in x) |
2586 | gets successfully parsed and then we pass the last |
2587 | argument (x in the above example) as the location of the |
2588 | error */ |
2589 | Py_ssize_t len = asdl_seq_LEN(args->v.Call.args); |
2590 | if (len <= 1) { |
2591 | return NULL; |
2592 | } |
2593 | |
2594 | comprehension_ty last_comprehension = PyPegen_last_item(comprehensions, comprehension_ty); |
2595 | |
2596 | return RAISE_SYNTAX_ERROR_KNOWN_RANGE( |
2597 | (expr_ty) asdl_seq_GET(args->v.Call.args, len - 1), |
2598 | _PyPegen_get_last_comprehension_item(last_comprehension), |
2599 | "Generator expression must be parenthesized" |
2600 | ); |
2601 | } |
2602 | |
2603 | |
2604 | expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b, |
2605 | int lineno, int col_offset, int end_lineno, |
2606 | int end_col_offset, PyArena *arena) { |
2607 | Py_ssize_t args_len = asdl_seq_LEN(a); |
2608 | Py_ssize_t total_len = args_len; |
2609 | |
2610 | if (b == NULL) { |
2611 | return _PyAST_Call(_PyPegen_dummy_name(p), a, NULL, lineno, col_offset, |
2612 | end_lineno, end_col_offset, arena); |
2613 | |
2614 | } |
2615 | |
2616 | asdl_expr_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b); |
2617 | asdl_keyword_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b); |
2618 | |
2619 | if (starreds) { |
2620 | total_len += asdl_seq_LEN(starreds); |
2621 | } |
2622 | |
2623 | asdl_expr_seq *args = _Py_asdl_expr_seq_new(total_len, arena); |
2624 | |
2625 | Py_ssize_t i = 0; |
2626 | for (i = 0; i < args_len; i++) { |
2627 | asdl_seq_SET(args, i, asdl_seq_GET(a, i)); |
2628 | } |
2629 | for (; i < total_len; i++) { |
2630 | asdl_seq_SET(args, i, asdl_seq_GET(starreds, i - args_len)); |
2631 | } |
2632 | |
2633 | return _PyAST_Call(_PyPegen_dummy_name(p), args, keywords, lineno, |
2634 | col_offset, end_lineno, end_col_offset, arena); |
2635 | } |
2636 | |