pegen.c source code [python/Parser/pegen.c]

1	#include <Python.h>
2	#include "pycore_ast.h" // _PyAST_Validate(),
3	#include <errcode.h>
4	#include "tokenizer.h"
5
6	#include "pegen.h"
7	#include "string_parser.h"
8
9	PyObject *
10	_PyPegen_new_type_comment(Parser p, const* char *s)
11	{
12	PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
13	if (res == NULL) {
14	return NULL;
15	}
16	if (_PyArena_AddPyObject(p->arena, res) < `0`) {
17	Py_DECREF(res);
18	return NULL;
19	}
20	return res;
21	}
22
23	arg_ty
24	_PyPegen_add_type_comment_to_arg(Parser p, arg_ty a, Token tc)
25	{
26	if (tc == NULL) {
27	return a;
28	}
29	const char *bytes = PyBytes_AsString(tc->bytes);
30	if (bytes == NULL) {
31	return NULL;
32	}
33	PyObject *tco = _PyPegen_new_type_comment(p, bytes);
34	if (tco == NULL) {
35	return NULL;
36	}
37	return _PyAST_arg(a->arg, a->annotation, tco,
38	a->lineno, a->col_offset, a->end_lineno, a->end_col_offset,
39	p->arena);
40	}
41
42	static int
43	init_normalization(Parser *p)
44	{
45	if (p->normalize) {
46	return `1`;
47	}
48	PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
49	if (!m)
50	{
51	return `0`;
52	}
53	p->normalize = PyObject_GetAttrString(m, "normalize");
54	Py_DECREF(m);
55	if (!p->normalize)
56	{
57	return `0`;
58	}
59	return `1`;
60	}
61
62	/ Checks if the NOTEQUAL token is valid given the current parser flags*
63	0 indicates success and nonzero indicates failure (an exception may be set) /*
64	int
65	_PyPegen_check_barry_as_flufl(Parser p, Token t) {
66	assert(t->bytes != NULL);
67	assert(t->type == NOTEQUAL);
68
69	const char* tok_str = PyBytes_AS_STRING(t->bytes);
70	if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>") != `0`) {
71	RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
72	return -`1`;
73	}
74	if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) {
75	return strcmp(tok_str, "!=");
76	}
77	return `0`;
78	}
79
80	int
81	_PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
82	if (name->kind != Name_kind) {
83	return `0`;
84	}
85	const char* candidates[`2`] = {"print", "exec"};
86	for (int i=`0`; i<`2`; i++) {
87	if (PyUnicode_CompareWithASCIIString(name->v.Name.id, candidates[i]) == `0`) {
88	return `1`;
89	}
90	}
91	return `0`;
92	}
93
94	PyObject *
95	_PyPegen_new_identifier(Parser p, const* char *n)
96	{
97	PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
98	if (!id) {
99	goto error;
100	}
101	/ PyUnicode_DecodeUTF8 should always return a ready string. /
102	assert(PyUnicode_IS_READY(id));
103	/ Check whether there are non-ASCII characters in the*
104	identifier; if so, normalize to NFKC. /*
105	if (!PyUnicode_IS_ASCII(id))
106	{
107	PyObject *id2;
108	if (!init_normalization(p))
109	{
110	Py_DECREF(id);
111	goto error;
112	}
113	PyObject *form = PyUnicode_InternFromString("NFKC");
114	if (form == NULL)
115	{
116	Py_DECREF(id);
117	goto error;
118	}
119	PyObject *args[`2`] = {form, id};
120	id2 = _PyObject_FastCall(p->normalize, args, `2`);
121	Py_DECREF(id);
122	Py_DECREF(form);
123	if (!id2) {
124	goto error;
125	}
126	if (!PyUnicode_Check(id2))
127	{
128	PyErr_Format(PyExc_TypeError,
129	"unicodedata.normalize() must return a string, not "
130	"%.200s",
131	_PyType_Name(Py_TYPE(id2)));
132	Py_DECREF(id2);
133	goto error;
134	}
135	id = id2;
136	}
137	PyUnicode_InternInPlace(&id);
138	if (_PyArena_AddPyObject(p->arena, id) < `0`)
139	{
140	Py_DECREF(id);
141	goto error;
142	}
143	return id;
144
145	error:
146	p->error_indicator = `1`;
147	return NULL;
148	}
149
150	static PyObject *
151	_create_dummy_identifier(Parser *p)
152	{
153	return _PyPegen_new_identifier(p, "");
154	}
155
156	static inline Py_ssize_t
157	byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
158	{
159	const char *str = PyUnicode_AsUTF8(line);
160	if (!str) {
161	return `0`;
162	}
163	Py_ssize_t len = strlen(str);
164	if (col_offset > len + `1`) {
165	col_offset = len + `1`;
166	}
167	assert(col_offset >= `0`);
168	PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
169	if (!text) {
170	return `0`;
171	}
172	Py_ssize_t size = PyUnicode_GET_LENGTH(text);
173	Py_DECREF(text);
174	return size;
175	}
176
177	const char *
178	_PyPegen_get_expr_name(expr_ty e)
179	{
180	assert(e != NULL);
181	switch (e->kind) {
182	case Attribute_kind:
183	return "attribute";
184	case Subscript_kind:
185	return "subscript";
186	case Starred_kind:
187	return "starred";
188	case Name_kind:
189	return "name";
190	case List_kind:
191	return "list";
192	case Tuple_kind:
193	return "tuple";
194	case Lambda_kind:
195	return "lambda";
196	case Call_kind:
197	return "function call";
198	case BoolOp_kind:
199	case BinOp_kind:
200	case UnaryOp_kind:
201	return "expression";
202	case GeneratorExp_kind:
203	return "generator expression";
204	case Yield_kind:
205	case YieldFrom_kind:
206	return "yield expression";
207	case Await_kind:
208	return "await expression";
209	case ListComp_kind:
210	return "list comprehension";
211	case SetComp_kind:
212	return "set comprehension";
213	case DictComp_kind:
214	return "dict comprehension";
215	case Dict_kind:
216	return "dict literal";
217	case Set_kind:
218	return "set display";
219	case JoinedStr_kind:
220	case FormattedValue_kind:
221	return "f-string expression";
222	case Constant_kind: {
223	PyObject *value = e->v.Constant.value;
224	if (value == Py_None) {
225	return "None";
226	}
227	if (value == Py_False) {
228	return "False";
229	}
230	if (value == Py_True) {
231	return "True";
232	}
233	if (value == Py_Ellipsis) {
234	return "ellipsis";
235	}
236	return "literal";
237	}
238	case Compare_kind:
239	return "comparison";
240	case IfExp_kind:
241	return "conditional expression";
242	case NamedExpr_kind:
243	return "named expression";
244	default:
245	PyErr_Format(PyExc_SystemError,
246	"unexpected expression in assignment %d (line %d)",
247	e->kind, e->lineno);
248	return NULL;
249	}
250	}
251
252	static int
253	raise_decode_error(Parser *p)
254	{
255	assert(PyErr_Occurred());
256	const char *errtype = NULL;
257	if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
258	errtype = "unicode error";
259	}
260	else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
261	errtype = "value error";
262	}
263	if (errtype) {
264	PyObject *type;
265	PyObject *value;
266	PyObject *tback;
267	PyObject *errstr;
268	PyErr_Fetch(&type, &value, &tback);
269	errstr = PyObject_Str(value);
270	if (errstr) {
271	RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
272	Py_DECREF(errstr);
273	}
274	else {
275	PyErr_Clear();
276	RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
277	}
278	Py_XDECREF(type);
279	Py_XDECREF(value);
280	Py_XDECREF(tback);
281	}
282
283	return -`1`;
284	}
285
286	static inline void
287	raise_unclosed_parentheses_error(Parser *p) {
288	int error_lineno = p->tok->parenlinenostack[p->tok->level-`1`];
289	int error_col = p->tok->parencolstack[p->tok->level-`1`];
290	RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
291	error_lineno, error_col, error_lineno, -`1`,
292	"'%c' was never closed",
293	p->tok->parenstack[p->tok->level-`1`]);
294	}
295
296	static void
297	raise_tokenizer_init_error(PyObject *filename)
298	{
299	if (!(PyErr_ExceptionMatches(PyExc_LookupError)
300	\|\| PyErr_ExceptionMatches(PyExc_SyntaxError)
301	\|\| PyErr_ExceptionMatches(PyExc_ValueError)
302	\|\| PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
303	return;
304	}
305	PyObject *errstr = NULL;
306	PyObject *tuple = NULL;
307	PyObject *type;
308	PyObject *value;
309	PyObject *tback;
310	PyErr_Fetch(&type, &value, &tback);
311	errstr = PyObject_Str(value);
312	if (!errstr) {
313	goto error;
314	}
315
316	PyObject *tmp = Py_BuildValue("(OiiO)", filename, `0`, -`1`, Py_None);
317	if (!tmp) {
318	goto error;
319	}
320
321	tuple = PyTuple_Pack(`2`, errstr, tmp);
322	Py_DECREF(tmp);
323	if (!value) {
324	goto error;
325	}
326	PyErr_SetObject(PyExc_SyntaxError, tuple);
327
328	error:
329	Py_XDECREF(type);
330	Py_XDECREF(value);
331	Py_XDECREF(tback);
332	Py_XDECREF(errstr);
333	Py_XDECREF(tuple);
334	}
335
336	static int
337	tokenizer_error(Parser *p)
338	{
339	if (PyErr_Occurred()) {
340	return -`1`;
341	}
342
343	const char *msg = NULL;
344	PyObject* errtype = PyExc_SyntaxError;
345	Py_ssize_t col_offset = -`1`;
346	switch (p->tok->done) {
347	case E_TOKEN:
348	msg = "invalid token";
349	break;
350	case E_EOF:
351	if (p->tok->level) {
352	raise_unclosed_parentheses_error(p);
353	} else {
354	RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
355	}
356	return -`1`;
357	case E_DEDENT:
358	RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
359	return -`1`;
360	case E_INTR:
361	if (!PyErr_Occurred()) {
362	PyErr_SetNone(PyExc_KeyboardInterrupt);
363	}
364	return -`1`;
365	case E_NOMEM:
366	PyErr_NoMemory();
367	return -`1`;
368	case E_TABSPACE:
369	errtype = PyExc_TabError;
370	msg = "inconsistent use of tabs and spaces in indentation";
371	break;
372	case E_TOODEEP:
373	errtype = PyExc_IndentationError;
374	msg = "too many levels of indentation";
375	break;
376	case E_LINECONT: {
377	col_offset = p->tok->cur - p->tok->buf - `1`;
378	msg = "unexpected character after line continuation character";
379	break;
380	}
381	default:
382	msg = "unknown parsing error";
383	}
384
385	RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno,
386	col_offset >= `0` ? col_offset : `0`,
387	p->tok->lineno, -`1`, msg);
388	return -`1`;
389	}
390
391	void *
392	_PyPegen_raise_error(Parser p, PyObject errtype, const char *errmsg, ...)
393	{
394	if (p->fill == `0`) {
395	va_list va;
396	va_start(va, errmsg);
397	_PyPegen_raise_error_known_location(p, errtype, `0`, `0`, `0`, -`1`, errmsg, va);
398	va_end(va);
399	return NULL;
400	}
401
402	Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - `1`];
403	Py_ssize_t col_offset;
404	Py_ssize_t end_col_offset = -`1`;
405	if (t->col_offset == -`1`) {
406	if (p->tok->cur == p->tok->buf) {
407	col_offset = `0`;
408	} else {
409	const char* start = p->tok->buf ? p->tok->line_start : p->tok->buf;
410	col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int);
411	}
412	} else {
413	col_offset = t->col_offset + `1`;
414	}
415
416	if (t->end_col_offset != -`1`) {
417	end_col_offset = t->end_col_offset + `1`;
418	}
419
420	va_list va;
421	va_start(va, errmsg);
422	_PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va);
423	va_end(va);
424
425	return NULL;
426	}
427
428	static PyObject *
429	get_error_line(Parser *p, Py_ssize_t lineno)
430	{
431	/ If the file descriptor is interactive, the source lines of the current*
432	* (multi-line) statement are stored in p->tok->interactive_src_start.
433	* If not, we're parsing from a string, which means that the whole source
434	* is stored in p->tok->str. */
435	assert((p->tok->fp == NULL && p->tok->str != NULL) \|\| p->tok->fp == stdin);
436
437	char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
438	if (cur_line == NULL) {
439	assert(p->tok->fp_interactive);
440	// We can reach this point if the tokenizer buffers for interactive source have not been
441	// initialized because we failed to decode the original source with the given locale.
442	return PyUnicode_FromStringAndSize("", `0`);
443	}
444	const char* buf_end = p->tok->fp_interactive ? p->tok->interactive_src_end : p->tok->inp;
445
446	Py_ssize_t relative_lineno = p->starting_lineno ? lineno - p->starting_lineno + `1` : lineno;
447
448	for (int i = `0`; i < relative_lineno - `1`; i++) {
449	char *new_line = strchr(cur_line, `'\n'`) + `1`;
450	assert(new_line != NULL && new_line <= buf_end);
451	if (new_line == NULL \|\| new_line > buf_end) {
452	break;
453	}
454	cur_line = new_line;
455	}
456
457	char *next_newline;
458	if ((next_newline = strchr(cur_line, `'\n'`)) == NULL) { // This is the last line
459	next_newline = cur_line + strlen(cur_line);
460	}
461	return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");
462	}
463
464	void *
465	_PyPegen_raise_error_known_location(Parser p, PyObject errtype,
466	Py_ssize_t lineno, Py_ssize_t col_offset,
467	Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
468	const char *errmsg, va_list va)
469	{
470	PyObject *value = NULL;
471	PyObject *errstr = NULL;
472	PyObject *error_line = NULL;
473	PyObject *tmp = NULL;
474	p->error_indicator = `1`;
475
476	if (end_lineno == CURRENT_POS) {
477	end_lineno = p->tok->lineno;
478	}
479	if (end_col_offset == CURRENT_POS) {
480	end_col_offset = p->tok->cur - p->tok->line_start;
481	}
482
483	if (p->start_rule == Py_fstring_input) {
484	const char *fstring_msg = "f-string: ";
485	Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
486
487	char new_errmsg = PyMem_Malloc(len + `1`); // Lengths of both strings plus NULL character*
488	if (!new_errmsg) {
489	return (void *) PyErr_NoMemory();
490	}
491
492	// Copy both strings into new buffer
493	memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
494	memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
495	new_errmsg[len] = `0`;
496	errmsg = new_errmsg;
497	}
498	errstr = PyUnicode_FromFormatV(errmsg, va);
499	if (!errstr) {
500	goto error;
501	}
502
503	if (p->tok->fp_interactive && p->tok->interactive_src_start != NULL) {
504	error_line = get_error_line(p, lineno);
505	}
506	else if (p->start_rule == Py_file_input) {
507	error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
508	(int) lineno, p->tok->encoding);
509	}
510
511	if (!error_line) {
512	/ PyErr_ProgramTextObject was not called or returned NULL. If it was not called,*
513	then we need to find the error line from some other source, because
514	p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly
515	failed or we're parsing from a string or the REPL. There's a third edge case where
516	we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
517	`PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
518	does not physically exist /*
519	assert(p->tok->fp == NULL \|\| p->tok->fp == stdin \|\| p->tok->done == E_EOF);
520
521	if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
522	Py_ssize_t size = p->tok->inp - p->tok->buf;
523	error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
524	}
525	else if (p->tok->fp == NULL \|\| p->tok->fp == stdin) {
526	error_line = get_error_line(p, lineno);
527	}
528	else {
529	error_line = PyUnicode_FromStringAndSize("", `0`);
530	}
531	if (!error_line) {
532	goto error;
533	}
534	}
535
536	if (p->start_rule == Py_fstring_input) {
537	col_offset -= p->starting_col_offset;
538	end_col_offset -= p->starting_col_offset;
539	}
540
541	Py_ssize_t col_number = col_offset;
542	Py_ssize_t end_col_number = end_col_offset;
543
544	if (p->tok->encoding != NULL) {
545	col_number = byte_offset_to_character_offset(error_line, col_offset);
546	end_col_number = end_col_number > `0` ?
547	byte_offset_to_character_offset(error_line, end_col_offset) :
548	end_col_number;
549	}
550	tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
551	if (!tmp) {
552	goto error;
553	}
554	value = PyTuple_Pack(`2`, errstr, tmp);
555	Py_DECREF(tmp);
556	if (!value) {
557	goto error;
558	}
559	PyErr_SetObject(errtype, value);
560
561	Py_DECREF(errstr);
562	Py_DECREF(value);
563	if (p->start_rule == Py_fstring_input) {
564	PyMem_Free((void *)errmsg);
565	}
566	return NULL;
567
568	error:
569	Py_XDECREF(errstr);
570	Py_XDECREF(error_line);
571	if (p->start_rule == Py_fstring_input) {
572	PyMem_Free((void *)errmsg);
573	}
574	return NULL;
575	}
576
577	#if 0
578	static const char *
579	token_name(int type)
580	{
581	if (`0` <= type && type <= N_TOKENS) {
582	return _PyParser_TokenNames[type];
583	}
584	return "<Huh?>";
585	}
586	#endif
587
588	// Here, mark is the start of the node, while p->mark is the end.
589	// If node==NULL, they should be the same.
590	int
591	_PyPegen_insert_memo(Parser p, int* mark, int type, void *node)
592	{
593	// Insert in front
594	Memo m = _PyArena_Malloc(p->arena, sizeof*(Memo));
595	if (m == NULL) {
596	return -`1`;
597	}
598	m->type = type;
599	m->node = node;
600	m->mark = p->mark;
601	m->next = p->tokens[mark]->memo;
602	p->tokens[mark]->memo = m;
603	return `0`;
604	}
605
606	// Like _PyPegen_insert_memo(), but updates an existing node if found.
607	int
608	_PyPegen_update_memo(Parser p, int* mark, int type, void *node)
609	{
610	for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
611	if (m->type == type) {
612	// Update existing node.
613	m->node = node;
614	m->mark = p->mark;
615	return `0`;
616	}
617	}
618	// Insert new node.
619	return _PyPegen_insert_memo(p, mark, type, node);
620	}
621
622	// Return dummy NAME.
623	void *
624	_PyPegen_dummy_name(Parser *p, ...)
625	{
626	static void *cache = NULL;
627
628	if (cache != NULL) {
629	return cache;
630	}
631
632	PyObject *id = _create_dummy_identifier(p);
633	if (!id) {
634	return NULL;
635	}
636	cache = _PyAST_Name(id, Load, `1`, `0`, `1`, `0`, p->arena);
637	return cache;
638	}
639
640	static int
641	_get_keyword_or_name_type(Parser p, const* char name, int* name_len)
642	{
643	assert(name_len > `0`);
644	if (name_len >= p->n_keyword_lists \|\|
645	p->keywords[name_len] == NULL \|\|
646	p->keywords[name_len]->type == -`1`) {
647	return NAME;
648	}
649	for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -`1`; k++) {
650	if (strncmp(k->str, name, name_len) == `0`) {
651	return k->type;
652	}
653	}
654	return NAME;
655	}
656
657	static int
658	growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
659	assert(initial_size > `0`);
660	arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items));
661	arr->size = initial_size;
662	arr->num_items = `0`;
663
664	return arr->items != NULL;
665	}
666
667	static int
668	growable_comment_array_add(growable_comment_array arr, int* lineno, char *comment) {
669	if (arr->num_items >= arr->size) {
670	size_t new_size = arr->size * `2`;
671	void new_items_array = PyMem_Realloc(arr->items, new_size sizeof(*arr->items));
672	if (!new_items_array) {
673	return `0`;
674	}
675	arr->items = new_items_array;
676	arr->size = new_size;
677	}
678
679	arr->items[arr->num_items].lineno = lineno;
680	arr->items[arr->num_items].comment = comment; // Take ownership
681	arr->num_items++;
682	return `1`;
683	}
684
685	static void
686	growable_comment_array_deallocate(growable_comment_array *arr) {
687	for (unsigned i = `0`; i < arr->num_items; i++) {
688	PyMem_Free(arr->items[i].comment);
689	}
690	PyMem_Free(arr->items);
691	}
692
693	static int
694	initialize_token(Parser p, Token token, const char start, const* char end, int* token_type) {
695	assert(token != NULL);
696
697	token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type;
698	token->bytes = PyBytes_FromStringAndSize(start, end - start);
699	if (token->bytes == NULL) {
700	return -`1`;
701	}
702
703	if (_PyArena_AddPyObject(p->arena, token->bytes) < `0`) {
704	Py_DECREF(token->bytes);
705	return -`1`;
706	}
707
708	token->level = p->tok->level;
709
710	const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start;
711	int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno;
712	int end_lineno = p->tok->lineno;
713
714	int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -`1`;
715	int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -`1`;
716
717	token->lineno = lineno;
718	token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + col_offset : col_offset;
719	token->end_lineno = end_lineno;
720	token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + end_col_offset : end_col_offset;
721
722	p->fill += `1`;
723
724	if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
725	return raise_decode_error(p);
726	}
727
728	return (token_type == ERRORTOKEN ? tokenizer_error(p) : `0`);
729	}
730
731	static int
732	_resize_tokens_array(Parser *p) {
733	int newsize = p->size * `2`;
734	Token *new_tokens = PyMem_Realloc(p->tokens, newsize sizeof(Token *));
735	if (new_tokens == NULL) {
736	PyErr_NoMemory();
737	return -`1`;
738	}
739	p->tokens = new_tokens;
740
741	for (int i = p->size; i < newsize; i++) {
742	p->tokens[i] = PyMem_Calloc(`1`, sizeof(Token));
743	if (p->tokens[i] == NULL) {
744	p->size = i; // Needed, in order to cleanup correctly after parser fails
745	PyErr_NoMemory();
746	return -`1`;
747	}
748	}
749	p->size = newsize;
750	return `0`;
751	}
752
753	int
754	_PyPegen_fill_token(Parser *p)
755	{
756	const char *start;
757	const char *end;
758	int type = PyTokenizer_Get(p->tok, &start, &end);
759
760	// Record and skip '# type: ignore' comments
761	while (type == TYPE_IGNORE) {
762	Py_ssize_t len = end - start;
763	char *tag = PyMem_Malloc(len + `1`);
764	if (tag == NULL) {
765	PyErr_NoMemory();
766	return -`1`;
767	}
768	strncpy(tag, start, len);
769	tag[len] = `'\0'`;
770	// Ownership of tag passes to the growable array
771	if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
772	PyErr_NoMemory();
773	return -`1`;
774	}
775	type = PyTokenizer_Get(p->tok, &start, &end);
776	}
777
778	// If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
779	if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) {
780	type = NEWLINE; / Add an extra newline /
781	p->parsing_started = `0`;
782
783	if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
784	p->tok->pendin = -p->tok->indent;
785	p->tok->indent = `0`;
786	}
787	}
788	else {
789	p->parsing_started = `1`;
790	}
791
792	// Check if we are at the limit of the token array capacity and resize if needed
793	if ((p->fill == p->size) && (_resize_tokens_array(p) != `0`)) {
794	return -`1`;
795	}
796
797	Token *t = p->tokens[p->fill];
798	return initialize_token(p, t, start, end, type);
799	}
800
801
802	#if defined(Py_DEBUG)
803	// Instrumentation to count the effectiveness of memoization.
804	// The array counts the number of tokens skipped by memoization,
805	// indexed by type.
806
807	#define NSTATISTICS 2000
808	static long memo_statistics[NSTATISTICS];
809
810	void
811	_PyPegen_clear_memo_statistics()
812	{
813	for (int i = `0`; i < NSTATISTICS; i++) {
814	memo_statistics[i] = `0`;
815	}
816	}
817
818	PyObject *
819	_PyPegen_get_memo_statistics()
820	{
821	PyObject *ret = PyList_New(NSTATISTICS);
822	if (ret == NULL) {
823	return NULL;
824	}
825	for (int i = `0`; i < NSTATISTICS; i++) {
826	PyObject *value = PyLong_FromLong(memo_statistics[i]);
827	if (value == NULL) {
828	Py_DECREF(ret);
829	return NULL;
830	}
831	// PyList_SetItem borrows a reference to value.
832	if (PyList_SetItem(ret, i, value) < `0`) {
833	Py_DECREF(ret);
834	return NULL;
835	}
836	}
837	return ret;
838	}
839	#endif
840
841	int // bool
842	_PyPegen_is_memoized(Parser p, int* type, void *pres)
843	{
844	if (p->mark == p->fill) {
845	if (_PyPegen_fill_token(p) < `0`) {
846	p->error_indicator = `1`;
847	return -`1`;
848	}
849	}
850
851	Token *t = p->tokens[p->mark];
852
853	for (Memo *m = t->memo; m != NULL; m = m->next) {
854	if (m->type == type) {
855	#if defined(PY_DEBUG)
856	if (`0` <= type && type < NSTATISTICS) {
857	long count = m->mark - p->mark;
858	// A memoized negative result counts for one.
859	if (count <= `0`) {
860	count = `1`;
861	}
862	memo_statistics[type] += count;
863	}
864	#endif
865	p->mark = m->mark;
866	(void* **)(pres) = m->node;
867	return `1`;
868	}
869	}
870	return `0`;
871	}
872
873	int
874	_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser ), Parser p)
875	{
876	int mark = p->mark;
877	void *res = func(p);
878	p->mark = mark;
879	return (res != NULL) == positive;
880	}
881
882	int
883	_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser , const* char), Parser p, const char* arg)
884	{
885	int mark = p->mark;
886	void *res = func(p, arg);
887	p->mark = mark;
888	return (res != NULL) == positive;
889	}
890
891	int
892	_PyPegen_lookahead_with_int(int positive, Token (func)(Parser , int), Parser p, int* arg)
893	{
894	int mark = p->mark;
895	void *res = func(p, arg);
896	p->mark = mark;
897	return (res != NULL) == positive;
898	}
899
900	int
901	_PyPegen_lookahead(int positive, void (func)(Parser ), Parser *p)
902	{
903	int mark = p->mark;
904	void res = (void**)func(p);
905	p->mark = mark;
906	return (res != NULL) == positive;
907	}
908
909	Token *
910	_PyPegen_expect_token(Parser p, int* type)
911	{
912	if (p->mark == p->fill) {
913	if (_PyPegen_fill_token(p) < `0`) {
914	p->error_indicator = `1`;
915	return NULL;
916	}
917	}
918	Token *t = p->tokens[p->mark];
919	if (t->type != type) {
920	return NULL;
921	}
922	p->mark += `1`;
923	return t;
924	}
925
926	Token *
927	_PyPegen_expect_forced_token(Parser p, int* type, const char* expected) {
928
929	if (p->error_indicator == `1`) {
930	return NULL;
931	}
932
933	if (p->mark == p->fill) {
934	if (_PyPegen_fill_token(p) < `0`) {
935	p->error_indicator = `1`;
936	return NULL;
937	}
938	}
939	Token *t = p->tokens[p->mark];
940	if (t->type != type) {
941	RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "expected '%s'", expected);
942	return NULL;
943	}
944	p->mark += `1`;
945	return t;
946	}
947
948	expr_ty
949	_PyPegen_expect_soft_keyword(Parser p, const* char *keyword)
950	{
951	if (p->mark == p->fill) {
952	if (_PyPegen_fill_token(p) < `0`) {
953	p->error_indicator = `1`;
954	return NULL;
955	}
956	}
957	Token *t = p->tokens[p->mark];
958	if (t->type != NAME) {
959	return NULL;
960	}
961	const char *s = PyBytes_AsString(t->bytes);
962	if (!s) {
963	p->error_indicator = `1`;
964	return NULL;
965	}
966	if (strcmp(s, keyword) != `0`) {
967	return NULL;
968	}
969	return _PyPegen_name_token(p);
970	}
971
972	Token *
973	_PyPegen_get_last_nonnwhitespace_token(Parser *p)
974	{
975	assert(p->mark >= `0`);
976	Token *token = NULL;
977	for (int m = p->mark - `1`; m >= `0`; m--) {
978	token = p->tokens[m];
979	if (token->type != ENDMARKER && (token->type < NEWLINE \|\| token->type > DEDENT)) {
980	break;
981	}
982	}
983	return token;
984	}
985
986	static expr_ty
987	_PyPegen_name_from_token(Parser p, Token t)
988	{
989	if (t == NULL) {
990	return NULL;
991	}
992	const char *s = PyBytes_AsString(t->bytes);
993	if (!s) {
994	p->error_indicator = `1`;
995	return NULL;
996	}
997	PyObject *id = _PyPegen_new_identifier(p, s);
998	if (id == NULL) {
999	p->error_indicator = `1`;
1000	return NULL;
1001	}
1002	return _PyAST_Name(id, Load, t->lineno, t->col_offset, t->end_lineno,
1003	t->end_col_offset, p->arena);
1004	}
1005
1006
1007	expr_ty
1008	_PyPegen_name_token(Parser *p)
1009	{
1010	Token *t = _PyPegen_expect_token(p, NAME);
1011	return _PyPegen_name_from_token(p, t);
1012	}
1013
1014	void *
1015	_PyPegen_string_token(Parser *p)
1016	{
1017	return _PyPegen_expect_token(p, STRING);
1018	}
1019
1020
1021	expr_ty _PyPegen_soft_keyword_token(Parser *p) {
1022	Token *t = _PyPegen_expect_token(p, NAME);
1023	if (t == NULL) {
1024	return NULL;
1025	}
1026	char *the_token;
1027	Py_ssize_t size;
1028	PyBytes_AsStringAndSize(t->bytes, &the_token, &size);
1029	for (char *keyword = p->soft_keywords; keyword != NULL; keyword++) {
1030	if (strncmp(*keyword, the_token, size) == `0`) {
1031	return _PyPegen_name_from_token(p, t);
1032	}
1033	}
1034	return NULL;
1035	}
1036
1037	static PyObject *
1038	parsenumber_raw(const char *s)
1039	{
1040	const char *end;
1041	long x;
1042	double dx;
1043	Py_complex compl;
1044	int imflag;
1045
1046	assert(s != NULL);
1047	errno = `0`;
1048	end = s + strlen(s) - `1`;
1049	imflag = end == `'j'` \|\| end == `'J'`;
1050	if (s[`0`] == `'0'`) {
1051	x = (long)PyOS_strtoul(s, (char **)&end, `0`);
1052	if (x < `0` && errno == `0`) {
1053	return PyLong_FromString(s, (char **)`0`, `0`);
1054	}
1055	}
1056	else {
1057	x = PyOS_strtol(s, (char **)&end, `0`);
1058	}
1059	if (*end == `'\0'`) {
1060	if (errno != `0`) {
1061	return PyLong_FromString(s, (char **)`0`, `0`);
1062	}
1063	return PyLong_FromLong(x);
1064	}
1065	/ XXX Huge floats may silently fail /
1066	if (imflag) {
1067	compl.real = `0.`;
1068	compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
1069	if (compl.imag == -`1.0` && PyErr_Occurred()) {
1070	return NULL;
1071	}
1072	return PyComplex_FromCComplex(compl);
1073	}
1074	dx = PyOS_string_to_double(s, NULL, NULL);
1075	if (dx == -`1.0` && PyErr_Occurred()) {
1076	return NULL;
1077	}
1078	return PyFloat_FromDouble(dx);
1079	}
1080
1081	static PyObject *
1082	parsenumber(const char *s)
1083	{
1084	char *dup;
1085	char *end;
1086	PyObject *res = NULL;
1087
1088	assert(s != NULL);
1089
1090	if (strchr(s, `'_'`) == NULL) {
1091	return parsenumber_raw(s);
1092	}
1093	/ Create a duplicate without underscores. /
1094	dup = PyMem_Malloc(strlen(s) + `1`);
1095	if (dup == NULL) {
1096	return PyErr_NoMemory();
1097	}
1098	end = dup;
1099	for (; *s; s++) {
1100	if (*s != `'_'`) {
1101	end++ = s;
1102	}
1103	}
1104	*end = `'\0'`;
1105	res = parsenumber_raw(dup);
1106	PyMem_Free(dup);
1107	return res;
1108	}
1109
1110	expr_ty
1111	_PyPegen_number_token(Parser *p)
1112	{
1113	Token *t = _PyPegen_expect_token(p, NUMBER);
1114	if (t == NULL) {
1115	return NULL;
1116	}
1117
1118	const char *num_raw = PyBytes_AsString(t->bytes);
1119	if (num_raw == NULL) {
1120	p->error_indicator = `1`;
1121	return NULL;
1122	}
1123
1124	if (p->feature_version < `6` && strchr(num_raw, `'_'`) != NULL) {
1125	p->error_indicator = `1`;
1126	return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported "
1127	"in Python 3.6 and greater");
1128	}
1129
1130	PyObject *c = parsenumber(num_raw);
1131
1132	if (c == NULL) {
1133	p->error_indicator = `1`;
1134	return NULL;
1135	}
1136
1137	if (_PyArena_AddPyObject(p->arena, c) < `0`) {
1138	Py_DECREF(c);
1139	p->error_indicator = `1`;
1140	return NULL;
1141	}
1142
1143	return _PyAST_Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno,
1144	t->end_col_offset, p->arena);
1145	}
1146
1147	/ Check that the source for a single input statement really is a single*
1148	statement by looking at what is left in the buffer after parsing.
1149	Trailing whitespace and comments are OK. /*
1150	static int // bool
1151	bad_single_statement(Parser *p)
1152	{
1153	char *cur = p->tok->cur;
1154	char c = *cur;
1155
1156	for (;;) {
1157	while (c == `' '` \|\| c == `'\t'` \|\| c == `'\n'` \|\| c == `'\014'`) {
1158	c = *++cur;
1159	}
1160
1161	if (!c) {
1162	return `0`;
1163	}
1164
1165	if (c != `'#'`) {
1166	return `1`;
1167	}
1168
1169	/ Suck up comment. /
1170	while (c && c != `'\n'`) {
1171	c = *++cur;
1172	}
1173	}
1174	}
1175
1176	void
1177	_PyPegen_Parser_Free(Parser *p)
1178	{
1179	Py_XDECREF(p->normalize);
1180	for (int i = `0`; i < p->size; i++) {
1181	PyMem_Free(p->tokens[i]);
1182	}
1183	PyMem_Free(p->tokens);
1184	growable_comment_array_deallocate(&p->type_ignore_comments);
1185	PyMem_Free(p);
1186	}
1187
1188	static int
1189	compute_parser_flags(PyCompilerFlags *flags)
1190	{
1191	int parser_flags = `0`;
1192	if (!flags) {
1193	return `0`;
1194	}
1195	if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) {
1196	parser_flags \|= PyPARSE_DONT_IMPLY_DEDENT;
1197	}
1198	if (flags->cf_flags & PyCF_IGNORE_COOKIE) {
1199	parser_flags \|= PyPARSE_IGNORE_COOKIE;
1200	}
1201	if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) {
1202	parser_flags \|= PyPARSE_BARRY_AS_BDFL;
1203	}
1204	if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
1205	parser_flags \|= PyPARSE_TYPE_COMMENTS;
1206	}
1207	if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < `7`) {
1208	parser_flags \|= PyPARSE_ASYNC_HACKS;
1209	}
1210	if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) {
1211	parser_flags \|= PyPARSE_ALLOW_INCOMPLETE_INPUT;
1212	}
1213	return parser_flags;
1214	}
1215
1216	Parser *
1217	_PyPegen_Parser_New(struct tok_state tok, int* start_rule, int flags,
1218	int feature_version, int errcode, PyArena arena)
1219	{
1220	Parser p = PyMem_Malloc(sizeof*(Parser));
1221	if (p == NULL) {
1222	return (Parser *) PyErr_NoMemory();
1223	}
1224	assert(tok != NULL);
1225	tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > `0`;
1226	tok->async_hacks = (flags & PyPARSE_ASYNC_HACKS) > `0`;
1227	p->tok = tok;
1228	p->keywords = NULL;
1229	p->n_keyword_lists = -`1`;
1230	p->soft_keywords = NULL;
1231	p->tokens = PyMem_Malloc(sizeof(Token *));
1232	if (!p->tokens) {
1233	PyMem_Free(p);
1234	return (Parser *) PyErr_NoMemory();
1235	}
1236	p->tokens[`0`] = PyMem_Calloc(`1`, sizeof(Token));
1237	if (!p->tokens) {
1238	PyMem_Free(p->tokens);
1239	PyMem_Free(p);
1240	return (Parser *) PyErr_NoMemory();
1241	}
1242	if (!growable_comment_array_init(&p->type_ignore_comments, `10`)) {
1243	PyMem_Free(p->tokens[`0`]);
1244	PyMem_Free(p->tokens);
1245	PyMem_Free(p);
1246	return (Parser *) PyErr_NoMemory();
1247	}
1248
1249	p->mark = `0`;
1250	p->fill = `0`;
1251	p->size = `1`;
1252
1253	p->errcode = errcode;
1254	p->arena = arena;
1255	p->start_rule = start_rule;
1256	p->parsing_started = `0`;
1257	p->normalize = NULL;
1258	p->error_indicator = `0`;
1259
1260	p->starting_lineno = `0`;
1261	p->starting_col_offset = `0`;
1262	p->flags = flags;
1263	p->feature_version = feature_version;
1264	p->known_err_token = NULL;
1265	p->level = `0`;
1266	p->call_invalid_rules = `0`;
1267	return p;
1268	}
1269
1270	static void
1271	reset_parser_state(Parser *p)
1272	{
1273	for (int i = `0`; i < p->fill; i++) {
1274	p->tokens[i]->memo = NULL;
1275	}
1276	p->mark = `0`;
1277	p->call_invalid_rules = `1`;
1278	// Don't try to get extra tokens in interactive mode when trying to
1279	// raise specialized errors in the second pass.
1280	p->tok->interactive_underflow = IUNDERFLOW_STOP;
1281	}
1282
1283	static int
1284	_PyPegen_check_tokenizer_errors(Parser *p) {
1285	// Tokenize the whole input to see if there are any tokenization
1286	// errors such as mistmatching parentheses. These will get priority
1287	// over generic syntax errors only if the line number of the error is
1288	// before the one that we had for the generic error.
1289
1290	// We don't want to tokenize to the end for interactive input
1291	if (p->tok->prompt != NULL) {
1292	return `0`;
1293	}
1294
1295	PyObject type, value, *traceback;
1296	PyErr_Fetch(&type, &value, &traceback);
1297
1298	Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - `1`];
1299	Py_ssize_t current_err_line = current_token->lineno;
1300
1301	int ret = `0`;
1302
1303	for (;;) {
1304	const char *start;
1305	const char *end;
1306	switch (PyTokenizer_Get(p->tok, &start, &end)) {
1307	case ERRORTOKEN:
1308	if (p->tok->level != `0`) {
1309	int error_lineno = p->tok->parenlinenostack[p->tok->level-`1`];
1310	if (current_err_line > error_lineno) {
1311	raise_unclosed_parentheses_error(p);
1312	ret = -`1`;
1313	goto exit;
1314	}
1315	}
1316	break;
1317	case ENDMARKER:
1318	break;
1319	default:
1320	continue;
1321	}
1322	break;
1323	}
1324
1325
1326	exit:
1327	if (PyErr_Occurred()) {
1328	Py_XDECREF(value);
1329	Py_XDECREF(type);
1330	Py_XDECREF(traceback);
1331	} else {
1332	PyErr_Restore(type, value, traceback);
1333	}
1334	return ret;
1335	}
1336
1337
1338	static inline int
1339	_is_end_of_source(Parser *p) {
1340	int err = p->tok->done;
1341	return err == E_EOF \|\| err == E_EOFS \|\| err == E_EOLS;
1342	}
1343
1344	void *
1345	_PyPegen_run_parser(Parser *p)
1346	{
1347	void *res = _PyPegen_parse(p);
1348	assert(p->level == `0`);
1349	if (res == NULL) {
1350	if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) {
1351	PyErr_Clear();
1352	return RAISE_SYNTAX_ERROR("incomplete input");
1353	}
1354	if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
1355	return NULL;
1356	}
1357	// Make a second parser pass. In this pass we activate heavier and slower checks
1358	// to produce better error messages and more complete diagnostics. Extra "invalid_"*
1359	// rules will be active during parsing.
1360	Token *last_token = p->tokens[p->fill - `1`];
1361	reset_parser_state(p);
1362	_PyPegen_parse(p);
1363	if (PyErr_Occurred()) {
1364	// Prioritize tokenizer errors to custom syntax errors raised
1365	// on the second phase only if the errors come from the parser.
1366	int is_tok_ok = (p->tok->done == E_DONE \|\| p->tok->done == E_OK);
1367	if (is_tok_ok && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
1368	_PyPegen_check_tokenizer_errors(p);
1369	}
1370	return NULL;
1371	}
1372	if (p->fill == `0`) {
1373	RAISE_SYNTAX_ERROR("error at start before reading any input");
1374	}
1375	else if (last_token->type == ERRORTOKEN && p->tok->done == E_EOF) {
1376	if (p->tok->level) {
1377	raise_unclosed_parentheses_error(p);
1378	} else {
1379	RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
1380	}
1381	}
1382	else {
1383	if (p->tokens[p->fill-`1`]->type == INDENT) {
1384	RAISE_INDENTATION_ERROR("unexpected indent");
1385	}
1386	else if (p->tokens[p->fill-`1`]->type == DEDENT) {
1387	RAISE_INDENTATION_ERROR("unexpected unindent");
1388	}
1389	else {
1390	// Use the last token we found on the first pass to avoid reporting
1391	// incorrect locations for generic syntax errors just because we reached
1392	// further away when trying to find specific syntax errors in the second
1393	// pass.
1394	RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax");
1395	// _PyPegen_check_tokenizer_errors will override the existing
1396	// generic SyntaxError we just raised if errors are found.
1397	_PyPegen_check_tokenizer_errors(p);
1398	}
1399	}
1400	return NULL;
1401	}
1402
1403	if (p->start_rule == Py_single_input && bad_single_statement(p)) {
1404	p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
1405	return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
1406	}
1407
1408	// test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate()
1409	#if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN)
1410	if (p->start_rule == Py_single_input \|\|
1411	p->start_rule == Py_file_input \|\|
1412	p->start_rule == Py_eval_input)
1413	{
1414	if (!_PyAST_Validate(res)) {
1415	return NULL;
1416	}
1417	}
1418	#endif
1419	return res;
1420	}
1421
1422	mod_ty
1423	_PyPegen_run_parser_from_file_pointer(FILE fp, int* start_rule, PyObject *filename_ob,
1424	const char enc, const* char ps1, const* char *ps2,
1425	PyCompilerFlags flags, int* errcode, PyArena arena)
1426	{
1427	struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
1428	if (tok == NULL) {
1429	if (PyErr_Occurred()) {
1430	raise_tokenizer_init_error(filename_ob);
1431	return NULL;
1432	}
1433	return NULL;
1434	}
1435	if (!tok->fp \|\| ps1 != NULL \|\| ps2 != NULL \|\|
1436	PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == `0`) {
1437	tok->fp_interactive = `1`;
1438	}
1439	// This transfers the ownership to the tokenizer
1440	tok->filename = filename_ob;
1441	Py_INCREF(filename_ob);
1442
1443	// From here on we need to clean up even if there's an error
1444	mod_ty result = NULL;
1445
1446	int parser_flags = compute_parser_flags(flags);
1447	Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION,
1448	errcode, arena);
1449	if (p == NULL) {
1450	goto error;
1451	}
1452
1453	result = _PyPegen_run_parser(p);
1454	_PyPegen_Parser_Free(p);
1455
1456	error:
1457	PyTokenizer_Free(tok);
1458	return result;
1459	}
1460
1461	mod_ty
1462	_PyPegen_run_parser_from_string(const char str, int* start_rule, PyObject *filename_ob,
1463	PyCompilerFlags flags, PyArena arena)
1464	{
1465	int exec_input = start_rule == Py_file_input;
1466
1467	struct tok_state *tok;
1468	if (flags != NULL && flags->cf_flags & PyCF_IGNORE_COOKIE) {
1469	tok = PyTokenizer_FromUTF8(str, exec_input);
1470	} else {
1471	tok = PyTokenizer_FromString(str, exec_input);
1472	}
1473	if (tok == NULL) {
1474	if (PyErr_Occurred()) {
1475	raise_tokenizer_init_error(filename_ob);
1476	}
1477	return NULL;
1478	}
1479	// This transfers the ownership to the tokenizer
1480	tok->filename = filename_ob;
1481	Py_INCREF(filename_ob);
1482
1483	// We need to clear up from here on
1484	mod_ty result = NULL;
1485
1486	int parser_flags = compute_parser_flags(flags);
1487	int feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
1488	flags->cf_feature_version : PY_MINOR_VERSION;
1489	Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version,
1490	NULL, arena);
1491	if (p == NULL) {
1492	goto error;
1493	}
1494
1495	result = _PyPegen_run_parser(p);
1496	_PyPegen_Parser_Free(p);
1497
1498	error:
1499	PyTokenizer_Free(tok);
1500	return result;
1501	}
1502
1503	asdl_stmt_seq*
1504	_PyPegen_interactive_exit(Parser *p)
1505	{
1506	if (p->errcode) {
1507	*(p->errcode) = E_EOF;
1508	}
1509	return NULL;
1510	}
1511
1512	/ Creates a single-element asdl_seq* that contains a /
1513	asdl_seq *
1514	_PyPegen_singleton_seq(Parser p, void* *a)
1515	{
1516	assert(a != NULL);
1517	asdl_seq seq = (asdl_seq)_Py_asdl_generic_seq_new(`1`, p->arena);
1518	if (!seq) {
1519	return NULL;
1520	}
1521	asdl_seq_SET_UNTYPED(seq, `0`, a);
1522	return seq;
1523	}
1524
1525	/ Creates a copy of seq and prepends a to it /
1526	asdl_seq *
1527	_PyPegen_seq_insert_in_front(Parser p, void* a, asdl_seq seq)
1528	{
1529	assert(a != NULL);
1530	if (!seq) {
1531	return _PyPegen_singleton_seq(p, a);
1532	}
1533
1534	asdl_seq new_seq = (asdl_seq)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + `1`, p->arena);
1535	if (!new_seq) {
1536	return NULL;
1537	}
1538
1539	asdl_seq_SET_UNTYPED(new_seq, `0`, a);
1540	for (Py_ssize_t i = `1`, l = asdl_seq_LEN(new_seq); i < l; i++) {
1541	asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i - `1`));
1542	}
1543	return new_seq;
1544	}
1545
1546	/ Creates a copy of seq and appends a to it /
1547	asdl_seq *
1548	_PyPegen_seq_append_to_end(Parser p, asdl_seq seq, void *a)
1549	{
1550	assert(a != NULL);
1551	if (!seq) {
1552	return _PyPegen_singleton_seq(p, a);
1553	}
1554
1555	asdl_seq new_seq = (asdl_seq)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + `1`, p->arena);
1556	if (!new_seq) {
1557	return NULL;
1558	}
1559
1560	for (Py_ssize_t i = `0`, l = asdl_seq_LEN(new_seq); i + `1` < l; i++) {
1561	asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i));
1562	}
1563	asdl_seq_SET_UNTYPED(new_seq, asdl_seq_LEN(new_seq) - `1`, a);
1564	return new_seq;
1565	}
1566
1567	static Py_ssize_t
1568	_get_flattened_seq_size(asdl_seq *seqs)
1569	{
1570	Py_ssize_t size = `0`;
1571	for (Py_ssize_t i = `0`, l = asdl_seq_LEN(seqs); i < l; i++) {
1572	asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
1573	size += asdl_seq_LEN(inner_seq);
1574	}
1575	return size;
1576	}
1577
1578	/ Flattens an asdl_seq* of asdl_seqs /*
1579	asdl_seq *
1580	_PyPegen_seq_flatten(Parser p, asdl_seq seqs)
1581	{
1582	Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
1583	assert(flattened_seq_size > `0`);
1584
1585	asdl_seq flattened_seq = (asdl_seq)_Py_asdl_generic_seq_new(flattened_seq_size, p->arena);
1586	if (!flattened_seq) {
1587	return NULL;
1588	}
1589
1590	int flattened_seq_idx = `0`;
1591	for (Py_ssize_t i = `0`, l = asdl_seq_LEN(seqs); i < l; i++) {
1592	asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
1593	for (Py_ssize_t j = `0`, li = asdl_seq_LEN(inner_seq); j < li; j++) {
1594	asdl_seq_SET_UNTYPED(flattened_seq, flattened_seq_idx++, asdl_seq_GET_UNTYPED(inner_seq, j));
1595	}
1596	}
1597	assert(flattened_seq_idx == flattened_seq_size);
1598
1599	return flattened_seq;
1600	}
1601
1602	void *
1603	_PyPegen_seq_last_item(asdl_seq *seq)
1604	{
1605	Py_ssize_t len = asdl_seq_LEN(seq);
1606	return asdl_seq_GET_UNTYPED(seq, len - `1`);
1607	}
1608
1609	void *
1610	_PyPegen_seq_first_item(asdl_seq *seq)
1611	{
1612	return asdl_seq_GET_UNTYPED(seq, `0`);
1613	}
1614
1615
1616	/ Creates a new name of the form <first_name>.<second_name> /
1617	expr_ty
1618	_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
1619	{
1620	assert(first_name != NULL && second_name != NULL);
1621	PyObject *first_identifier = first_name->v.Name.id;
1622	PyObject *second_identifier = second_name->v.Name.id;
1623
1624	if (PyUnicode_READY(first_identifier) == -`1`) {
1625	return NULL;
1626	}
1627	if (PyUnicode_READY(second_identifier) == -`1`) {
1628	return NULL;
1629	}
1630	const char *first_str = PyUnicode_AsUTF8(first_identifier);
1631	if (!first_str) {
1632	return NULL;
1633	}
1634	const char *second_str = PyUnicode_AsUTF8(second_identifier);
1635	if (!second_str) {
1636	return NULL;
1637	}
1638	Py_ssize_t len = strlen(first_str) + strlen(second_str) + `1`; // +1 for the dot
1639
1640	PyObject *str = PyBytes_FromStringAndSize(NULL, len);
1641	if (!str) {
1642	return NULL;
1643	}
1644
1645	char *s = PyBytes_AS_STRING(str);
1646	if (!s) {
1647	return NULL;
1648	}
1649
1650	strcpy(s, first_str);
1651	s += strlen(first_str);
1652	*s++ = `'.'`;
1653	strcpy(s, second_str);
1654	s += strlen(second_str);
1655	*s = `'\0'`;
1656
1657	PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL);
1658	Py_DECREF(str);
1659	if (!uni) {
1660	return NULL;
1661	}
1662	PyUnicode_InternInPlace(&uni);
1663	if (_PyArena_AddPyObject(p->arena, uni) < `0`) {
1664	Py_DECREF(uni);
1665	return NULL;
1666	}
1667
1668	return _PyAST_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
1669	}
1670
1671	/ Counts the total number of dots in seq's tokens /
1672	int
1673	_PyPegen_seq_count_dots(asdl_seq *seq)
1674	{
1675	int number_of_dots = `0`;
1676	for (Py_ssize_t i = `0`, l = asdl_seq_LEN(seq); i < l; i++) {
1677	Token *current_expr = asdl_seq_GET_UNTYPED(seq, i);
1678	switch (current_expr->type) {
1679	case ELLIPSIS:
1680	number_of_dots += `3`;
1681	break;
1682	case DOT:
1683	number_of_dots += `1`;
1684	break;
1685	default:
1686	Py_UNREACHABLE();
1687	}
1688	}
1689
1690	return number_of_dots;
1691	}
1692
1693	/ Creates an alias with '' as the identifier name /*
1694	alias_ty
1695	_PyPegen_alias_for_star(Parser p, int* lineno, int col_offset, int end_lineno,
1696	int end_col_offset, PyArena *arena) {
1697	PyObject str = PyUnicode_InternFromString("");
1698	if (!str) {
1699	return NULL;
1700	}
1701	if (_PyArena_AddPyObject(p->arena, str) < `0`) {
1702	Py_DECREF(str);
1703	return NULL;
1704	}
1705	return _PyAST_alias(str, NULL, lineno, col_offset, end_lineno, end_col_offset, arena);
1706	}
1707
1708	/ Creates a new asdl_seq* with the identifiers of all the names in seq /
1709	asdl_identifier_seq *
1710	_PyPegen_map_names_to_ids(Parser p, asdl_expr_seq seq)
1711	{
1712	Py_ssize_t len = asdl_seq_LEN(seq);
1713	assert(len > `0`);
1714
1715	asdl_identifier_seq *new_seq = _Py_asdl_identifier_seq_new(len, p->arena);
1716	if (!new_seq) {
1717	return NULL;
1718	}
1719	for (Py_ssize_t i = `0`; i < len; i++) {
1720	expr_ty e = asdl_seq_GET(seq, i);
1721	asdl_seq_SET(new_seq, i, e->v.Name.id);
1722	}
1723	return new_seq;
1724	}
1725
1726	/ Constructs a CmpopExprPair /
1727	CmpopExprPair *
1728	_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
1729	{
1730	assert(expr != NULL);
1731	CmpopExprPair a = _PyArena_Malloc(p->arena, sizeof*(CmpopExprPair));
1732	if (!a) {
1733	return NULL;
1734	}
1735	a->cmpop = cmpop;
1736	a->expr = expr;
1737	return a;
1738	}
1739
1740	asdl_int_seq *
1741	_PyPegen_get_cmpops(Parser p, asdl_seq seq)
1742	{
1743	Py_ssize_t len = asdl_seq_LEN(seq);
1744	assert(len > `0`);
1745
1746	asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
1747	if (!new_seq) {
1748	return NULL;
1749	}
1750	for (Py_ssize_t i = `0`; i < len; i++) {
1751	CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
1752	asdl_seq_SET(new_seq, i, pair->cmpop);
1753	}
1754	return new_seq;
1755	}
1756
1757	asdl_expr_seq *
1758	_PyPegen_get_exprs(Parser p, asdl_seq seq)
1759	{
1760	Py_ssize_t len = asdl_seq_LEN(seq);
1761	assert(len > `0`);
1762
1763	asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
1764	if (!new_seq) {
1765	return NULL;
1766	}
1767	for (Py_ssize_t i = `0`; i < len; i++) {
1768	CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
1769	asdl_seq_SET(new_seq, i, pair->expr);
1770	}
1771	return new_seq;
1772	}
1773
1774	/ Creates an asdl_seq* where all the elements have been changed to have ctx as context /
1775	static asdl_expr_seq *
1776	_set_seq_context(Parser p, asdl_expr_seq seq, expr_context_ty ctx)
1777	{
1778	Py_ssize_t len = asdl_seq_LEN(seq);
1779	if (len == `0`) {
1780	return NULL;
1781	}
1782
1783	asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
1784	if (!new_seq) {
1785	return NULL;
1786	}
1787	for (Py_ssize_t i = `0`; i < len; i++) {
1788	expr_ty e = asdl_seq_GET(seq, i);
1789	asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
1790	}
1791	return new_seq;
1792	}
1793
1794	static expr_ty
1795	_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
1796	{
1797	return _PyAST_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
1798	}
1799
1800	static expr_ty
1801	_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
1802	{
1803	return _PyAST_Tuple(
1804	_set_seq_context(p, e->v.Tuple.elts, ctx),
1805	ctx,
1806	EXTRA_EXPR(e, e));
1807	}
1808
1809	static expr_ty
1810	_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
1811	{
1812	return _PyAST_List(
1813	_set_seq_context(p, e->v.List.elts, ctx),
1814	ctx,
1815	EXTRA_EXPR(e, e));
1816	}
1817
1818	static expr_ty
1819	_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
1820	{
1821	return _PyAST_Subscript(e->v.Subscript.value, e->v.Subscript.slice,
1822	ctx, EXTRA_EXPR(e, e));
1823	}
1824
1825	static expr_ty
1826	_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
1827	{
1828	return _PyAST_Attribute(e->v.Attribute.value, e->v.Attribute.attr,
1829	ctx, EXTRA_EXPR(e, e));
1830	}
1831
1832	static expr_ty
1833	_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
1834	{
1835	return _PyAST_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx),
1836	ctx, EXTRA_EXPR(e, e));
1837	}
1838
1839	/ Creates an `expr_ty` equivalent to `expr` but with `ctx` as context /
1840	expr_ty
1841	_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
1842	{
1843	assert(expr != NULL);
1844
1845	expr_ty new = NULL;
1846	switch (expr->kind) {
1847	case Name_kind:
1848	new = _set_name_context(p, expr, ctx);
1849	break;
1850	case Tuple_kind:
1851	new = _set_tuple_context(p, expr, ctx);
1852	break;
1853	case List_kind:
1854	new = _set_list_context(p, expr, ctx);
1855	break;
1856	case Subscript_kind:
1857	new = _set_subscript_context(p, expr, ctx);
1858	break;
1859	case Attribute_kind:
1860	new = _set_attribute_context(p, expr, ctx);
1861	break;
1862	case Starred_kind:
1863	new = _set_starred_context(p, expr, ctx);
1864	break;
1865	default:
1866	new = expr;
1867	}
1868	return new;
1869	}
1870
1871	/ Constructs a KeyValuePair that is used when parsing a dict's key value pairs /
1872	KeyValuePair *
1873	_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
1874	{
1875	KeyValuePair a = _PyArena_Malloc(p->arena, sizeof*(KeyValuePair));
1876	if (!a) {
1877	return NULL;
1878	}
1879	a->key = key;
1880	a->value = value;
1881	return a;
1882	}
1883
1884	/ Extracts all keys from an asdl_seq* of KeyValuePair's /*
1885	asdl_expr_seq *
1886	_PyPegen_get_keys(Parser p, asdl_seq seq)
1887	{
1888	Py_ssize_t len = asdl_seq_LEN(seq);
1889	asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
1890	if (!new_seq) {
1891	return NULL;
1892	}
1893	for (Py_ssize_t i = `0`; i < len; i++) {
1894	KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
1895	asdl_seq_SET(new_seq, i, pair->key);
1896	}
1897	return new_seq;
1898	}
1899
1900	/ Extracts all values from an asdl_seq* of KeyValuePair's /*
1901	asdl_expr_seq *
1902	_PyPegen_get_values(Parser p, asdl_seq seq)
1903	{
1904	Py_ssize_t len = asdl_seq_LEN(seq);
1905	asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
1906	if (!new_seq) {
1907	return NULL;
1908	}
1909	for (Py_ssize_t i = `0`; i < len; i++) {
1910	KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
1911	asdl_seq_SET(new_seq, i, pair->value);
1912	}
1913	return new_seq;
1914	}
1915
1916	/ Constructs a KeyPatternPair that is used when parsing mapping & class patterns /
1917	KeyPatternPair *
1918	_PyPegen_key_pattern_pair(Parser *p, expr_ty key, pattern_ty pattern)
1919	{
1920	KeyPatternPair a = _PyArena_Malloc(p->arena, sizeof*(KeyPatternPair));
1921	if (!a) {
1922	return NULL;
1923	}
1924	a->key = key;
1925	a->pattern = pattern;
1926	return a;
1927	}
1928
1929	/ Extracts all keys from an asdl_seq* of KeyPatternPair's /*
1930	asdl_expr_seq *
1931	_PyPegen_get_pattern_keys(Parser p, asdl_seq seq)
1932	{
1933	Py_ssize_t len = asdl_seq_LEN(seq);
1934	asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
1935	if (!new_seq) {
1936	return NULL;
1937	}
1938	for (Py_ssize_t i = `0`; i < len; i++) {
1939	KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
1940	asdl_seq_SET(new_seq, i, pair->key);
1941	}
1942	return new_seq;
1943	}
1944
1945	/ Extracts all patterns from an asdl_seq* of KeyPatternPair's /*
1946	asdl_pattern_seq *
1947	_PyPegen_get_patterns(Parser p, asdl_seq seq)
1948	{
1949	Py_ssize_t len = asdl_seq_LEN(seq);
1950	asdl_pattern_seq *new_seq = _Py_asdl_pattern_seq_new(len, p->arena);
1951	if (!new_seq) {
1952	return NULL;
1953	}
1954	for (Py_ssize_t i = `0`; i < len; i++) {
1955	KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
1956	asdl_seq_SET(new_seq, i, pair->pattern);
1957	}
1958	return new_seq;
1959	}
1960
1961	/ Constructs a NameDefaultPair /
1962	NameDefaultPair *
1963	_PyPegen_name_default_pair(Parser p, arg_ty arg, expr_ty value, Token tc)
1964	{
1965	NameDefaultPair a = _PyArena_Malloc(p->arena, sizeof*(NameDefaultPair));
1966	if (!a) {
1967	return NULL;
1968	}
1969	a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc);
1970	a->value = value;
1971	return a;
1972	}
1973
1974	/ Constructs a SlashWithDefault /
1975	SlashWithDefault *
1976	_PyPegen_slash_with_default(Parser p, asdl_arg_seq plain_names, asdl_seq *names_with_defaults)
1977	{
1978	SlashWithDefault a = _PyArena_Malloc(p->arena, sizeof*(SlashWithDefault));
1979	if (!a) {
1980	return NULL;
1981	}
1982	a->plain_names = plain_names;
1983	a->names_with_defaults = names_with_defaults;
1984	return a;
1985	}
1986
1987	/ Constructs a StarEtc /
1988	StarEtc *
1989	_PyPegen_star_etc(Parser p, arg_ty vararg, asdl_seq kwonlyargs, arg_ty kwarg)
1990	{
1991	StarEtc a = _PyArena_Malloc(p->arena, sizeof*(StarEtc));
1992	if (!a) {
1993	return NULL;
1994	}
1995	a->vararg = vararg;
1996	a->kwonlyargs = kwonlyargs;
1997	a->kwarg = kwarg;
1998	return a;
1999	}
2000
2001	asdl_seq *
2002	_PyPegen_join_sequences(Parser p, asdl_seq a, asdl_seq *b)
2003	{
2004	Py_ssize_t first_len = asdl_seq_LEN(a);
2005	Py_ssize_t second_len = asdl_seq_LEN(b);
2006	asdl_seq new_seq = (asdl_seq)_Py_asdl_generic_seq_new(first_len + second_len, p->arena);
2007	if (!new_seq) {
2008	return NULL;
2009	}
2010
2011	int k = `0`;
2012	for (Py_ssize_t i = `0`; i < first_len; i++) {
2013	asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(a, i));
2014	}
2015	for (Py_ssize_t i = `0`; i < second_len; i++) {
2016	asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(b, i));
2017	}
2018
2019	return new_seq;
2020	}
2021
2022	static asdl_arg_seq*
2023	_get_names(Parser p, asdl_seq names_with_defaults)
2024	{
2025	Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
2026	asdl_arg_seq *seq = _Py_asdl_arg_seq_new(len, p->arena);
2027	if (!seq) {
2028	return NULL;
2029	}
2030	for (Py_ssize_t i = `0`; i < len; i++) {
2031	NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
2032	asdl_seq_SET(seq, i, pair->arg);
2033	}
2034	return seq;
2035	}
2036
2037	static asdl_expr_seq *
2038	_get_defaults(Parser p, asdl_seq names_with_defaults)
2039	{
2040	Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
2041	asdl_expr_seq *seq = _Py_asdl_expr_seq_new(len, p->arena);
2042	if (!seq) {
2043	return NULL;
2044	}
2045	for (Py_ssize_t i = `0`; i < len; i++) {
2046	NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
2047	asdl_seq_SET(seq, i, pair->value);
2048	}
2049	return seq;
2050	}
2051
2052	static int
2053	_make_posonlyargs(Parser *p,
2054	asdl_arg_seq *slash_without_default,
2055	SlashWithDefault *slash_with_default,
2056	asdl_arg_seq **posonlyargs) {
2057	if (slash_without_default != NULL) {
2058	*posonlyargs = slash_without_default;
2059	}
2060	else if (slash_with_default != NULL) {
2061	asdl_arg_seq *slash_with_default_names =
2062	_get_names(p, slash_with_default->names_with_defaults);
2063	if (!slash_with_default_names) {
2064	return -`1`;
2065	}
2066	posonlyargs = (asdl_arg_seq)_PyPegen_join_sequences(
2067	p,
2068	(asdl_seq*)slash_with_default->plain_names,
2069	(asdl_seq*)slash_with_default_names);
2070	}
2071	else {
2072	*posonlyargs = _Py_asdl_arg_seq_new(`0`, p->arena);
2073	}
2074	return *posonlyargs == NULL ? -`1` : `0`;
2075	}
2076
2077	static int
2078	_make_posargs(Parser *p,
2079	asdl_arg_seq *plain_names,
2080	asdl_seq *names_with_default,
2081	asdl_arg_seq **posargs) {
2082	if (plain_names != NULL && names_with_default != NULL) {
2083	asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default);
2084	if (!names_with_default_names) {
2085	return -`1`;
2086	}
2087	posargs = (asdl_arg_seq)_PyPegen_join_sequences(
2088	p,(asdl_seq)plain_names, (asdl_seq)names_with_default_names);
2089	}
2090	else if (plain_names == NULL && names_with_default != NULL) {
2091	*posargs = _get_names(p, names_with_default);
2092	}
2093	else if (plain_names != NULL && names_with_default == NULL) {
2094	*posargs = plain_names;
2095	}
2096	else {
2097	*posargs = _Py_asdl_arg_seq_new(`0`, p->arena);
2098	}
2099	return *posargs == NULL ? -`1` : `0`;
2100	}
2101
2102	static int
2103	_make_posdefaults(Parser *p,
2104	SlashWithDefault *slash_with_default,
2105	asdl_seq *names_with_default,
2106	asdl_expr_seq **posdefaults) {
2107	if (slash_with_default != NULL && names_with_default != NULL) {
2108	asdl_expr_seq *slash_with_default_values =
2109	_get_defaults(p, slash_with_default->names_with_defaults);
2110	if (!slash_with_default_values) {
2111	return -`1`;
2112	}
2113	asdl_expr_seq *names_with_default_values = _get_defaults(p, names_with_default);
2114	if (!names_with_default_values) {
2115	return -`1`;
2116	}
2117	posdefaults = (asdl_expr_seq)_PyPegen_join_sequences(
2118	p,
2119	(asdl_seq*)slash_with_default_values,
2120	(asdl_seq*)names_with_default_values);
2121	}
2122	else if (slash_with_default == NULL && names_with_default != NULL) {
2123	*posdefaults = _get_defaults(p, names_with_default);
2124	}
2125	else if (slash_with_default != NULL && names_with_default == NULL) {
2126	*posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
2127	}
2128	else {
2129	*posdefaults = _Py_asdl_expr_seq_new(`0`, p->arena);
2130	}
2131	return *posdefaults == NULL ? -`1` : `0`;
2132	}
2133
2134	static int
2135	_make_kwargs(Parser p, StarEtc star_etc,
2136	asdl_arg_seq **kwonlyargs,
2137	asdl_expr_seq **kwdefaults) {
2138	if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
2139	*kwonlyargs = _get_names(p, star_etc->kwonlyargs);
2140	}
2141	else {
2142	*kwonlyargs = _Py_asdl_arg_seq_new(`0`, p->arena);
2143	}
2144
2145	if (*kwonlyargs == NULL) {
2146	return -`1`;
2147	}
2148
2149	if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
2150	*kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
2151	}
2152	else {
2153	*kwdefaults = _Py_asdl_expr_seq_new(`0`, p->arena);
2154	}
2155
2156	if (*kwdefaults == NULL) {
2157	return -`1`;
2158	}
2159
2160	return `0`;
2161	}
2162
2163	/ Constructs an arguments_ty object out of all the parsed constructs in the parameters rule /
2164	arguments_ty
2165	_PyPegen_make_arguments(Parser p, asdl_arg_seq slash_without_default,
2166	SlashWithDefault slash_with_default, asdl_arg_seq plain_names,
2167	asdl_seq names_with_default, StarEtc star_etc)
2168	{
2169	asdl_arg_seq *posonlyargs;
2170	if (_make_posonlyargs(p, slash_without_default, slash_with_default, &posonlyargs) == -`1`) {
2171	return NULL;
2172	}
2173
2174	asdl_arg_seq *posargs;
2175	if (_make_posargs(p, plain_names, names_with_default, &posargs) == -`1`) {
2176	return NULL;
2177	}
2178
2179	asdl_expr_seq *posdefaults;
2180	if (_make_posdefaults(p,slash_with_default, names_with_default, &posdefaults) == -`1`) {
2181	return NULL;
2182	}
2183
2184	arg_ty vararg = NULL;
2185	if (star_etc != NULL && star_etc->vararg != NULL) {
2186	vararg = star_etc->vararg;
2187	}
2188
2189	asdl_arg_seq *kwonlyargs;
2190	asdl_expr_seq *kwdefaults;
2191	if (_make_kwargs(p, star_etc, &kwonlyargs, &kwdefaults) == -`1`) {
2192	return NULL;
2193	}
2194
2195	arg_ty kwarg = NULL;
2196	if (star_etc != NULL && star_etc->kwarg != NULL) {
2197	kwarg = star_etc->kwarg;
2198	}
2199
2200	return _PyAST_arguments(posonlyargs, posargs, vararg, kwonlyargs,
2201	kwdefaults, kwarg, posdefaults, p->arena);
2202	}
2203
2204
2205	/ Constructs an empty arguments_ty object, that gets used when a function accepts no*
2206	* arguments. */
2207	arguments_ty
2208	_PyPegen_empty_arguments(Parser *p)
2209	{
2210	asdl_arg_seq *posonlyargs = _Py_asdl_arg_seq_new(`0`, p->arena);
2211	if (!posonlyargs) {
2212	return NULL;
2213	}
2214	asdl_arg_seq *posargs = _Py_asdl_arg_seq_new(`0`, p->arena);
2215	if (!posargs) {
2216	return NULL;
2217	}
2218	asdl_expr_seq *posdefaults = _Py_asdl_expr_seq_new(`0`, p->arena);
2219	if (!posdefaults) {
2220	return NULL;
2221	}
2222	asdl_arg_seq *kwonlyargs = _Py_asdl_arg_seq_new(`0`, p->arena);
2223	if (!kwonlyargs) {
2224	return NULL;
2225	}
2226	asdl_expr_seq *kwdefaults = _Py_asdl_expr_seq_new(`0`, p->arena);
2227	if (!kwdefaults) {
2228	return NULL;
2229	}
2230
2231	return _PyAST_arguments(posonlyargs, posargs, NULL, kwonlyargs,
2232	kwdefaults, NULL, posdefaults, p->arena);
2233	}
2234
2235	/ Encapsulates the value of an operator_ty into an AugOperator struct /
2236	AugOperator *
2237	_PyPegen_augoperator(Parser *p, operator_ty kind)
2238	{
2239	AugOperator a = _PyArena_Malloc(p->arena, sizeof*(AugOperator));
2240	if (!a) {
2241	return NULL;
2242	}
2243	a->kind = kind;
2244	return a;
2245	}
2246
2247	/ Construct a FunctionDef equivalent to function_def, but with decorators /
2248	stmt_ty
2249	_PyPegen_function_def_decorators(Parser p, asdl_expr_seq decorators, stmt_ty function_def)
2250	{
2251	assert(function_def != NULL);
2252	if (function_def->kind == AsyncFunctionDef_kind) {
2253	return _PyAST_AsyncFunctionDef(
2254	function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
2255	function_def->v.FunctionDef.body, decorators, function_def->v.FunctionDef.returns,
2256	function_def->v.FunctionDef.type_comment, function_def->lineno,
2257	function_def->col_offset, function_def->end_lineno, function_def->end_col_offset,
2258	p->arena);
2259	}
2260
2261	return _PyAST_FunctionDef(
2262	function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
2263	function_def->v.FunctionDef.body, decorators,
2264	function_def->v.FunctionDef.returns,
2265	function_def->v.FunctionDef.type_comment, function_def->lineno,
2266	function_def->col_offset, function_def->end_lineno,
2267	function_def->end_col_offset, p->arena);
2268	}
2269
2270	/ Construct a ClassDef equivalent to class_def, but with decorators /
2271	stmt_ty
2272	_PyPegen_class_def_decorators(Parser p, asdl_expr_seq decorators, stmt_ty class_def)
2273	{
2274	assert(class_def != NULL);
2275	return _PyAST_ClassDef(
2276	class_def->v.ClassDef.name, class_def->v.ClassDef.bases,
2277	class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators,
2278	class_def->lineno, class_def->col_offset, class_def->end_lineno,
2279	class_def->end_col_offset, p->arena);
2280	}
2281
2282	/ Construct a KeywordOrStarred /
2283	KeywordOrStarred *
2284	_PyPegen_keyword_or_starred(Parser p, void* element, int* is_keyword)
2285	{
2286	KeywordOrStarred a = _PyArena_Malloc(p->arena, sizeof*(KeywordOrStarred));
2287	if (!a) {
2288	return NULL;
2289	}
2290	a->element = element;
2291	a->is_keyword = is_keyword;
2292	return a;
2293	}
2294
2295	/ Get the number of starred expressions in an asdl_seq* of KeywordOrStarreds /*
2296	static int
2297	_seq_number_of_starred_exprs(asdl_seq *seq)
2298	{
2299	int n = `0`;
2300	for (Py_ssize_t i = `0`, l = asdl_seq_LEN(seq); i < l; i++) {
2301	KeywordOrStarred *k = asdl_seq_GET_UNTYPED(seq, i);
2302	if (!k->is_keyword) {
2303	n++;
2304	}
2305	}
2306	return n;
2307	}
2308
2309	/ Extract the starred expressions of an asdl_seq* of KeywordOrStarreds /*
2310	asdl_expr_seq *
2311	_PyPegen_seq_extract_starred_exprs(Parser p, asdl_seq kwargs)
2312	{
2313	int new_len = _seq_number_of_starred_exprs(kwargs);
2314	if (new_len == `0`) {
2315	return NULL;
2316	}
2317	asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(new_len, p->arena);
2318	if (!new_seq) {
2319	return NULL;
2320	}
2321
2322	int idx = `0`;
2323	for (Py_ssize_t i = `0`, len = asdl_seq_LEN(kwargs); i < len; i++) {
2324	KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
2325	if (!k->is_keyword) {
2326	asdl_seq_SET(new_seq, idx++, k->element);
2327	}
2328	}
2329	return new_seq;
2330	}
2331
2332	/ Return a new asdl_seq* with only the keywords in kwargs /
2333	asdl_keyword_seq*
2334	_PyPegen_seq_delete_starred_exprs(Parser p, asdl_seq kwargs)
2335	{
2336	Py_ssize_t len = asdl_seq_LEN(kwargs);
2337	Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
2338	if (new_len == `0`) {
2339	return NULL;
2340	}
2341	asdl_keyword_seq *new_seq = _Py_asdl_keyword_seq_new(new_len, p->arena);
2342	if (!new_seq) {
2343	return NULL;
2344	}
2345
2346	int idx = `0`;
2347	for (Py_ssize_t i = `0`; i < len; i++) {
2348	KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
2349	if (k->is_keyword) {
2350	asdl_seq_SET(new_seq, idx++, k->element);
2351	}
2352	}
2353	return new_seq;
2354	}
2355
2356	expr_ty
2357	_PyPegen_concatenate_strings(Parser p, asdl_seq strings)
2358	{
2359	Py_ssize_t len = asdl_seq_LEN(strings);
2360	assert(len > `0`);
2361
2362	Token *first = asdl_seq_GET_UNTYPED(strings, `0`);
2363	Token *last = asdl_seq_GET_UNTYPED(strings, len - `1`);
2364
2365	int bytesmode = `0`;
2366	PyObject *bytes_str = NULL;
2367
2368	FstringParser state;
2369	_PyPegen_FstringParser_Init(&state);
2370
2371	for (Py_ssize_t i = `0`; i < len; i++) {
2372	Token *t = asdl_seq_GET_UNTYPED(strings, i);
2373
2374	int this_bytesmode;
2375	int this_rawmode;
2376	PyObject *s;
2377	const char *fstr;
2378	Py_ssize_t fstrlen = -`1`;
2379
2380	if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != `0`) {
2381	goto error;
2382	}
2383
2384	/ Check that we are not mixing bytes with unicode. /
2385	if (i != `0` && bytesmode != this_bytesmode) {
2386	RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
2387	Py_XDECREF(s);
2388	goto error;
2389	}
2390	bytesmode = this_bytesmode;
2391
2392	if (fstr != NULL) {
2393	assert(s == NULL && !bytesmode);
2394
2395	int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
2396	this_rawmode, `0`, first, t, last);
2397	if (result < `0`) {
2398	goto error;
2399	}
2400	}
2401	else {
2402	/ String or byte string. /
2403	assert(s != NULL && fstr == NULL);
2404	assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
2405
2406	if (bytesmode) {
2407	if (i == `0`) {
2408	bytes_str = s;
2409	}
2410	else {
2411	PyBytes_ConcatAndDel(&bytes_str, s);
2412	if (!bytes_str) {
2413	goto error;
2414	}
2415	}
2416	}
2417	else {
2418	/ This is a regular string. Concatenate it. /
2419	if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < `0`) {
2420	goto error;
2421	}
2422	}
2423	}
2424	}
2425
2426	if (bytesmode) {
2427	if (_PyArena_AddPyObject(p->arena, bytes_str) < `0`) {
2428	goto error;
2429	}
2430	return _PyAST_Constant(bytes_str, NULL, first->lineno,
2431	first->col_offset, last->end_lineno,
2432	last->end_col_offset, p->arena);
2433	}
2434
2435	return _PyPegen_FstringParser_Finish(p, &state, first, last);
2436
2437	error:
2438	Py_XDECREF(bytes_str);
2439	_PyPegen_FstringParser_Dealloc(&state);
2440	if (PyErr_Occurred()) {
2441	raise_decode_error(p);
2442	}
2443	return NULL;
2444	}
2445
2446	expr_ty
2447	_PyPegen_ensure_imaginary(Parser *p, expr_ty exp)
2448	{
2449	if (exp->kind != Constant_kind \|\| !PyComplex_CheckExact(exp->v.Constant.value)) {
2450	RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "imaginary number required in complex literal");
2451	return NULL;
2452	}
2453	return exp;
2454	}
2455
2456	expr_ty
2457	_PyPegen_ensure_real(Parser *p, expr_ty exp)
2458	{
2459	if (exp->kind != Constant_kind \|\| PyComplex_CheckExact(exp->v.Constant.value)) {
2460	RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "real number required in complex literal");
2461	return NULL;
2462	}
2463	return exp;
2464	}
2465
2466	mod_ty
2467	_PyPegen_make_module(Parser p, asdl_stmt_seq a) {
2468	asdl_type_ignore_seq *type_ignores = NULL;
2469	Py_ssize_t num = p->type_ignore_comments.num_items;
2470	if (num > `0`) {
2471	// Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena
2472	type_ignores = _Py_asdl_type_ignore_seq_new(num, p->arena);
2473	if (type_ignores == NULL) {
2474	return NULL;
2475	}
2476	for (int i = `0`; i < num; i++) {
2477	PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment);
2478	if (tag == NULL) {
2479	return NULL;
2480	}
2481	type_ignore_ty ti = _PyAST_TypeIgnore(p->type_ignore_comments.items[i].lineno,
2482	tag, p->arena);
2483	if (ti == NULL) {
2484	return NULL;
2485	}
2486	asdl_seq_SET(type_ignores, i, ti);
2487	}
2488	}
2489	return _PyAST_Module(a, type_ignores, p->arena);
2490	}
2491
2492	// Error reporting helpers
2493
2494	expr_ty
2495	_PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type)
2496	{
2497	if (e == NULL) {
2498	return NULL;
2499	}
2500
2501	#define VISIT_CONTAINER(CONTAINER, TYPE) do { \
2502	Py_ssize_t len = asdl_seq_LEN((CONTAINER)->v.TYPE.elts);\
2503	for (Py_ssize_t i = 0; i < len; i++) {\
2504	expr_ty other = asdl_seq_GET((CONTAINER)->v.TYPE.elts, i);\
2505	expr_ty child = _PyPegen_get_invalid_target(other, targets_type);\
2506	if (child != NULL) {\
2507	return child;\
2508	}\
2509	}\
2510	} while (0)
2511
2512	// We only need to visit List and Tuple nodes recursively as those
2513	// are the only ones that can contain valid names in targets when
2514	// they are parsed as expressions. Any other kind of expression
2515	// that is a container (like Sets or Dicts) is directly invalid and
2516	// we don't need to visit it recursively.
2517
2518	switch (e->kind) {
2519	case List_kind:
2520	VISIT_CONTAINER(e, List);
2521	return NULL;
2522	case Tuple_kind:
2523	VISIT_CONTAINER(e, Tuple);
2524	return NULL;
2525	case Starred_kind:
2526	if (targets_type == DEL_TARGETS) {
2527	return e;
2528	}
2529	return _PyPegen_get_invalid_target(e->v.Starred.value, targets_type);
2530	case Compare_kind:
2531	// This is needed, because the `a in b` in `for a in b` gets parsed
2532	// as a comparison, and so we need to search the left side of the comparison
2533	// for invalid targets.
2534	if (targets_type == FOR_TARGETS) {
2535	cmpop_ty cmpop = (cmpop_ty) asdl_seq_GET(e->v.Compare.ops, `0`);
2536	if (cmpop == In) {
2537	return _PyPegen_get_invalid_target(e->v.Compare.left, targets_type);
2538	}
2539	return NULL;
2540	}
2541	return e;
2542	case Name_kind:
2543	case Subscript_kind:
2544	case Attribute_kind:
2545	return NULL;
2546	default:
2547	return e;
2548	}
2549	}
2550
2551	void _PyPegen_arguments_parsing_error(Parser p, expr_ty e) {
2552	int kwarg_unpacking = `0`;
2553	for (Py_ssize_t i = `0`, l = asdl_seq_LEN(e->v.Call.keywords); i < l; i++) {
2554	keyword_ty keyword = asdl_seq_GET(e->v.Call.keywords, i);
2555	if (!keyword->arg) {
2556	kwarg_unpacking = `1`;
2557	}
2558	}
2559
2560	const char *msg = NULL;
2561	if (kwarg_unpacking) {
2562	msg = "positional argument follows keyword argument unpacking";
2563	} else {
2564	msg = "positional argument follows keyword argument";
2565	}
2566
2567	return RAISE_SYNTAX_ERROR(msg);
2568	}
2569
2570
2571	expr_ty
2572	_PyPegen_get_last_comprehension_item(comprehension_ty comprehension) {
2573	if (comprehension->ifs == NULL \|\| asdl_seq_LEN(comprehension->ifs) == `0`) {
2574	return comprehension->iter;
2575	}
2576	return PyPegen_last_item(comprehension->ifs, expr_ty);
2577	}
2578
2579	void *
2580	_PyPegen_nonparen_genexp_in_call(Parser p, expr_ty args, asdl_comprehension_seq comprehensions)
2581	{
2582	/ The rule that calls this function is 'args for_if_clauses'.*
2583	For the input f(L, x for x in y), L and x are in args and
2584	the for is parsed as a for_if_clause. We have to check if
2585	len <= 1, so that input like dict((a, b) for a, b in x)
2586	gets successfully parsed and then we pass the last
2587	argument (x in the above example) as the location of the
2588	error /*
2589	Py_ssize_t len = asdl_seq_LEN(args->v.Call.args);
2590	if (len <= `1`) {
2591	return NULL;
2592	}
2593
2594	comprehension_ty last_comprehension = PyPegen_last_item(comprehensions, comprehension_ty);
2595
2596	return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
2597	(expr_ty) asdl_seq_GET(args->v.Call.args, len - `1`),
2598	_PyPegen_get_last_comprehension_item(last_comprehension),
2599	"Generator expression must be parenthesized"
2600	);
2601	}
2602
2603
2604	expr_ty _PyPegen_collect_call_seqs(Parser p, asdl_expr_seq a, asdl_seq *b,
2605	int lineno, int col_offset, int end_lineno,
2606	int end_col_offset, PyArena *arena) {
2607	Py_ssize_t args_len = asdl_seq_LEN(a);
2608	Py_ssize_t total_len = args_len;
2609
2610	if (b == NULL) {
2611	return _PyAST_Call(_PyPegen_dummy_name(p), a, NULL, lineno, col_offset,
2612	end_lineno, end_col_offset, arena);
2613
2614	}
2615
2616	asdl_expr_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b);
2617	asdl_keyword_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b);
2618
2619	if (starreds) {
2620	total_len += asdl_seq_LEN(starreds);
2621	}
2622
2623	asdl_expr_seq *args = _Py_asdl_expr_seq_new(total_len, arena);
2624
2625	Py_ssize_t i = `0`;
2626	for (i = `0`; i < args_len; i++) {
2627	asdl_seq_SET(args, i, asdl_seq_GET(a, i));
2628	}
2629	for (; i < total_len; i++) {
2630	asdl_seq_SET(args, i, asdl_seq_GET(starreds, i - args_len));
2631	}
2632
2633	return _PyAST_Call(_PyPegen_dummy_name(p), args, keywords, lineno,
2634	col_offset, end_lineno, end_col_offset, arena);
2635	}
2636

Browse the source code of python/Parser/pegen.c