1/*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5 *
6 * partial history:
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; re-enable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
25 * 2013-02-04 mrab added fullmatch primitive
26 *
27 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
28 *
29 * This version of the SRE library can be redistributed under CNRI's
30 * Python 1.6 license. For any other use, please contact Secret Labs
31 * AB ([email protected]).
32 *
33 * Portions of this engine have been developed in cooperation with
34 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
35 * other compatibility work.
36 */
37
/* Version and copyright banner of the SRE engine. */
static const char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40
41#define PY_SSIZE_T_CLEAN
42
43#include "Python.h"
44#include "pycore_long.h" // _PyLong_GetZero()
45#include "pycore_moduleobject.h" // _PyModule_GetState()
46#include "structmember.h" // PyMemberDef
47
48#include "sre.h"
49
50#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
51
52#include <ctype.h>
53
/* Name of this module, minus the leading underscore.  A definition
   supplied from outside takes precedence over the default below. */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

/* Name of the Python module imported when Python-level helpers are called. */
#define SRE_PY_MODULE "re"

/* Tracing is switched off here; define VERBOSE instead to enable TRACE(). */
#undef VERBOSE
63
64/* -------------------------------------------------------------------- */
65
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* LOCAL(type): file-local helper using the fastest local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
/* LOCAL(type): file-local inline helper */
#define LOCAL(type) static inline type
#endif

/* Negative status codes reported by the engine (see pattern_error()). */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */

/* TRACE((fmt, ...)) forwards its parenthesised argument list to printf
   when VERBOSE is defined and expands to nothing otherwise. */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
87
88/* -------------------------------------------------------------------- */
89/* search engine state */
90
/* ASCII character predicates.  The range comparison comes first, so the
   Py_IS* test is only evaluated for codes that are not above the upper
   bound given in the macro. */
#define SRE_IS_DIGIT(ch) ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) <= ' ' && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_WORD(ch) ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
99
/* Lower-case ch when it is below 128; any other code is returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
104
/* locale-specific character predicates */
/* Testing (c & ~255) == 0 is the same as c < 256 for any unsigned c, but
 * avoids warnings when c's type cannot hold larger values anyway. */
#define SRE_LOC_IS_ALNUM(ch) ((((ch) & ~255) == 0) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
110
/* Lower-case ch with the C library's tolower() when ch is below 256;
   larger codes are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch < 256) {
        return (unsigned int)tolower((int)ch);
    }
    return ch;
}
115
/* Upper-case ch with the C library's toupper() when ch is below 256;
   larger codes are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch < 256) {
        return (unsigned int)toupper((int)ch);
    }
    return ch;
}
120
/* unicode-specific character predicates: thin aliases for the
   Py_UNICODE_* classification macros, plus a "word" class that also
   accepts the underscore */

#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
128
/* Return Py_UNICODE_TOLOWER(ch) converted to unsigned int. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
133
/* Return Py_UNICODE_TOUPPER(ch) converted to unsigned int. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int uppered = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return uppered;
}
138
139LOCAL(int)
140sre_category(SRE_CODE category, unsigned int ch)
141{
142 switch (category) {
143
144 case SRE_CATEGORY_DIGIT:
145 return SRE_IS_DIGIT(ch);
146 case SRE_CATEGORY_NOT_DIGIT:
147 return !SRE_IS_DIGIT(ch);
148 case SRE_CATEGORY_SPACE:
149 return SRE_IS_SPACE(ch);
150 case SRE_CATEGORY_NOT_SPACE:
151 return !SRE_IS_SPACE(ch);
152 case SRE_CATEGORY_WORD:
153 return SRE_IS_WORD(ch);
154 case SRE_CATEGORY_NOT_WORD:
155 return !SRE_IS_WORD(ch);
156 case SRE_CATEGORY_LINEBREAK:
157 return SRE_IS_LINEBREAK(ch);
158 case SRE_CATEGORY_NOT_LINEBREAK:
159 return !SRE_IS_LINEBREAK(ch);
160
161 case SRE_CATEGORY_LOC_WORD:
162 return SRE_LOC_IS_WORD(ch);
163 case SRE_CATEGORY_LOC_NOT_WORD:
164 return !SRE_LOC_IS_WORD(ch);
165
166 case SRE_CATEGORY_UNI_DIGIT:
167 return SRE_UNI_IS_DIGIT(ch);
168 case SRE_CATEGORY_UNI_NOT_DIGIT:
169 return !SRE_UNI_IS_DIGIT(ch);
170 case SRE_CATEGORY_UNI_SPACE:
171 return SRE_UNI_IS_SPACE(ch);
172 case SRE_CATEGORY_UNI_NOT_SPACE:
173 return !SRE_UNI_IS_SPACE(ch);
174 case SRE_CATEGORY_UNI_WORD:
175 return SRE_UNI_IS_WORD(ch);
176 case SRE_CATEGORY_UNI_NOT_WORD:
177 return !SRE_UNI_IS_WORD(ch);
178 case SRE_CATEGORY_UNI_LINEBREAK:
179 return SRE_UNI_IS_LINEBREAK(ch);
180 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
181 return !SRE_UNI_IS_LINEBREAK(ch);
182 }
183 return 0;
184}
185
186LOCAL(int)
187char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
188{
189 return ch == pattern
190 || (SRE_CODE) sre_lower_locale(ch) == pattern
191 || (SRE_CODE) sre_upper_locale(ch) == pattern;
192}
193
194
195/* helpers */
196
197static void
198data_stack_dealloc(SRE_STATE* state)
199{
200 if (state->data_stack) {
201 PyMem_Free(state->data_stack);
202 state->data_stack = NULL;
203 }
204 state->data_stack_size = state->data_stack_base = 0;
205}
206
207static int
208data_stack_grow(SRE_STATE* state, Py_ssize_t size)
209{
210 Py_ssize_t minsize, cursize;
211 minsize = state->data_stack_base+size;
212 cursize = state->data_stack_size;
213 if (cursize < minsize) {
214 void* stack;
215 cursize = minsize+minsize/4+1024;
216 TRACE(("allocate/grow stack %zd\n", cursize));
217 stack = PyMem_Realloc(state->data_stack, cursize);
218 if (!stack) {
219 data_stack_dealloc(state);
220 return SRE_ERROR_MEMORY;
221 }
222 state->data_stack = (char *)stack;
223 state->data_stack_size = cursize;
224 }
225 return 0;
226}
227
228/* generate 8-bit version */
229
230#define SRE_CHAR Py_UCS1
231#define SIZEOF_SRE_CHAR 1
232#define SRE(F) sre_ucs1_##F
233#include "sre_lib.h"
234
235/* generate 16-bit unicode version */
236
237#define SRE_CHAR Py_UCS2
238#define SIZEOF_SRE_CHAR 2
239#define SRE(F) sre_ucs2_##F
240#include "sre_lib.h"
241
242/* generate 32-bit unicode version */
243
244#define SRE_CHAR Py_UCS4
245#define SIZEOF_SRE_CHAR 4
246#define SRE(F) sre_ucs4_##F
247#include "sre_lib.h"
248
249/* -------------------------------------------------------------------- */
250/* factories and destructors */
251
252/* module state */
253typedef struct {
254 PyTypeObject *Pattern_Type;
255 PyTypeObject *Match_Type;
256 PyTypeObject *Scanner_Type;
257} _sremodulestate;
258
259static _sremodulestate *
260get_sre_module_state(PyObject *m)
261{
262 _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m);
263 assert(state);
264 return state;
265}
266
267static struct PyModuleDef sremodule;
268#define get_sre_module_state_by_class(cls) \
269 (get_sre_module_state(PyType_GetModule(cls)))
270
271/* see sre.h for object declarations */
272static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t);
273static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
274
275/*[clinic input]
276module _sre
277class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type"
278class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type"
279class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type"
280[clinic start generated code]*/
281/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/
282
283/*[clinic input]
284_sre.getcodesize -> int
285[clinic start generated code]*/
286
287static int
288_sre_getcodesize_impl(PyObject *module)
289/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
290{
291 return sizeof(SRE_CODE);
292}
293
294/*[clinic input]
295_sre.ascii_iscased -> bool
296
297 character: int
298 /
299
300[clinic start generated code]*/
301
302static int
303_sre_ascii_iscased_impl(PyObject *module, int character)
304/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
305{
306 unsigned int ch = (unsigned int)character;
307 return ch < 128 && Py_ISALPHA(ch);
308}
309
310/*[clinic input]
311_sre.unicode_iscased -> bool
312
313 character: int
314 /
315
316[clinic start generated code]*/
317
318static int
319_sre_unicode_iscased_impl(PyObject *module, int character)
320/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
321{
322 unsigned int ch = (unsigned int)character;
323 return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
324}
325
326/*[clinic input]
327_sre.ascii_tolower -> int
328
329 character: int
330 /
331
332[clinic start generated code]*/
333
334static int
335_sre_ascii_tolower_impl(PyObject *module, int character)
336/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
337{
338 return sre_lower_ascii(character);
339}
340
341/*[clinic input]
342_sre.unicode_tolower -> int
343
344 character: int
345 /
346
347[clinic start generated code]*/
348
349static int
350_sre_unicode_tolower_impl(PyObject *module, int character)
351/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
352{
353 return sre_lower_unicode(character);
354}
355
356LOCAL(void)
357state_reset(SRE_STATE* state)
358{
359 /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
360 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
361
362 state->lastmark = -1;
363 state->lastindex = -1;
364
365 state->repeat = NULL;
366
367 data_stack_dealloc(state);
368}
369
370static const void*
371getstring(PyObject* string, Py_ssize_t* p_length,
372 int* p_isbytes, int* p_charsize,
373 Py_buffer *view)
374{
375 /* given a python object, return a data pointer, a length (in
376 characters), and a character size. return NULL if the object
377 is not a string (or not compatible) */
378
379 /* Unicode objects do not support the buffer API. So, get the data
380 directly instead. */
381 if (PyUnicode_Check(string)) {
382 if (PyUnicode_READY(string) == -1)
383 return NULL;
384 *p_length = PyUnicode_GET_LENGTH(string);
385 *p_charsize = PyUnicode_KIND(string);
386 *p_isbytes = 0;
387 return PyUnicode_DATA(string);
388 }
389
390 /* get pointer to byte string buffer */
391 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
392 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
393 return NULL;
394 }
395
396 *p_length = view->len;
397 *p_charsize = 1;
398 *p_isbytes = 1;
399
400 if (view->buf == NULL) {
401 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
402 PyBuffer_Release(view);
403 view->buf = NULL;
404 return NULL;
405 }
406 return view->buf;
407}
408
409LOCAL(PyObject*)
410state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
411 Py_ssize_t start, Py_ssize_t end)
412{
413 /* prepare state object */
414
415 Py_ssize_t length;
416 int isbytes, charsize;
417 const void* ptr;
418
419 memset(state, 0, sizeof(SRE_STATE));
420
421 state->mark = PyMem_New(const void *, pattern->groups * 2);
422 if (!state->mark) {
423 PyErr_NoMemory();
424 goto err;
425 }
426 state->lastmark = -1;
427 state->lastindex = -1;
428
429 state->buffer.buf = NULL;
430 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
431 if (!ptr)
432 goto err;
433
434 if (isbytes && pattern->isbytes == 0) {
435 PyErr_SetString(PyExc_TypeError,
436 "cannot use a string pattern on a bytes-like object");
437 goto err;
438 }
439 if (!isbytes && pattern->isbytes > 0) {
440 PyErr_SetString(PyExc_TypeError,
441 "cannot use a bytes pattern on a string-like object");
442 goto err;
443 }
444
445 /* adjust boundaries */
446 if (start < 0)
447 start = 0;
448 else if (start > length)
449 start = length;
450
451 if (end < 0)
452 end = 0;
453 else if (end > length)
454 end = length;
455
456 state->isbytes = isbytes;
457 state->charsize = charsize;
458 state->match_all = 0;
459 state->must_advance = 0;
460
461 state->beginning = ptr;
462
463 state->start = (void*) ((char*) ptr + start * state->charsize);
464 state->end = (void*) ((char*) ptr + end * state->charsize);
465
466 Py_INCREF(string);
467 state->string = string;
468 state->pos = start;
469 state->endpos = end;
470
471 return string;
472 err:
473 /* We add an explicit cast here because MSVC has a bug when
474 compiling C code where it believes that `const void**` cannot be
475 safely casted to `void*`, see bpo-39943 for details. */
476 PyMem_Free((void*) state->mark);
477 state->mark = NULL;
478 if (state->buffer.buf)
479 PyBuffer_Release(&state->buffer);
480 return NULL;
481}
482
483LOCAL(void)
484state_fini(SRE_STATE* state)
485{
486 if (state->buffer.buf)
487 PyBuffer_Release(&state->buffer);
488 Py_XDECREF(state->string);
489 data_stack_dealloc(state);
490 /* See above PyMem_Del for why we explicitly cast here. */
491 PyMem_Free((void*) state->mark);
492 state->mark = NULL;
493}
494
/* Convert a pointer into the subject (member) to a character index
   counted from the start of the string: the byte distance from
   state->beginning divided by state->charsize. */
#define STATE_OFFSET(state, member) \
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
498
499LOCAL(PyObject*)
500getslice(int isbytes, const void *ptr,
501 PyObject* string, Py_ssize_t start, Py_ssize_t end)
502{
503 if (isbytes) {
504 if (PyBytes_CheckExact(string) &&
505 start == 0 && end == PyBytes_GET_SIZE(string)) {
506 Py_INCREF(string);
507 return string;
508 }
509 return PyBytes_FromStringAndSize(
510 (const char *)ptr + start, end - start);
511 }
512 else {
513 return PyUnicode_Substring(string, start, end);
514 }
515}
516
517LOCAL(PyObject*)
518state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
519{
520 Py_ssize_t i, j;
521
522 index = (index - 1) * 2;
523
524 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
525 if (empty)
526 /* want empty string */
527 i = j = 0;
528 else {
529 Py_RETURN_NONE;
530 }
531 } else {
532 i = STATE_OFFSET(state, state->mark[index]);
533 j = STATE_OFFSET(state, state->mark[index+1]);
534 }
535
536 return getslice(state->isbytes, state->beginning, string, i, j);
537}
538
539static void
540pattern_error(Py_ssize_t status)
541{
542 switch (status) {
543 case SRE_ERROR_RECURSION_LIMIT:
544 /* This error code seems to be unused. */
545 PyErr_SetString(
546 PyExc_RecursionError,
547 "maximum recursion limit exceeded"
548 );
549 break;
550 case SRE_ERROR_MEMORY:
551 PyErr_NoMemory();
552 break;
553 case SRE_ERROR_INTERRUPTED:
554 /* An exception has already been raised, so let it fly */
555 break;
556 default:
557 /* other error codes indicate compiler/engine bugs */
558 PyErr_SetString(
559 PyExc_RuntimeError,
560 "internal error in regular expression engine"
561 );
562 }
563}
564
565static int
566pattern_traverse(PatternObject *self, visitproc visit, void *arg)
567{
568 Py_VISIT(Py_TYPE(self));
569 Py_VISIT(self->groupindex);
570 Py_VISIT(self->indexgroup);
571 Py_VISIT(self->pattern);
572 return 0;
573}
574
575static int
576pattern_clear(PatternObject *self)
577{
578 Py_CLEAR(self->groupindex);
579 Py_CLEAR(self->indexgroup);
580 Py_CLEAR(self->pattern);
581 return 0;
582}
583
584static void
585pattern_dealloc(PatternObject* self)
586{
587 PyTypeObject *tp = Py_TYPE(self);
588
589 PyObject_GC_UnTrack(self);
590 if (self->weakreflist != NULL) {
591 PyObject_ClearWeakRefs((PyObject *) self);
592 }
593 (void)pattern_clear(self);
594 tp->tp_free(self);
595 Py_DECREF(tp);
596}
597
598LOCAL(Py_ssize_t)
599sre_match(SRE_STATE* state, SRE_CODE* pattern)
600{
601 if (state->charsize == 1)
602 return sre_ucs1_match(state, pattern, 1);
603 if (state->charsize == 2)
604 return sre_ucs2_match(state, pattern, 1);
605 assert(state->charsize == 4);
606 return sre_ucs4_match(state, pattern, 1);
607}
608
609LOCAL(Py_ssize_t)
610sre_search(SRE_STATE* state, SRE_CODE* pattern)
611{
612 if (state->charsize == 1)
613 return sre_ucs1_search(state, pattern);
614 if (state->charsize == 2)
615 return sre_ucs2_search(state, pattern);
616 assert(state->charsize == 4);
617 return sre_ucs4_search(state, pattern);
618}
619
620/*[clinic input]
621_sre.SRE_Pattern.match
622
623 cls: defining_class
624 /
625 string: object
626 pos: Py_ssize_t = 0
627 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
628
629Matches zero or more characters at the beginning of the string.
630[clinic start generated code]*/
631
632static PyObject *
633_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls,
634 PyObject *string, Py_ssize_t pos,
635 Py_ssize_t endpos)
636/*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/
637{
638 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
639 SRE_STATE state;
640 Py_ssize_t status;
641 PyObject *match;
642
643 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
644 return NULL;
645
646 state.ptr = state.start;
647
648 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
649
650 status = sre_match(&state, PatternObject_GetCode(self));
651
652 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
653 if (PyErr_Occurred()) {
654 state_fini(&state);
655 return NULL;
656 }
657
658 match = pattern_new_match(module_state, self, &state, status);
659 state_fini(&state);
660 return match;
661}
662
663/*[clinic input]
664_sre.SRE_Pattern.fullmatch
665
666 cls: defining_class
667 /
668 string: object
669 pos: Py_ssize_t = 0
670 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
671
672Matches against all of the string.
673[clinic start generated code]*/
674
675static PyObject *
676_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls,
677 PyObject *string, Py_ssize_t pos,
678 Py_ssize_t endpos)
679/*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/
680{
681 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
682 SRE_STATE state;
683 Py_ssize_t status;
684 PyObject *match;
685
686 if (!state_init(&state, self, string, pos, endpos))
687 return NULL;
688
689 state.ptr = state.start;
690
691 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
692
693 state.match_all = 1;
694 status = sre_match(&state, PatternObject_GetCode(self));
695
696 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
697 if (PyErr_Occurred()) {
698 state_fini(&state);
699 return NULL;
700 }
701
702 match = pattern_new_match(module_state, self, &state, status);
703 state_fini(&state);
704 return match;
705}
706
707/*[clinic input]
708_sre.SRE_Pattern.search
709
710 cls: defining_class
711 /
712 string: object
713 pos: Py_ssize_t = 0
714 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
715
716Scan through string looking for a match, and return a corresponding match object instance.
717
718Return None if no position in the string matches.
719[clinic start generated code]*/
720
721static PyObject *
722_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls,
723 PyObject *string, Py_ssize_t pos,
724 Py_ssize_t endpos)
725/*[clinic end generated code: output=bd7f2d9d583e1463 input=afa9afb66a74a4b3]*/
726{
727 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
728 SRE_STATE state;
729 Py_ssize_t status;
730 PyObject *match;
731
732 if (!state_init(&state, self, string, pos, endpos))
733 return NULL;
734
735 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
736
737 status = sre_search(&state, PatternObject_GetCode(self));
738
739 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
740
741 if (PyErr_Occurred()) {
742 state_fini(&state);
743 return NULL;
744 }
745
746 match = pattern_new_match(module_state, self, &state, status);
747 state_fini(&state);
748 return match;
749}
750
751static PyObject*
752call(const char* module, const char* function, PyObject* args)
753{
754 PyObject* name;
755 PyObject* mod;
756 PyObject* func;
757 PyObject* result;
758
759 if (!args)
760 return NULL;
761 name = PyUnicode_FromString(module);
762 if (!name)
763 return NULL;
764 mod = PyImport_Import(name);
765 Py_DECREF(name);
766 if (!mod)
767 return NULL;
768 func = PyObject_GetAttrString(mod, function);
769 Py_DECREF(mod);
770 if (!func)
771 return NULL;
772 result = PyObject_CallObject(func, args);
773 Py_DECREF(func);
774 Py_DECREF(args);
775 return result;
776}
777
778/*[clinic input]
779_sre.SRE_Pattern.findall
780
781 string: object
782 pos: Py_ssize_t = 0
783 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
784
785Return a list of all non-overlapping matches of pattern in string.
786[clinic start generated code]*/
787
788static PyObject *
789_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
790 Py_ssize_t pos, Py_ssize_t endpos)
791/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
792{
793 SRE_STATE state;
794 PyObject* list;
795 Py_ssize_t status;
796 Py_ssize_t i, b, e;
797
798 if (!state_init(&state, self, string, pos, endpos))
799 return NULL;
800
801 list = PyList_New(0);
802 if (!list) {
803 state_fini(&state);
804 return NULL;
805 }
806
807 while (state.start <= state.end) {
808
809 PyObject* item;
810
811 state_reset(&state);
812
813 state.ptr = state.start;
814
815 status = sre_search(&state, PatternObject_GetCode(self));
816 if (PyErr_Occurred())
817 goto error;
818
819 if (status <= 0) {
820 if (status == 0)
821 break;
822 pattern_error(status);
823 goto error;
824 }
825
826 /* don't bother to build a match object */
827 switch (self->groups) {
828 case 0:
829 b = STATE_OFFSET(&state, state.start);
830 e = STATE_OFFSET(&state, state.ptr);
831 item = getslice(state.isbytes, state.beginning,
832 string, b, e);
833 if (!item)
834 goto error;
835 break;
836 case 1:
837 item = state_getslice(&state, 1, string, 1);
838 if (!item)
839 goto error;
840 break;
841 default:
842 item = PyTuple_New(self->groups);
843 if (!item)
844 goto error;
845 for (i = 0; i < self->groups; i++) {
846 PyObject* o = state_getslice(&state, i+1, string, 1);
847 if (!o) {
848 Py_DECREF(item);
849 goto error;
850 }
851 PyTuple_SET_ITEM(item, i, o);
852 }
853 break;
854 }
855
856 status = PyList_Append(list, item);
857 Py_DECREF(item);
858 if (status < 0)
859 goto error;
860
861 state.must_advance = (state.ptr == state.start);
862 state.start = state.ptr;
863 }
864
865 state_fini(&state);
866 return list;
867
868error:
869 Py_DECREF(list);
870 state_fini(&state);
871 return NULL;
872
873}
874
875/*[clinic input]
876_sre.SRE_Pattern.finditer
877
878 cls: defining_class
879 /
880 string: object
881 pos: Py_ssize_t = 0
882 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
883
884Return an iterator over all non-overlapping matches for the RE pattern in string.
885
886For each match, the iterator returns a match object.
887[clinic start generated code]*/
888
889static PyObject *
890_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls,
891 PyObject *string, Py_ssize_t pos,
892 Py_ssize_t endpos)
893/*[clinic end generated code: output=1791dbf3618ade56 input=812e332a4848cbaf]*/
894{
895 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
896 PyObject* scanner;
897 PyObject* search;
898 PyObject* iterator;
899
900 scanner = pattern_scanner(module_state, self, string, pos, endpos);
901 if (!scanner)
902 return NULL;
903
904 search = PyObject_GetAttrString(scanner, "search");
905 Py_DECREF(scanner);
906 if (!search)
907 return NULL;
908
909 iterator = PyCallIter_New(search, Py_None);
910 Py_DECREF(search);
911
912 return iterator;
913}
914
915/*[clinic input]
916_sre.SRE_Pattern.scanner
917
918 cls: defining_class
919 /
920 string: object
921 pos: Py_ssize_t = 0
922 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
923
924[clinic start generated code]*/
925
926static PyObject *
927_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls,
928 PyObject *string, Py_ssize_t pos,
929 Py_ssize_t endpos)
930/*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/
931{
932 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
933
934 return pattern_scanner(module_state, self, string, pos, endpos);
935}
936
937/*[clinic input]
938_sre.SRE_Pattern.split
939
940 string: object
941 maxsplit: Py_ssize_t = 0
942
943Split string by the occurrences of pattern.
944[clinic start generated code]*/
945
946static PyObject *
947_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
948 Py_ssize_t maxsplit)
949/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
950{
951 SRE_STATE state;
952 PyObject* list;
953 PyObject* item;
954 Py_ssize_t status;
955 Py_ssize_t n;
956 Py_ssize_t i;
957 const void* last;
958
959 assert(self->codesize != 0);
960
961 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
962 return NULL;
963
964 list = PyList_New(0);
965 if (!list) {
966 state_fini(&state);
967 return NULL;
968 }
969
970 n = 0;
971 last = state.start;
972
973 while (!maxsplit || n < maxsplit) {
974
975 state_reset(&state);
976
977 state.ptr = state.start;
978
979 status = sre_search(&state, PatternObject_GetCode(self));
980 if (PyErr_Occurred())
981 goto error;
982
983 if (status <= 0) {
984 if (status == 0)
985 break;
986 pattern_error(status);
987 goto error;
988 }
989
990 /* get segment before this match */
991 item = getslice(state.isbytes, state.beginning,
992 string, STATE_OFFSET(&state, last),
993 STATE_OFFSET(&state, state.start)
994 );
995 if (!item)
996 goto error;
997 status = PyList_Append(list, item);
998 Py_DECREF(item);
999 if (status < 0)
1000 goto error;
1001
1002 /* add groups (if any) */
1003 for (i = 0; i < self->groups; i++) {
1004 item = state_getslice(&state, i+1, string, 0);
1005 if (!item)
1006 goto error;
1007 status = PyList_Append(list, item);
1008 Py_DECREF(item);
1009 if (status < 0)
1010 goto error;
1011 }
1012
1013 n = n + 1;
1014 state.must_advance = (state.ptr == state.start);
1015 last = state.start = state.ptr;
1016
1017 }
1018
1019 /* get segment following last match (even if empty) */
1020 item = getslice(state.isbytes, state.beginning,
1021 string, STATE_OFFSET(&state, last), state.endpos
1022 );
1023 if (!item)
1024 goto error;
1025 status = PyList_Append(list, item);
1026 Py_DECREF(item);
1027 if (status < 0)
1028 goto error;
1029
1030 state_fini(&state);
1031 return list;
1032
1033error:
1034 Py_DECREF(list);
1035 state_fini(&state);
1036 return NULL;
1037
1038}
1039
1040static PyObject*
1041pattern_subx(_sremodulestate* module_state,
1042 PatternObject* self,
1043 PyObject* ptemplate,
1044 PyObject* string,
1045 Py_ssize_t count,
1046 Py_ssize_t subn)
1047{
1048 SRE_STATE state;
1049 PyObject* list;
1050 PyObject* joiner;
1051 PyObject* item;
1052 PyObject* filter;
1053 PyObject* match;
1054 const void* ptr;
1055 Py_ssize_t status;
1056 Py_ssize_t n;
1057 Py_ssize_t i, b, e;
1058 int isbytes, charsize;
1059 int filter_is_callable;
1060 Py_buffer view;
1061
1062 if (PyCallable_Check(ptemplate)) {
1063 /* sub/subn takes either a function or a template */
1064 filter = ptemplate;
1065 Py_INCREF(filter);
1066 filter_is_callable = 1;
1067 } else {
1068 /* if not callable, check if it's a literal string */
1069 int literal;
1070 view.buf = NULL;
1071 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1072 if (ptr) {
1073 if (charsize == 1)
1074 literal = memchr(ptr, '\\', n) == NULL;
1075 else
1076 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1077 } else {
1078 PyErr_Clear();
1079 literal = 0;
1080 }
1081 if (view.buf)
1082 PyBuffer_Release(&view);
1083 if (literal) {
1084 filter = ptemplate;
1085 Py_INCREF(filter);
1086 filter_is_callable = 0;
1087 } else {
1088 /* not a literal; hand it over to the template compiler */
1089 filter = call(
1090 SRE_PY_MODULE, "_subx",
1091 PyTuple_Pack(2, self, ptemplate)
1092 );
1093 if (!filter)
1094 return NULL;
1095 filter_is_callable = PyCallable_Check(filter);
1096 }
1097 }
1098
1099 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1100 Py_DECREF(filter);
1101 return NULL;
1102 }
1103
1104 list = PyList_New(0);
1105 if (!list) {
1106 Py_DECREF(filter);
1107 state_fini(&state);
1108 return NULL;
1109 }
1110
1111 n = i = 0;
1112
1113 while (!count || n < count) {
1114
1115 state_reset(&state);
1116
1117 state.ptr = state.start;
1118
1119 status = sre_search(&state, PatternObject_GetCode(self));
1120 if (PyErr_Occurred())
1121 goto error;
1122
1123 if (status <= 0) {
1124 if (status == 0)
1125 break;
1126 pattern_error(status);
1127 goto error;
1128 }
1129
1130 b = STATE_OFFSET(&state, state.start);
1131 e = STATE_OFFSET(&state, state.ptr);
1132
1133 if (i < b) {
1134 /* get segment before this match */
1135 item = getslice(state.isbytes, state.beginning,
1136 string, i, b);
1137 if (!item)
1138 goto error;
1139 status = PyList_Append(list, item);
1140 Py_DECREF(item);
1141 if (status < 0)
1142 goto error;
1143
1144 }
1145
1146 if (filter_is_callable) {
1147 /* pass match object through filter */
1148 match = pattern_new_match(module_state, self, &state, 1);
1149 if (!match)
1150 goto error;
1151 item = PyObject_CallOneArg(filter, match);
1152 Py_DECREF(match);
1153 if (!item)
1154 goto error;
1155 } else {
1156 /* filter is literal string */
1157 item = filter;
1158 Py_INCREF(item);
1159 }
1160
1161 /* add to list */
1162 if (item != Py_None) {
1163 status = PyList_Append(list, item);
1164 Py_DECREF(item);
1165 if (status < 0)
1166 goto error;
1167 }
1168
1169 i = e;
1170 n = n + 1;
1171 state.must_advance = (state.ptr == state.start);
1172 state.start = state.ptr;
1173 }
1174
1175 /* get segment following last match */
1176 if (i < state.endpos) {
1177 item = getslice(state.isbytes, state.beginning,
1178 string, i, state.endpos);
1179 if (!item)
1180 goto error;
1181 status = PyList_Append(list, item);
1182 Py_DECREF(item);
1183 if (status < 0)
1184 goto error;
1185 }
1186
1187 state_fini(&state);
1188
1189 Py_DECREF(filter);
1190
1191 /* convert list to single string (also removes list) */
1192 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1193 if (!joiner) {
1194 Py_DECREF(list);
1195 return NULL;
1196 }
1197 if (PyList_GET_SIZE(list) == 0) {
1198 Py_DECREF(list);
1199 item = joiner;
1200 }
1201 else {
1202 if (state.isbytes)
1203 item = _PyBytes_Join(joiner, list);
1204 else
1205 item = PyUnicode_Join(joiner, list);
1206 Py_DECREF(joiner);
1207 Py_DECREF(list);
1208 if (!item)
1209 return NULL;
1210 }
1211
1212 if (subn)
1213 return Py_BuildValue("Nn", item, n);
1214
1215 return item;
1216
1217error:
1218 Py_DECREF(list);
1219 state_fini(&state);
1220 Py_DECREF(filter);
1221 return NULL;
1222
1223}
1224
1225/*[clinic input]
1226_sre.SRE_Pattern.sub
1227
1228 cls: defining_class
1229 /
1230 repl: object
1231 string: object
1232 count: Py_ssize_t = 0
1233
1234Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1235[clinic start generated code]*/
1236
1237static PyObject *
1238_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls,
1239 PyObject *repl, PyObject *string, Py_ssize_t count)
1240/*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/
1241{
1242 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1243
1244 return pattern_subx(module_state, self, repl, string, count, 0);
1245}
1246
1247/*[clinic input]
1248_sre.SRE_Pattern.subn
1249
1250 cls: defining_class
1251 /
1252 repl: object
1253 string: object
1254 count: Py_ssize_t = 0
1255
1256Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1257[clinic start generated code]*/
1258
1259static PyObject *
1260_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls,
1261 PyObject *repl, PyObject *string,
1262 Py_ssize_t count)
1263/*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/
1264{
1265 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1266
1267 return pattern_subx(module_state, self, repl, string, count, 1);
1268}
1269
1270/*[clinic input]
1271_sre.SRE_Pattern.__copy__
1272
1273[clinic start generated code]*/
1274
1275static PyObject *
1276_sre_SRE_Pattern___copy___impl(PatternObject *self)
1277/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1278{
1279 Py_INCREF(self);
1280 return (PyObject *)self;
1281}
1282
1283/*[clinic input]
1284_sre.SRE_Pattern.__deepcopy__
1285
1286 memo: object
1287 /
1288
1289[clinic start generated code]*/
1290
1291static PyObject *
1292_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1293/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
1294{
1295 Py_INCREF(self);
1296 return (PyObject *)self;
1297}
1298
1299static PyObject *
1300pattern_repr(PatternObject *obj)
1301{
1302 static const struct {
1303 const char *name;
1304 int value;
1305 } flag_names[] = {
1306 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1307 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1308 {"re.LOCALE", SRE_FLAG_LOCALE},
1309 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1310 {"re.DOTALL", SRE_FLAG_DOTALL},
1311 {"re.UNICODE", SRE_FLAG_UNICODE},
1312 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1313 {"re.DEBUG", SRE_FLAG_DEBUG},
1314 {"re.ASCII", SRE_FLAG_ASCII},
1315 };
1316 PyObject *result = NULL;
1317 PyObject *flag_items;
1318 size_t i;
1319 int flags = obj->flags;
1320
1321 /* Omit re.UNICODE for valid string patterns. */
1322 if (obj->isbytes == 0 &&
1323 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1324 SRE_FLAG_UNICODE)
1325 flags &= ~SRE_FLAG_UNICODE;
1326
1327 flag_items = PyList_New(0);
1328 if (!flag_items)
1329 return NULL;
1330
1331 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1332 if (flags & flag_names[i].value) {
1333 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1334 if (!item)
1335 goto done;
1336
1337 if (PyList_Append(flag_items, item) < 0) {
1338 Py_DECREF(item);
1339 goto done;
1340 }
1341 Py_DECREF(item);
1342 flags &= ~flag_names[i].value;
1343 }
1344 }
1345 if (flags) {
1346 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1347 if (!item)
1348 goto done;
1349
1350 if (PyList_Append(flag_items, item) < 0) {
1351 Py_DECREF(item);
1352 goto done;
1353 }
1354 Py_DECREF(item);
1355 }
1356
1357 if (PyList_Size(flag_items) > 0) {
1358 PyObject *flags_result;
1359 PyObject *sep = PyUnicode_FromString("|");
1360 if (!sep)
1361 goto done;
1362 flags_result = PyUnicode_Join(sep, flag_items);
1363 Py_DECREF(sep);
1364 if (!flags_result)
1365 goto done;
1366 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1367 obj->pattern, flags_result);
1368 Py_DECREF(flags_result);
1369 }
1370 else {
1371 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1372 }
1373
1374done:
1375 Py_DECREF(flag_items);
1376 return result;
1377}
1378
1379PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1380
1381/* PatternObject's 'groupindex' method. */
1382static PyObject *
1383pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
1384{
1385 if (self->groupindex == NULL)
1386 return PyDict_New();
1387 return PyDictProxy_New(self->groupindex);
1388}
1389
1390static int _validate(PatternObject *self); /* Forward */
1391
1392/*[clinic input]
1393_sre.compile
1394
1395 pattern: object
1396 flags: int
1397 code: object(subclass_of='&PyList_Type')
1398 groups: Py_ssize_t
1399 groupindex: object(subclass_of='&PyDict_Type')
1400 indexgroup: object(subclass_of='&PyTuple_Type')
1401
1402[clinic start generated code]*/
1403
1404static PyObject *
1405_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1406 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1407 PyObject *indexgroup)
1408/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1409{
1410 /* "compile" pattern descriptor to pattern object */
1411
1412 _sremodulestate *module_state = get_sre_module_state(module);
1413 PatternObject* self;
1414 Py_ssize_t i, n;
1415
1416 n = PyList_GET_SIZE(code);
1417 /* coverity[ampersand_in_size] */
1418 self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n);
1419 if (!self)
1420 return NULL;
1421 self->weakreflist = NULL;
1422 self->pattern = NULL;
1423 self->groupindex = NULL;
1424 self->indexgroup = NULL;
1425
1426 self->codesize = n;
1427
1428 for (i = 0; i < n; i++) {
1429 PyObject *o = PyList_GET_ITEM(code, i);
1430 unsigned long value = PyLong_AsUnsignedLong(o);
1431 self->code[i] = (SRE_CODE) value;
1432 if ((unsigned long) self->code[i] != value) {
1433 PyErr_SetString(PyExc_OverflowError,
1434 "regular expression code size limit exceeded");
1435 break;
1436 }
1437 }
1438 PyObject_GC_Track(self);
1439
1440 if (PyErr_Occurred()) {
1441 Py_DECREF(self);
1442 return NULL;
1443 }
1444
1445 if (pattern == Py_None) {
1446 self->isbytes = -1;
1447 }
1448 else {
1449 Py_ssize_t p_length;
1450 int charsize;
1451 Py_buffer view;
1452 view.buf = NULL;
1453 if (!getstring(pattern, &p_length, &self->isbytes,
1454 &charsize, &view)) {
1455 Py_DECREF(self);
1456 return NULL;
1457 }
1458 if (view.buf)
1459 PyBuffer_Release(&view);
1460 }
1461
1462 Py_INCREF(pattern);
1463 self->pattern = pattern;
1464
1465 self->flags = flags;
1466
1467 self->groups = groups;
1468
1469 if (PyDict_GET_SIZE(groupindex) > 0) {
1470 Py_INCREF(groupindex);
1471 self->groupindex = groupindex;
1472 if (PyTuple_GET_SIZE(indexgroup) > 0) {
1473 Py_INCREF(indexgroup);
1474 self->indexgroup = indexgroup;
1475 }
1476 }
1477
1478 if (!_validate(self)) {
1479 Py_DECREF(self);
1480 return NULL;
1481 }
1482
1483 return (PyObject*) self;
1484}
1485
1486/* -------------------------------------------------------------------- */
1487/* Code validation */
1488
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.
1492
1493 The nice thing about the generated code is that it is position-independent:
1494 all jumps are relative jumps forward. Also, jumps don't cross each other:
1495 the target of a later jump is always earlier than the target of an earlier
1496 jump. IOW, this is okay:
1497
1498 J---------J-------T--------T
1499 \ \_____/ /
1500 \______________________/
1501
1502 but this is not:
1503
1504 J---------J-------T--------T
1505 \_________\_____/ /
1506 \____________/
1507
1508 It also helps that SRE_CODE is always an unsigned type.
1509*/
1510
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete parenthesized
   printf argument list, hence the double parentheses at every use. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the function in which it is expanded */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.
   These macros read and advance the local variable `code`, compare it
   against the local `end`, and store into the locals `op`, `arg` and `skip`
   respectively; all of them must exist in the expanding function.  Each
   macro FAILs when `code` has already reached `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and FAIL if (skip - adj) is larger than the number of
   code words between the skip word itself and `end`; only then step past
   the skip word.  No lower bound is enforced on skip. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-adj > (uintptr_t)(end - code))         \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1551
1552static int
1553_validate_charset(SRE_CODE *code, SRE_CODE *end)
1554{
1555 /* Some variables are manipulated by the macros above */
1556 SRE_CODE op;
1557 SRE_CODE arg;
1558 SRE_CODE offset;
1559 int i;
1560
1561 while (code < end) {
1562 GET_OP;
1563 switch (op) {
1564
1565 case SRE_OP_NEGATE:
1566 break;
1567
1568 case SRE_OP_LITERAL:
1569 GET_ARG;
1570 break;
1571
1572 case SRE_OP_RANGE:
1573 case SRE_OP_RANGE_UNI_IGNORE:
1574 GET_ARG;
1575 GET_ARG;
1576 break;
1577
1578 case SRE_OP_CHARSET:
1579 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1580 if (offset > (uintptr_t)(end - code))
1581 FAIL;
1582 code += offset;
1583 break;
1584
1585 case SRE_OP_BIGCHARSET:
1586 GET_ARG; /* Number of blocks */
1587 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1588 if (offset > (uintptr_t)(end - code))
1589 FAIL;
1590 /* Make sure that each byte points to a valid block */
1591 for (i = 0; i < 256; i++) {
1592 if (((unsigned char *)code)[i] >= arg)
1593 FAIL;
1594 }
1595 code += offset;
1596 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1597 if (offset > (uintptr_t)(end - code))
1598 FAIL;
1599 code += offset;
1600 break;
1601
1602 case SRE_OP_CATEGORY:
1603 GET_ARG;
1604 switch (arg) {
1605 case SRE_CATEGORY_DIGIT:
1606 case SRE_CATEGORY_NOT_DIGIT:
1607 case SRE_CATEGORY_SPACE:
1608 case SRE_CATEGORY_NOT_SPACE:
1609 case SRE_CATEGORY_WORD:
1610 case SRE_CATEGORY_NOT_WORD:
1611 case SRE_CATEGORY_LINEBREAK:
1612 case SRE_CATEGORY_NOT_LINEBREAK:
1613 case SRE_CATEGORY_LOC_WORD:
1614 case SRE_CATEGORY_LOC_NOT_WORD:
1615 case SRE_CATEGORY_UNI_DIGIT:
1616 case SRE_CATEGORY_UNI_NOT_DIGIT:
1617 case SRE_CATEGORY_UNI_SPACE:
1618 case SRE_CATEGORY_UNI_NOT_SPACE:
1619 case SRE_CATEGORY_UNI_WORD:
1620 case SRE_CATEGORY_UNI_NOT_WORD:
1621 case SRE_CATEGORY_UNI_LINEBREAK:
1622 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1623 break;
1624 default:
1625 FAIL;
1626 }
1627 break;
1628
1629 default:
1630 FAIL;
1631
1632 }
1633 }
1634
1635 return 1;
1636}
1637
1638static int
1639_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1640{
1641 /* Some variables are manipulated by the macros above */
1642 SRE_CODE op;
1643 SRE_CODE arg;
1644 SRE_CODE skip;
1645
1646 VTRACE(("code=%p, end=%p\n", code, end));
1647
1648 if (code > end)
1649 FAIL;
1650
1651 while (code < end) {
1652 GET_OP;
1653 switch (op) {
1654
1655 case SRE_OP_MARK:
1656 /* We don't check whether marks are properly nested; the
1657 sre_match() code is robust even if they don't, and the worst
1658 you can get is nonsensical match results. */
1659 GET_ARG;
1660 if (arg > 2 * (size_t)groups + 1) {
1661 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1662 FAIL;
1663 }
1664 break;
1665
1666 case SRE_OP_LITERAL:
1667 case SRE_OP_NOT_LITERAL:
1668 case SRE_OP_LITERAL_IGNORE:
1669 case SRE_OP_NOT_LITERAL_IGNORE:
1670 case SRE_OP_LITERAL_UNI_IGNORE:
1671 case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1672 case SRE_OP_LITERAL_LOC_IGNORE:
1673 case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1674 GET_ARG;
1675 /* The arg is just a character, nothing to check */
1676 break;
1677
1678 case SRE_OP_SUCCESS:
1679 case SRE_OP_FAILURE:
1680 /* Nothing to check; these normally end the matching process */
1681 break;
1682
1683 case SRE_OP_AT:
1684 GET_ARG;
1685 switch (arg) {
1686 case SRE_AT_BEGINNING:
1687 case SRE_AT_BEGINNING_STRING:
1688 case SRE_AT_BEGINNING_LINE:
1689 case SRE_AT_END:
1690 case SRE_AT_END_LINE:
1691 case SRE_AT_END_STRING:
1692 case SRE_AT_BOUNDARY:
1693 case SRE_AT_NON_BOUNDARY:
1694 case SRE_AT_LOC_BOUNDARY:
1695 case SRE_AT_LOC_NON_BOUNDARY:
1696 case SRE_AT_UNI_BOUNDARY:
1697 case SRE_AT_UNI_NON_BOUNDARY:
1698 break;
1699 default:
1700 FAIL;
1701 }
1702 break;
1703
1704 case SRE_OP_ANY:
1705 case SRE_OP_ANY_ALL:
1706 /* These have no operands */
1707 break;
1708
1709 case SRE_OP_IN:
1710 case SRE_OP_IN_IGNORE:
1711 case SRE_OP_IN_UNI_IGNORE:
1712 case SRE_OP_IN_LOC_IGNORE:
1713 GET_SKIP;
1714 /* Stop 1 before the end; we check the FAILURE below */
1715 if (!_validate_charset(code, code+skip-2))
1716 FAIL;
1717 if (code[skip-2] != SRE_OP_FAILURE)
1718 FAIL;
1719 code += skip-1;
1720 break;
1721
1722 case SRE_OP_INFO:
1723 {
1724 /* A minimal info field is
1725 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1726 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1727 more follows. */
1728 SRE_CODE flags, i;
1729 SRE_CODE *newcode;
1730 GET_SKIP;
1731 newcode = code+skip-1;
1732 GET_ARG; flags = arg;
1733 GET_ARG;
1734 GET_ARG;
1735 /* Check that only valid flags are present */
1736 if ((flags & ~(SRE_INFO_PREFIX |
1737 SRE_INFO_LITERAL |
1738 SRE_INFO_CHARSET)) != 0)
1739 FAIL;
1740 /* PREFIX and CHARSET are mutually exclusive */
1741 if ((flags & SRE_INFO_PREFIX) &&
1742 (flags & SRE_INFO_CHARSET))
1743 FAIL;
1744 /* LITERAL implies PREFIX */
1745 if ((flags & SRE_INFO_LITERAL) &&
1746 !(flags & SRE_INFO_PREFIX))
1747 FAIL;
1748 /* Validate the prefix */
1749 if (flags & SRE_INFO_PREFIX) {
1750 SRE_CODE prefix_len;
1751 GET_ARG; prefix_len = arg;
1752 GET_ARG;
1753 /* Here comes the prefix string */
1754 if (prefix_len > (uintptr_t)(newcode - code))
1755 FAIL;
1756 code += prefix_len;
1757 /* And here comes the overlap table */
1758 if (prefix_len > (uintptr_t)(newcode - code))
1759 FAIL;
1760 /* Each overlap value should be < prefix_len */
1761 for (i = 0; i < prefix_len; i++) {
1762 if (code[i] >= prefix_len)
1763 FAIL;
1764 }
1765 code += prefix_len;
1766 }
1767 /* Validate the charset */
1768 if (flags & SRE_INFO_CHARSET) {
1769 if (!_validate_charset(code, newcode-1))
1770 FAIL;
1771 if (newcode[-1] != SRE_OP_FAILURE)
1772 FAIL;
1773 code = newcode;
1774 }
1775 else if (code != newcode) {
1776 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1777 FAIL;
1778 }
1779 }
1780 break;
1781
1782 case SRE_OP_BRANCH:
1783 {
1784 SRE_CODE *target = NULL;
1785 for (;;) {
1786 GET_SKIP;
1787 if (skip == 0)
1788 break;
1789 /* Stop 2 before the end; we check the JUMP below */
1790 if (!_validate_inner(code, code+skip-3, groups))
1791 FAIL;
1792 code += skip-3;
1793 /* Check that it ends with a JUMP, and that each JUMP
1794 has the same target */
1795 GET_OP;
1796 if (op != SRE_OP_JUMP)
1797 FAIL;
1798 GET_SKIP;
1799 if (target == NULL)
1800 target = code+skip-1;
1801 else if (code+skip-1 != target)
1802 FAIL;
1803 }
1804 }
1805 break;
1806
1807 case SRE_OP_REPEAT_ONE:
1808 case SRE_OP_MIN_REPEAT_ONE:
1809 {
1810 SRE_CODE min, max;
1811 GET_SKIP;
1812 GET_ARG; min = arg;
1813 GET_ARG; max = arg;
1814 if (min > max)
1815 FAIL;
1816 if (max > SRE_MAXREPEAT)
1817 FAIL;
1818 if (!_validate_inner(code, code+skip-4, groups))
1819 FAIL;
1820 code += skip-4;
1821 GET_OP;
1822 if (op != SRE_OP_SUCCESS)
1823 FAIL;
1824 }
1825 break;
1826
1827 case SRE_OP_REPEAT:
1828 {
1829 SRE_CODE min, max;
1830 GET_SKIP;
1831 GET_ARG; min = arg;
1832 GET_ARG; max = arg;
1833 if (min > max)
1834 FAIL;
1835 if (max > SRE_MAXREPEAT)
1836 FAIL;
1837 if (!_validate_inner(code, code+skip-3, groups))
1838 FAIL;
1839 code += skip-3;
1840 GET_OP;
1841 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1842 FAIL;
1843 }
1844 break;
1845
1846 case SRE_OP_GROUPREF:
1847 case SRE_OP_GROUPREF_IGNORE:
1848 case SRE_OP_GROUPREF_UNI_IGNORE:
1849 case SRE_OP_GROUPREF_LOC_IGNORE:
1850 GET_ARG;
1851 if (arg >= (size_t)groups)
1852 FAIL;
1853 break;
1854
1855 case SRE_OP_GROUPREF_EXISTS:
1856 /* The regex syntax for this is: '(?(group)then|else)', where
1857 'group' is either an integer group number or a group name,
1858 'then' and 'else' are sub-regexes, and 'else' is optional. */
1859 GET_ARG;
1860 if (arg >= (size_t)groups)
1861 FAIL;
1862 GET_SKIP_ADJ(1);
1863 code--; /* The skip is relative to the first arg! */
1864 /* There are two possibilities here: if there is both a 'then'
1865 part and an 'else' part, the generated code looks like:
1866
1867 GROUPREF_EXISTS
1868 <group>
1869 <skipyes>
1870 ...then part...
1871 JUMP
1872 <skipno>
1873 (<skipyes> jumps here)
1874 ...else part...
1875 (<skipno> jumps here)
1876
1877 If there is only a 'then' part, it looks like:
1878
1879 GROUPREF_EXISTS
1880 <group>
1881 <skip>
1882 ...then part...
1883 (<skip> jumps here)
1884
1885 There is no direct way to decide which it is, and we don't want
1886 to allow arbitrary jumps anywhere in the code; so we just look
1887 for a JUMP opcode preceding our skip target.
1888 */
1889 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
1890 code[skip-3] == SRE_OP_JUMP)
1891 {
1892 VTRACE(("both then and else parts present\n"));
1893 if (!_validate_inner(code+1, code+skip-3, groups))
1894 FAIL;
1895 code += skip-2; /* Position after JUMP, at <skipno> */
1896 GET_SKIP;
1897 if (!_validate_inner(code, code+skip-1, groups))
1898 FAIL;
1899 code += skip-1;
1900 }
1901 else {
1902 VTRACE(("only a then part present\n"));
1903 if (!_validate_inner(code+1, code+skip-1, groups))
1904 FAIL;
1905 code += skip-1;
1906 }
1907 break;
1908
1909 case SRE_OP_ASSERT:
1910 case SRE_OP_ASSERT_NOT:
1911 GET_SKIP;
1912 GET_ARG; /* 0 for lookahead, width for lookbehind */
1913 code--; /* Back up over arg to simplify math below */
1914 if (arg & 0x80000000)
1915 FAIL; /* Width too large */
1916 /* Stop 1 before the end; we check the SUCCESS below */
1917 if (!_validate_inner(code+1, code+skip-2, groups))
1918 FAIL;
1919 code += skip-2;
1920 GET_OP;
1921 if (op != SRE_OP_SUCCESS)
1922 FAIL;
1923 break;
1924
1925 default:
1926 FAIL;
1927
1928 }
1929 }
1930
1931 VTRACE(("okay\n"));
1932 return 1;
1933}
1934
1935static int
1936_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1937{
1938 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1939 code >= end || end[-1] != SRE_OP_SUCCESS)
1940 FAIL;
1941 return _validate_inner(code, end-1, groups);
1942}
1943
1944static int
1945_validate(PatternObject *self)
1946{
1947 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1948 {
1949 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1950 return 0;
1951 }
1952 else
1953 VTRACE(("Success!\n"));
1954 return 1;
1955}
1956
1957/* -------------------------------------------------------------------- */
1958/* match methods */
1959
/* Traversal function for match objects: visits the object's type and the
   three object references held by a match (string, regs, pattern).
   The parameter names `visit` and `arg` are the ones Py_VISIT expects. */
static int
match_traverse(MatchObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->string);
    Py_VISIT(self->regs);
    Py_VISIT(self->pattern);
    return 0;
}
1969
/* Drop the references a match object holds (string, regs, pattern) and
   reset those fields to NULL.  Always returns 0. */
static int
match_clear(MatchObject *self)
{
    Py_CLEAR(self->string);
    Py_CLEAR(self->regs);
    Py_CLEAR(self->pattern);
    return 0;
}
1978
/* Deallocator for match objects. */
static void
match_dealloc(MatchObject* self)
{
    /* Remember the type first: it is still needed after self is freed. */
    PyTypeObject *tp = Py_TYPE(self);

    /* Stop GC tracking before the fields are torn down. */
    PyObject_GC_UnTrack(self);
    (void)match_clear(self);
    tp->tp_free(self);
    /* Release the reference to the type taken through `tp`. */
    Py_DECREF(tp);
}
1989
1990static PyObject*
1991match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
1992{
1993 Py_ssize_t length;
1994 int isbytes, charsize;
1995 Py_buffer view;
1996 PyObject *result;
1997 const void* ptr;
1998 Py_ssize_t i, j;
1999
2000 assert(0 <= index && index < self->groups);
2001 index *= 2;
2002
2003 if (self->string == Py_None || self->mark[index] < 0) {
2004 /* return default value if the string or group is undefined */
2005 Py_INCREF(def);
2006 return def;
2007 }
2008
2009 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
2010 if (ptr == NULL)
2011 return NULL;
2012
2013 i = self->mark[index];
2014 j = self->mark[index+1];
2015 i = Py_MIN(i, length);
2016 j = Py_MIN(j, length);
2017 result = getslice(isbytes, ptr, self->string, i, j);
2018 if (isbytes && view.buf != NULL)
2019 PyBuffer_Release(&view);
2020 return result;
2021}
2022
2023static Py_ssize_t
2024match_getindex(MatchObject* self, PyObject* index)
2025{
2026 Py_ssize_t i;
2027
2028 if (index == NULL)
2029 /* Default value */
2030 return 0;
2031
2032 if (PyIndex_Check(index)) {
2033 i = PyNumber_AsSsize_t(index, NULL);
2034 }
2035 else {
2036 i = -1;
2037
2038 if (self->pattern->groupindex) {
2039 index = PyDict_GetItemWithError(self->pattern->groupindex, index);
2040 if (index && PyLong_Check(index)) {
2041 i = PyLong_AsSsize_t(index);
2042 }
2043 }
2044 }
2045 if (i < 0 || i >= self->groups) {
2046 /* raise IndexError if we were given a bad group number */
2047 if (!PyErr_Occurred()) {
2048 PyErr_SetString(PyExc_IndexError, "no such group");
2049 }
2050 return -1;
2051 }
2052
2053 return i;
2054}
2055
2056static PyObject*
2057match_getslice(MatchObject* self, PyObject* index, PyObject* def)
2058{
2059 Py_ssize_t i = match_getindex(self, index);
2060
2061 if (i < 0) {
2062 return NULL;
2063 }
2064
2065 return match_getslice_by_index(self, i, def);
2066}
2067
2068/*[clinic input]
2069_sre.SRE_Match.expand
2070
2071 template: object
2072
2073Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2074[clinic start generated code]*/
2075
2076static PyObject *
2077_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2078/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
2079{
2080 /* delegate to Python code */
2081 return call(
2082 SRE_PY_MODULE, "_expand",
2083 PyTuple_Pack(3, self->pattern, self, template)
2084 );
2085}
2086
2087static PyObject*
2088match_group(MatchObject* self, PyObject* args)
2089{
2090 PyObject* result;
2091 Py_ssize_t i, size;
2092
2093 size = PyTuple_GET_SIZE(args);
2094
2095 switch (size) {
2096 case 0:
2097 result = match_getslice(self, _PyLong_GetZero(), Py_None);
2098 break;
2099 case 1:
2100 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2101 break;
2102 default:
2103 /* fetch multiple items */
2104 result = PyTuple_New(size);
2105 if (!result)
2106 return NULL;
2107 for (i = 0; i < size; i++) {
2108 PyObject* item = match_getslice(
2109 self, PyTuple_GET_ITEM(args, i), Py_None
2110 );
2111 if (!item) {
2112 Py_DECREF(result);
2113 return NULL;
2114 }
2115 PyTuple_SET_ITEM(result, i, item);
2116 }
2117 break;
2118 }
2119 return result;
2120}
2121
2122static PyObject*
2123match_getitem(MatchObject* self, PyObject* name)
2124{
2125 return match_getslice(self, name, Py_None);
2126}
2127
2128/*[clinic input]
2129_sre.SRE_Match.groups
2130
2131 default: object = None
2132 Is used for groups that did not participate in the match.
2133
2134Return a tuple containing all the subgroups of the match, from 1.
2135[clinic start generated code]*/
2136
2137static PyObject *
2138_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2139/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2140{
2141 PyObject* result;
2142 Py_ssize_t index;
2143
2144 result = PyTuple_New(self->groups-1);
2145 if (!result)
2146 return NULL;
2147
2148 for (index = 1; index < self->groups; index++) {
2149 PyObject* item;
2150 item = match_getslice_by_index(self, index, default_value);
2151 if (!item) {
2152 Py_DECREF(result);
2153 return NULL;
2154 }
2155 PyTuple_SET_ITEM(result, index-1, item);
2156 }
2157
2158 return result;
2159}
2160
2161/*[clinic input]
2162_sre.SRE_Match.groupdict
2163
2164 default: object = None
2165 Is used for groups that did not participate in the match.
2166
2167Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2168[clinic start generated code]*/
2169
2170static PyObject *
2171_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2172/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2173{
2174 PyObject *result;
2175 PyObject *key;
2176 PyObject *value;
2177 Py_ssize_t pos = 0;
2178 Py_hash_t hash;
2179
2180 result = PyDict_New();
2181 if (!result || !self->pattern->groupindex)
2182 return result;
2183
2184 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2185 int status;
2186 Py_INCREF(key);
2187 value = match_getslice(self, key, default_value);
2188 if (!value) {
2189 Py_DECREF(key);
2190 goto failed;
2191 }
2192 status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2193 Py_DECREF(value);
2194 Py_DECREF(key);
2195 if (status < 0)
2196 goto failed;
2197 }
2198
2199 return result;
2200
2201failed:
2202 Py_DECREF(result);
2203 return NULL;
2204}
2205
2206/*[clinic input]
2207_sre.SRE_Match.start -> Py_ssize_t
2208
2209 group: object(c_default="NULL") = 0
2210 /
2211
2212Return index of the start of the substring matched by group.
2213[clinic start generated code]*/
2214
2215static Py_ssize_t
2216_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2217/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2218{
2219 Py_ssize_t index = match_getindex(self, group);
2220
2221 if (index < 0) {
2222 return -1;
2223 }
2224
2225 /* mark is -1 if group is undefined */
2226 return self->mark[index*2];
2227}
2228
2229/*[clinic input]
2230_sre.SRE_Match.end -> Py_ssize_t
2231
2232 group: object(c_default="NULL") = 0
2233 /
2234
2235Return index of the end of the substring matched by group.
2236[clinic start generated code]*/
2237
2238static Py_ssize_t
2239_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2240/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2241{
2242 Py_ssize_t index = match_getindex(self, group);
2243
2244 if (index < 0) {
2245 return -1;
2246 }
2247
2248 /* mark is -1 if group is undefined */
2249 return self->mark[index*2+1];
2250}
2251
2252LOCAL(PyObject*)
2253_pair(Py_ssize_t i1, Py_ssize_t i2)
2254{
2255 PyObject* pair;
2256 PyObject* item;
2257
2258 pair = PyTuple_New(2);
2259 if (!pair)
2260 return NULL;
2261
2262 item = PyLong_FromSsize_t(i1);
2263 if (!item)
2264 goto error;
2265 PyTuple_SET_ITEM(pair, 0, item);
2266
2267 item = PyLong_FromSsize_t(i2);
2268 if (!item)
2269 goto error;
2270 PyTuple_SET_ITEM(pair, 1, item);
2271
2272 return pair;
2273
2274 error:
2275 Py_DECREF(pair);
2276 return NULL;
2277}
2278
2279/*[clinic input]
2280_sre.SRE_Match.span
2281
2282 group: object(c_default="NULL") = 0
2283 /
2284
2285For match object m, return the 2-tuple (m.start(group), m.end(group)).
2286[clinic start generated code]*/
2287
2288static PyObject *
2289_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2290/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2291{
2292 Py_ssize_t index = match_getindex(self, group);
2293
2294 if (index < 0) {
2295 return NULL;
2296 }
2297
2298 /* marks are -1 if group is undefined */
2299 return _pair(self->mark[index*2], self->mark[index*2+1]);
2300}
2301
2302static PyObject*
2303match_regs(MatchObject* self)
2304{
2305 PyObject* regs;
2306 PyObject* item;
2307 Py_ssize_t index;
2308
2309 regs = PyTuple_New(self->groups);
2310 if (!regs)
2311 return NULL;
2312
2313 for (index = 0; index < self->groups; index++) {
2314 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2315 if (!item) {
2316 Py_DECREF(regs);
2317 return NULL;
2318 }
2319 PyTuple_SET_ITEM(regs, index, item);
2320 }
2321
2322 Py_INCREF(regs);
2323 self->regs = regs;
2324
2325 return regs;
2326}
2327
2328/*[clinic input]
2329_sre.SRE_Match.__copy__
2330
2331[clinic start generated code]*/
2332
2333static PyObject *
2334_sre_SRE_Match___copy___impl(MatchObject *self)
2335/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2336{
2337 Py_INCREF(self);
2338 return (PyObject *)self;
2339}
2340
2341/*[clinic input]
2342_sre.SRE_Match.__deepcopy__
2343
2344 memo: object
2345 /
2346
2347[clinic start generated code]*/
2348
2349static PyObject *
2350_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2351/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
2352{
2353 Py_INCREF(self);
2354 return (PyObject *)self;
2355}
2356
2357PyDoc_STRVAR(match_doc,
2358"The result of re.match() and re.search().\n\
2359Match objects always have a boolean value of True.");
2360
2361PyDoc_STRVAR(match_group_doc,
2362"group([group1, ...]) -> str or tuple.\n\
2363 Return subgroup(s) of the match by indices or names.\n\
2364 For 0 returns the entire match.");
2365
2366static PyObject *
2367match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
2368{
2369 if (self->lastindex >= 0)
2370 return PyLong_FromSsize_t(self->lastindex);
2371 Py_RETURN_NONE;
2372}
2373
2374static PyObject *
2375match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
2376{
2377 if (self->pattern->indexgroup &&
2378 self->lastindex >= 0 &&
2379 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2380 {
2381 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2382 self->lastindex);
2383 Py_INCREF(result);
2384 return result;
2385 }
2386 Py_RETURN_NONE;
2387}
2388
2389static PyObject *
2390match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
2391{
2392 if (self->regs) {
2393 Py_INCREF(self->regs);
2394 return self->regs;
2395 } else
2396 return match_regs(self);
2397}
2398
2399static PyObject *
2400match_repr(MatchObject *self)
2401{
2402 PyObject *result;
2403 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2404 if (group0 == NULL)
2405 return NULL;
2406 result = PyUnicode_FromFormat(
2407 "<%s object; span=(%zd, %zd), match=%.50R>",
2408 Py_TYPE(self)->tp_name,
2409 self->mark[0], self->mark[1], group0);
2410 Py_DECREF(group0);
2411 return result;
2412}
2413
2414
2415static PyObject*
2416pattern_new_match(_sremodulestate* module_state,
2417 PatternObject* pattern,
2418 SRE_STATE* state,
2419 Py_ssize_t status)
2420{
2421 /* create match object (from state object) */
2422
2423 MatchObject* match;
2424 Py_ssize_t i, j;
2425 char* base;
2426 int n;
2427
2428 if (status > 0) {
2429
2430 /* create match object (with room for extra group marks) */
2431 /* coverity[ampersand_in_size] */
2432 match = PyObject_GC_NewVar(MatchObject,
2433 module_state->Match_Type,
2434 2*(pattern->groups+1));
2435 if (!match)
2436 return NULL;
2437
2438 Py_INCREF(pattern);
2439 match->pattern = pattern;
2440
2441 Py_INCREF(state->string);
2442 match->string = state->string;
2443
2444 match->regs = NULL;
2445 match->groups = pattern->groups+1;
2446
2447 /* fill in group slices */
2448
2449 base = (char*) state->beginning;
2450 n = state->charsize;
2451
2452 match->mark[0] = ((char*) state->start - base) / n;
2453 match->mark[1] = ((char*) state->ptr - base) / n;
2454
2455 for (i = j = 0; i < pattern->groups; i++, j+=2)
2456 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2457 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2458 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2459 } else
2460 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2461
2462 match->pos = state->pos;
2463 match->endpos = state->endpos;
2464
2465 match->lastindex = state->lastindex;
2466
2467 PyObject_GC_Track(match);
2468 return (PyObject*) match;
2469
2470 } else if (status == 0) {
2471
2472 /* no match */
2473 Py_RETURN_NONE;
2474
2475 }
2476
2477 /* internal error */
2478 pattern_error(status);
2479 return NULL;
2480}
2481
2482
2483/* -------------------------------------------------------------------- */
2484/* scanner methods (experimental) */
2485
/* GC traversal for scanner objects: passes the scanner's type and the
   pattern object it references to the visit callback. */
static int
scanner_traverse(ScannerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->pattern);
    return 0;
}
2493
/* GC clear for scanner objects: drops the reference to the pattern and
   leaves the field set to NULL.  Always returns 0. */
static int
scanner_clear(ScannerObject *self)
{
    Py_CLEAR(self->pattern);
    return 0;
}
2500
2501static void
2502scanner_dealloc(ScannerObject* self)
2503{
2504 PyTypeObject *tp = Py_TYPE(self);
2505
2506 PyObject_GC_UnTrack(self);
2507 state_fini(&self->state);
2508 (void)scanner_clear(self);
2509 tp->tp_free(self);
2510 Py_DECREF(tp);
2511}
2512
2513static int
2514scanner_begin(ScannerObject* self)
2515{
2516 if (self->executing) {
2517 PyErr_SetString(PyExc_ValueError,
2518 "regular expression scanner already executing");
2519 return 0;
2520 }
2521 self->executing = 1;
2522 return 1;
2523}
2524
/* Counterpart of scanner_begin(): clears the executing flag.  The flag
   is expected to be set when this is called (checked by the assert). */
static void
scanner_end(ScannerObject* self)
{
    assert(self->executing);
    self->executing = 0;
}
2531
2532/*[clinic input]
2533_sre.SRE_Scanner.match
2534
2535 cls: defining_class
2536 /
2537
2538[clinic start generated code]*/
2539
2540static PyObject *
2541_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls)
2542/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/
2543{
2544 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2545 SRE_STATE* state = &self->state;
2546 PyObject* match;
2547 Py_ssize_t status;
2548
2549 if (!scanner_begin(self)) {
2550 return NULL;
2551 }
2552 if (state->start == NULL) {
2553 scanner_end(self);
2554 Py_RETURN_NONE;
2555 }
2556
2557 state_reset(state);
2558
2559 state->ptr = state->start;
2560
2561 status = sre_match(state, PatternObject_GetCode(self->pattern));
2562 if (PyErr_Occurred()) {
2563 scanner_end(self);
2564 return NULL;
2565 }
2566
2567 match = pattern_new_match(module_state, (PatternObject*) self->pattern,
2568 state, status);
2569
2570 if (status == 0)
2571 state->start = NULL;
2572 else {
2573 state->must_advance = (state->ptr == state->start);
2574 state->start = state->ptr;
2575 }
2576
2577 scanner_end(self);
2578 return match;
2579}
2580
2581
2582/*[clinic input]
2583_sre.SRE_Scanner.search
2584
2585 cls: defining_class
2586 /
2587
2588[clinic start generated code]*/
2589
2590static PyObject *
2591_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls)
2592/*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/
2593{
2594 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2595 SRE_STATE* state = &self->state;
2596 PyObject* match;
2597 Py_ssize_t status;
2598
2599 if (!scanner_begin(self)) {
2600 return NULL;
2601 }
2602 if (state->start == NULL) {
2603 scanner_end(self);
2604 Py_RETURN_NONE;
2605 }
2606
2607 state_reset(state);
2608
2609 state->ptr = state->start;
2610
2611 status = sre_search(state, PatternObject_GetCode(self->pattern));
2612 if (PyErr_Occurred()) {
2613 scanner_end(self);
2614 return NULL;
2615 }
2616
2617 match = pattern_new_match(module_state, (PatternObject*) self->pattern,
2618 state, status);
2619
2620 if (status == 0)
2621 state->start = NULL;
2622 else {
2623 state->must_advance = (state->ptr == state->start);
2624 state->start = state->ptr;
2625 }
2626
2627 scanner_end(self);
2628 return match;
2629}
2630
2631static PyObject *
2632pattern_scanner(_sremodulestate *module_state,
2633 PatternObject *self,
2634 PyObject *string,
2635 Py_ssize_t pos,
2636 Py_ssize_t endpos)
2637{
2638 ScannerObject* scanner;
2639
2640 /* create scanner object */
2641 scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type);
2642 if (!scanner)
2643 return NULL;
2644 scanner->pattern = NULL;
2645 scanner->executing = 0;
2646
2647 /* create search state object */
2648 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2649 Py_DECREF(scanner);
2650 return NULL;
2651 }
2652
2653 Py_INCREF(self);
2654 scanner->pattern = (PyObject*) self;
2655
2656 PyObject_GC_Track(scanner);
2657 return (PyObject*) scanner;
2658}
2659
2660static Py_hash_t
2661pattern_hash(PatternObject *self)
2662{
2663 Py_hash_t hash, hash2;
2664
2665 hash = PyObject_Hash(self->pattern);
2666 if (hash == -1) {
2667 return -1;
2668 }
2669
2670 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2671 hash ^= hash2;
2672
2673 hash ^= self->flags;
2674 hash ^= self->isbytes;
2675 hash ^= self->codesize;
2676
2677 if (hash == -1) {
2678 hash = -2;
2679 }
2680 return hash;
2681}
2682
2683static PyObject*
2684pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2685{
2686 PyTypeObject *tp = Py_TYPE(lefto);
2687 _sremodulestate *module_state = get_sre_module_state_by_class(tp);
2688 PatternObject *left, *right;
2689 int cmp;
2690
2691 if (op != Py_EQ && op != Py_NE) {
2692 Py_RETURN_NOTIMPLEMENTED;
2693 }
2694
2695 if (!Py_IS_TYPE(righto, module_state->Pattern_Type))
2696 {
2697 Py_RETURN_NOTIMPLEMENTED;
2698 }
2699
2700 if (lefto == righto) {
2701 /* a pattern is equal to itself */
2702 return PyBool_FromLong(op == Py_EQ);
2703 }
2704
2705 left = (PatternObject *)lefto;
2706 right = (PatternObject *)righto;
2707
2708 cmp = (left->flags == right->flags
2709 && left->isbytes == right->isbytes
2710 && left->codesize == right->codesize);
2711 if (cmp) {
2712 /* Compare the code and the pattern because the same pattern can
2713 produce different codes depending on the locale used to compile the
2714 pattern when the re.LOCALE flag is used. Don't compare groups,
2715 indexgroup nor groupindex: they are derivated from the pattern. */
2716 cmp = (memcmp(left->code, right->code,
2717 sizeof(left->code[0]) * left->codesize) == 0);
2718 }
2719 if (cmp) {
2720 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2721 Py_EQ);
2722 if (cmp < 0) {
2723 return NULL;
2724 }
2725 }
2726 if (op == Py_NE) {
2727 cmp = !cmp;
2728 }
2729 return PyBool_FromLong(cmp);
2730}
2731
2732#include "clinic/_sre.c.h"
2733
/* Method table for pattern objects (installed through Py_tp_methods in
   pattern_slots).  The *_METHODDEF entries are macros; presumably they
   come from the generated "clinic/_sre.c.h" header included above. */
static PyMethodDef pattern_methods[] = {
    _SRE_SRE_PATTERN_MATCH_METHODDEF
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
    _SRE_SRE_PATTERN_SUB_METHODDEF
    _SRE_SRE_PATTERN_SUBN_METHODDEF
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
    _SRE_SRE_PATTERN___COPY___METHODDEF
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
    /* class method taking one argument, implemented by Py_GenericAlias */
    {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS,
     PyDoc_STR("See PEP 585")},
    {NULL, NULL}  /* Sentinel */
};
2750
2751static PyGetSetDef pattern_getset[] = {
2752 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2753 "A dictionary mapping group names to group numbers."},
2754 {NULL} /* Sentinel */
2755};
2756
2757#define PAT_OFF(x) offsetof(PatternObject, x)
2758static PyMemberDef pattern_members[] = {
2759 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY,
2760 "The pattern string from which the RE object was compiled."},
2761 {"flags", T_INT, PAT_OFF(flags), READONLY,
2762 "The regex matching flags."},
2763 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY,
2764 "The number of capturing groups in the pattern."},
2765 {"__weaklistoffset__", T_PYSSIZET, offsetof(PatternObject, weakreflist), READONLY},
2766 {NULL} /* Sentinel */
2767};
2768
/* Type slots for the pattern type; referenced by pattern_spec. */
static PyType_Slot pattern_slots[] = {
    {Py_tp_dealloc, (destructor)pattern_dealloc},
    {Py_tp_repr, (reprfunc)pattern_repr},
    {Py_tp_hash, (hashfunc)pattern_hash},
    {Py_tp_doc, (void *)pattern_doc},
    {Py_tp_richcompare, pattern_richcompare},
    {Py_tp_methods, pattern_methods},
    {Py_tp_members, pattern_members},
    {Py_tp_getset, pattern_getset},
    /* GC support (the spec sets Py_TPFLAGS_HAVE_GC) */
    {Py_tp_traverse, pattern_traverse},
    {Py_tp_clear, pattern_clear},
    {0, NULL},  /* Sentinel */
};
2782
/* Spec for the "re.Pattern" heap type, created in sre_exec().
   itemsize is sizeof(SRE_CODE); the flags make the type immutable,
   GC-enabled and not directly instantiable. */
static PyType_Spec pattern_spec = {
    .name = "re.Pattern",
    .basicsize = sizeof(PatternObject),
    .itemsize = sizeof(SRE_CODE),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
    .slots = pattern_slots,
};
2791
/* Method table for match objects (installed through Py_tp_methods in
   match_slots).  "group" is declared by hand with METH_VARARGS; the
   *_METHODDEF entries are macros, presumably from the generated
   "clinic/_sre.c.h" header. */
static PyMethodDef match_methods[] = {
    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
    _SRE_SRE_MATCH_START_METHODDEF
    _SRE_SRE_MATCH_END_METHODDEF
    _SRE_SRE_MATCH_SPAN_METHODDEF
    _SRE_SRE_MATCH_GROUPS_METHODDEF
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
    _SRE_SRE_MATCH_EXPAND_METHODDEF
    _SRE_SRE_MATCH___COPY___METHODDEF
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
    /* class method taking one argument, implemented by Py_GenericAlias */
    {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS,
     PyDoc_STR("See PEP 585")},
    {NULL, NULL}  /* Sentinel */
};
2806
2807static PyGetSetDef match_getset[] = {
2808 {"lastindex", (getter)match_lastindex_get, (setter)NULL,
2809 "The integer index of the last matched capturing group."},
2810 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
2811 "The name of the last matched capturing group."},
2812 {"regs", (getter)match_regs_get, (setter)NULL},
2813 {NULL}
2814};
2815
2816#define MATCH_OFF(x) offsetof(MatchObject, x)
2817static PyMemberDef match_members[] = {
2818 {"string", T_OBJECT, MATCH_OFF(string), READONLY,
2819 "The string passed to match() or search()."},
2820 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY,
2821 "The regular expression object."},
2822 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY,
2823 "The index into the string at which the RE engine started looking for a match."},
2824 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY,
2825 "The index into the string beyond which the RE engine will not go."},
2826 {NULL}
2827};
2828
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
/* Type slots for the match type; referenced by match_spec. */
static PyType_Slot match_slots[] = {
    {Py_tp_dealloc, match_dealloc},
    {Py_tp_repr, match_repr},
    {Py_tp_doc, (void *)match_doc},
    {Py_tp_methods, match_methods},
    {Py_tp_members, match_members},
    {Py_tp_getset, match_getset},
    /* GC support (the spec sets Py_TPFLAGS_HAVE_GC) */
    {Py_tp_traverse, match_traverse},
    {Py_tp_clear, match_clear},

    /* As mapping.
     *
     * Match objects do not support length or assignment, but do support
     * __getitem__.
     */
    {Py_mp_subscript, match_getitem},

    {0, NULL},  /* Sentinel */
};
2850
/* Spec for the "re.Match" heap type, created in sre_exec().
   itemsize is sizeof(Py_ssize_t); pattern_new_match() allocates the
   instances with PyObject_GC_NewVar and fills their mark[] entries. */
static PyType_Spec match_spec = {
    .name = "re.Match",
    .basicsize = sizeof(MatchObject),
    .itemsize = sizeof(Py_ssize_t),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
    .slots = match_slots,
};
2859
/* Method table for scanner objects: the match and search methods.
   The *_METHODDEF entries are macros, presumably from the generated
   "clinic/_sre.c.h" header. */
static PyMethodDef scanner_methods[] = {
    _SRE_SRE_SCANNER_MATCH_METHODDEF
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2865
2866#define SCAN_OFF(x) offsetof(ScannerObject, x)
2867static PyMemberDef scanner_members[] = {
2868 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2869 {NULL} /* Sentinel */
2870};
2871
/* Type slots for the scanner type; referenced by scanner_spec. */
static PyType_Slot scanner_slots[] = {
    {Py_tp_dealloc, scanner_dealloc},
    {Py_tp_methods, scanner_methods},
    {Py_tp_members, scanner_members},
    /* GC support (the spec sets Py_TPFLAGS_HAVE_GC) */
    {Py_tp_traverse, scanner_traverse},
    {Py_tp_clear, scanner_clear},
    {0, NULL},  /* Sentinel */
};
2880
/* Spec for the scanner heap type, created in sre_exec().  The type name
   is built from SRE_MODULE with a leading underscore.  No itemsize is
   given, unlike the pattern and match specs. */
static PyType_Spec scanner_spec = {
    .name = "_" SRE_MODULE ".SRE_Scanner",
    .basicsize = sizeof(ScannerObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
    .slots = scanner_slots,
};
2888
/* Module-level function table, installed through sremodule.m_methods.
   The *_METHODDEF entries are macros, presumably from the generated
   "clinic/_sre.c.h" header. */
static PyMethodDef _functions[] = {
    _SRE_COMPILE_METHODDEF
    _SRE_GETCODESIZE_METHODDEF
    _SRE_ASCII_ISCASED_METHODDEF
    _SRE_UNICODE_ISCASED_METHODDEF
    _SRE_ASCII_TOLOWER_METHODDEF
    _SRE_UNICODE_TOLOWER_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2898
2899static int
2900sre_traverse(PyObject *module, visitproc visit, void *arg)
2901{
2902 _sremodulestate *state = get_sre_module_state(module);
2903
2904 Py_VISIT(state->Pattern_Type);
2905 Py_VISIT(state->Match_Type);
2906 Py_VISIT(state->Scanner_Type);
2907
2908 return 0;
2909}
2910
2911static int
2912sre_clear(PyObject *module)
2913{
2914 _sremodulestate *state = get_sre_module_state(module);
2915
2916 Py_CLEAR(state->Pattern_Type);
2917 Py_CLEAR(state->Match_Type);
2918 Py_CLEAR(state->Scanner_Type);
2919
2920 return 0;
2921}
2922
/* m_free hook of the module definition: releases the module state by
   delegating to sre_clear(). */
static void
sre_free(void *module)
{
    sre_clear((PyObject *)module);
}
2928
/* Create a heap type for module m from spec and store it in type.
   On failure, jumps to an "error" label that the enclosing function
   must provide. */
#define CREATE_TYPE(m, type, spec)                                  \
do {                                                                \
    type = (PyTypeObject *)PyType_FromModuleAndSpec(m, spec, NULL); \
    if (type == NULL) {                                             \
        goto error;                                                 \
    }                                                               \
} while (0)
2936
/* Add value to module under name as an int built with
   PyLong_FromUnsignedLong.  The temporary object is released whether or
   not the add succeeds.  On failure, jumps to an "error" label that the
   enclosing function must provide. */
#define ADD_ULONG_CONSTANT(module, name, value)           \
    do {                                                  \
        PyObject *o = PyLong_FromUnsignedLong(value);     \
        if (!o)                                           \
            goto error;                                   \
        int res = PyModule_AddObjectRef(module, name, o); \
        Py_DECREF(o);                                     \
        if (res < 0) {                                    \
            goto error;                                   \
        }                                                 \
} while (0)
2948
2949static int
2950sre_exec(PyObject *m)
2951{
2952 _sremodulestate *state;
2953
2954 /* Create heap types */
2955 state = get_sre_module_state(m);
2956 CREATE_TYPE(m, state->Pattern_Type, &pattern_spec);
2957 CREATE_TYPE(m, state->Match_Type, &match_spec);
2958 CREATE_TYPE(m, state->Scanner_Type, &scanner_spec);
2959
2960 if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) {
2961 goto error;
2962 }
2963
2964 if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) {
2965 goto error;
2966 }
2967
2968 ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT);
2969 ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS);
2970
2971 if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) {
2972 goto error;
2973 }
2974
2975 return 0;
2976
2977error:
2978 return -1;
2979}
2980
/* Module slots: sre_exec() is registered as the Py_mod_exec hook. */
static PyModuleDef_Slot sre_slots[] = {
    {Py_mod_exec, sre_exec},
    {0, NULL},  /* Sentinel */
};
2985
/* Module definition.  The module name is SRE_MODULE with a leading
   underscore; m_size reserves room for one _sremodulestate, which
   sre_traverse/sre_clear/sre_free manage. */
static struct PyModuleDef sremodule = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "_" SRE_MODULE,
    .m_size = sizeof(_sremodulestate),
    .m_methods = _functions,
    .m_slots = sre_slots,
    .m_traverse = sre_traverse,
    .m_free = sre_free,
    .m_clear = sre_clear,
};
2996
/* Module init entry point: hands the module definition to
   PyModuleDef_Init(); the actual setup happens in sre_exec() through
   the slots of sremodule. */
PyMODINIT_FUNC
PyInit__sre(void)
{
    return PyModuleDef_Init(&sremodule);
}
3002
3003/* vim:ts=4:sw=4:et
3004*/
3005