1 | /* |
2 | * Secret Labs' Regular Expression Engine |
3 | * |
4 | * regular expression matching engine |
5 | * |
6 | * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. |
7 | * |
8 | * See the _sre.c file for information on usage and redistribution. |
9 | */ |
10 | |
11 | #ifndef SRE_INCLUDED |
12 | #define SRE_INCLUDED |
13 | |
14 | #include "sre_constants.h" |
15 | |
/* Code word type.  It must be at least as wide as unsigned short and
   wide enough to hold any UCS4 character. */
#define SRE_CODE Py_UCS4

/* SRE_MAXREPEAT and SRE_MAXGROUPS limits.  Which pair of definitions is
   used depends on whether size_t is wider than 4 bytes. */
#if SIZEOF_SIZE_T <= 4
#  define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
#  define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2)
#else
#  define SRE_MAXREPEAT (~(SRE_CODE)0)
#  define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2)
#endif
26 | |
27 | typedef struct { |
28 | PyObject_VAR_HEAD |
29 | Py_ssize_t groups; /* must be first! */ |
30 | PyObject* groupindex; /* dict */ |
31 | PyObject* indexgroup; /* tuple */ |
32 | /* compatibility */ |
33 | PyObject* pattern; /* pattern source (or None) */ |
34 | int flags; /* flags used when compiling pattern source */ |
35 | PyObject *weakreflist; /* List of weak references */ |
36 | int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */ |
37 | /* pattern code */ |
38 | Py_ssize_t codesize; |
39 | SRE_CODE code[1]; |
40 | } PatternObject; |
41 | |
42 | #define PatternObject_GetCode(o) (((PatternObject*)(o))->code) |
43 | |
44 | typedef struct { |
45 | PyObject_VAR_HEAD |
46 | PyObject* string; /* link to the target string (must be first) */ |
47 | PyObject* regs; /* cached list of matching spans */ |
48 | PatternObject* pattern; /* link to the regex (pattern) object */ |
49 | Py_ssize_t pos, endpos; /* current target slice */ |
50 | Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */ |
51 | Py_ssize_t groups; /* number of groups (start/end marks) */ |
52 | Py_ssize_t mark[1]; |
53 | } MatchObject; |
54 | |
55 | typedef struct SRE_REPEAT_T { |
56 | Py_ssize_t count; |
57 | const SRE_CODE* pattern; /* points to REPEAT operator arguments */ |
58 | const void* last_ptr; /* helper to check for infinite loops */ |
59 | struct SRE_REPEAT_T *prev; /* points to previous repeat context */ |
60 | } SRE_REPEAT; |
61 | |
62 | typedef struct { |
63 | /* string pointers */ |
64 | const void* ptr; /* current position (also end of current slice) */ |
65 | const void* beginning; /* start of original string */ |
66 | const void* start; /* start of current slice */ |
67 | const void* end; /* end of original string */ |
68 | /* attributes for the match object */ |
69 | PyObject* string; |
70 | Py_buffer buffer; |
71 | Py_ssize_t pos, endpos; |
72 | int isbytes; |
73 | int charsize; /* character size */ |
74 | /* registers */ |
75 | Py_ssize_t lastindex; |
76 | Py_ssize_t lastmark; |
77 | const void** mark; |
78 | int match_all; |
79 | int must_advance; |
80 | /* dynamically allocated stuff */ |
81 | char* data_stack; |
82 | size_t data_stack_size; |
83 | size_t data_stack_base; |
84 | /* current repeat context */ |
85 | SRE_REPEAT *repeat; |
86 | } SRE_STATE; |
87 | |
88 | typedef struct { |
89 | PyObject_HEAD |
90 | PyObject* pattern; |
91 | SRE_STATE state; |
92 | int executing; |
93 | } ScannerObject; |
94 | |
95 | #endif |
96 | |