1 | // Copyright 2008 The RE2 Authors. All Rights Reserved. |
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | #ifndef RE2_UNICODE_GROUPS_H_ |
6 | #define RE2_UNICODE_GROUPS_H_ |
7 | |
8 | // Unicode character groups. |
9 | |
10 | // The codes get split into ranges of 16-bit codes |
11 | // and ranges of 32-bit codes. It would be simpler |
12 | // to use only 32-bit ranges, but these tables are large |
13 | // enough to warrant extra care. |
14 | // |
15 | // Using just 32-bit ranges gives 27 kB of data. |
16 | // Adding 16-bit ranges gives 18 kB of data. |
17 | // Adding an extra table of 16-bit singletons would reduce |
18 | // to 16.5 kB of data but make the data harder to use; |
19 | // we don't bother. |
20 | |
21 | #include <stdint.h> |
22 | |
23 | #include "util/utf.h" |
24 | |
25 | namespace re2 { |
26 | |
// One range of 16-bit codes, stored compactly as a pair of uint16_t
// bounds. Codes that do not fit in 16 bits are described by URange32.
struct URange16 {
  uint16_t lo;  // lower bound of the range
  uint16_t hi;  // upper bound of the range (presumably inclusive -- confirm)
};
32 | |
33 | struct URange32 |
34 | { |
35 | Rune lo; |
36 | Rune hi; |
37 | }; |
38 | |
39 | struct UGroup |
40 | { |
41 | const char *name; |
42 | int sign; // +1 for [abc], -1 for [^abc] |
43 | const URange16 *r16; |
44 | int nr16; |
45 | const URange32 *r32; |
46 | int nr32; |
47 | }; |
48 | |
49 | // Named by property or script name (e.g., "Nd", "N", "Han"). |
50 | // Negated groups are not included. |
51 | extern const UGroup unicode_groups[]; |
52 | extern const int num_unicode_groups; |
53 | |
54 | // Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]"). |
55 | // Negated groups are included. |
56 | extern const UGroup posix_groups[]; |
57 | extern const int num_posix_groups; |
58 | |
59 | // Named by Perl name (e.g., "\\d", "\\D"). |
60 | // Negated groups are included. |
61 | extern const UGroup perl_groups[]; |
62 | extern const int num_perl_groups; |
63 | |
64 | } // namespace re2 |
65 | |
66 | #endif // RE2_UNICODE_GROUPS_H_ |
67 | |