regexp.h source code [tensorflow/external/com_googlesource_code_re2/re2/regexp.h]

1	// Copyright 2006 The RE2 Authors. All Rights Reserved.
2	// Use of this source code is governed by a BSD-style
3	// license that can be found in the LICENSE file.
4
5	#ifndef RE2_REGEXP_H_
6	#define RE2_REGEXP_H_
7
8	// --- SPONSORED LINK --------------------------------------------------
9	// If you want to use this library for regular expression matching,
10	// you should use re2/re2.h, which provides a class RE2 that
11	// mimics the PCRE interface provided by PCRE's C++ wrappers.
12	// This header describes the low-level interface used to implement RE2
13	// and may change in backwards-incompatible ways from time to time.
14	// In contrast, RE2's interface will not.
15	// ---------------------------------------------------------------------
16
17	// Regular expression library: parsing, execution, and manipulation
18	// of regular expressions.
19	//
20	// Any operation that traverses the Regexp structures should be written
21	// using Regexp::Walker (see walker-inl.h), not recursively, because deeply nested
22	// regular expressions such as x++++++++++++++++++++... might cause recursive
23	// traversals to overflow the stack.
24	//
25	// It is the caller's responsibility to provide appropriate mutual exclusion
26	// around manipulation of the regexps. RE2 does this.
27	//
28	// PARSING
29	//
30	// Regexp::Parse parses regular expressions encoded in UTF-8.
31	// The default syntax is POSIX extended regular expressions,
32	// with the following changes:
33	//
34	// 1. Backreferences (optional in POSIX EREs) are not supported.
35	// (Supporting them precludes the use of DFA-based
36	// matching engines.)
37	//
38	// 2. Collating elements and collation classes are not supported.
39	// (No one has needed or wanted them.)
40	//
41	// The exact syntax accepted can be modified by passing flags to
42	// Regexp::Parse. In particular, many of the basic Perl additions
43	// are available. The flags are documented below (search for LikePerl).
44	//
45	// If parsed with the flag Regexp::Latin1, both the regular expression
46	// and the input to the matching routines are assumed to be encoded in
47	// Latin-1, not UTF-8.
48	//
49	// EXECUTION
50	//
51	// Once Regexp has parsed a regular expression, it provides methods
52	// to search text using that regular expression. These methods are
53	// implemented via calling out to other regular expression libraries.
54	// (Let's call them the sublibraries.)
55	//
56	// To call a sublibrary, Regexp does not simply prepare a
57	// string version of the regular expression and hand it to the
58	// sublibrary. Instead, Regexp prepares, from its own parsed form, the
59	// corresponding internal representation used by the sublibrary.
60	// This has the drawback of needing to know the internal representation
61	// used by the sublibrary, but it has two important benefits:
62	//
63	// 1. The syntax and meaning of regular expressions is guaranteed
64	// to be that used by Regexp's parser, not the syntax expected
65	// by the sublibrary. Regexp might accept a restricted or
66	// expanded syntax for regular expressions as compared with
67	// the sublibrary. As long as Regexp can translate from its
68	// internal form into the sublibrary's, clients need not know
69	// exactly which sublibrary they are using.
70	//
71	// 2. The sublibrary parsers are bypassed. For whatever reason,
72	// sublibrary regular expression parsers often have security
73	// problems. For example, plan9grep's regular expression parser
74	// has a buffer overflow in its handling of large character
75	// classes, and PCRE's parser has had buffer overflow problems
76	// in the past. Security-team requires sandboxing of sublibrary
77	// regular expression parsers. Avoiding the sublibrary parsers
78	// avoids the sandbox.
79	//
80	// The execution methods we use now are provided by the compiled form,
81	// Prog, described in prog.h
82	//
83	// MANIPULATION
84	//
85	// Unlike other regular expression libraries, Regexp makes its parsed
86	// form accessible to clients, so that client code can analyze the
87	// parsed regular expressions.
88
89	#include <stddef.h>
90	#include <stdint.h>
91	#include <map>
92	#include <set>
93	#include <string>
94
95	#include "absl/strings/string_view.h"
96	#include "util/logging.h"
97	#include "util/utf.h"
98
99	namespace re2 {
100
// Operator stored at each node of a parsed regular expression.
// Values start at 1 and are contiguous up to kMaxRegexpOp.
// Keep in sync with string list kOpcodeNames[] in testing/dump.cc
enum RegexpOp {
  // Matches no strings.
  kRegexpNoMatch = 1,

  // Matches empty string.
  kRegexpEmptyMatch,

  // Matches rune_.
  kRegexpLiteral,

  // Matches runes_.
  kRegexpLiteralString,

  // Matches concatenation of sub_[0..nsub-1].
  kRegexpConcat,
  // Matches union of sub_[0..nsub-1].
  kRegexpAlternate,

  // Matches sub_[0] zero or more times.
  kRegexpStar,
  // Matches sub_[0] one or more times.
  kRegexpPlus,
  // Matches sub_[0] zero or one times.
  kRegexpQuest,

  // Matches sub_[0] at least min_ times, at most max_ times.
  // max_ == -1 means no upper limit.
  kRegexpRepeat,

  // Parenthesized (capturing) subexpression.  Index is cap_.
  // Optionally, capturing name is name_.
  kRegexpCapture,

  // Matches any character.
  kRegexpAnyChar,

  // Matches any byte [sic].
  kRegexpAnyByte,

  // Matches empty string at beginning of line.
  kRegexpBeginLine,
  // Matches empty string at end of line.
  kRegexpEndLine,

  // Matches word boundary "\b".
  kRegexpWordBoundary,
  // Matches not-a-word boundary "\B".
  kRegexpNoWordBoundary,

  // Matches empty string at beginning of text.
  kRegexpBeginText,
  // Matches empty string at end of text.
  kRegexpEndText,

  // Matches character class given by cc_.
  kRegexpCharClass,

  // Forces match of entire expression right now,
  // with match ID match_id_ (used by RE2::Set).
  kRegexpHaveMatch,

  // Alias for the last real operator; not an operator itself.
  kMaxRegexpOp = kRegexpHaveMatch,
};
165
// Error codes reported through RegexpStatus.
// Keep in sync with string list in regexp.cc
enum RegexpStatusCode {
  // No error
  kRegexpSuccess = 0,

  // Unexpected error
  kRegexpInternalError,

  // Parse errors
  kRegexpBadEscape,          // bad escape sequence
  kRegexpBadCharClass,       // bad character class
  kRegexpBadCharRange,       // bad character class range
  kRegexpMissingBracket,     // missing closing ]
  kRegexpMissingParen,       // missing closing )
  kRegexpUnexpectedParen,    // unexpected closing )
  kRegexpTrailingBackslash,  // at end of regexp
  kRegexpRepeatArgument,     // repeat argument missing, e.g. "*"
  kRegexpRepeatSize,         // bad repetition argument
  kRegexpRepeatOp,           // bad repetition operator
  kRegexpBadPerlOp,          // bad perl operator
  kRegexpBadUTF8,            // invalid UTF-8 in regexp
  kRegexpBadNamedCapture,    // bad named capture
};
189
190	// Error status for certain operations.
191	class RegexpStatus {
192	public:
193	RegexpStatus() : code_(kRegexpSuccess), tmp_(NULL) {}
194	~RegexpStatus() { delete tmp_; }
195
196	void set_code(RegexpStatusCode code) { code_ = code; }
197	void set_error_arg(absl::string_view error_arg) { error_arg_ = error_arg; }
198	void set_tmp(std::string* tmp) { delete tmp_; tmp_ = tmp; }
199	RegexpStatusCode code() const { return code_; }
200	absl::string_view error_arg() const { return error_arg_; }
201	bool ok() const { return code() == kRegexpSuccess; }
202
203	// Copies state from status.
204	void Copy(const RegexpStatus& status);
205
206	// Returns text equivalent of code, e.g.:
207	// "Bad character class"
208	static std::string CodeText(RegexpStatusCode code);
209
210	// Returns text describing error, e.g.:
211	// "Bad character class: [z-a]"
212	std::string Text() const;
213
214	private:
215	RegexpStatusCode code_; // Kind of error.
216	absl::string_view error_arg_; // Piece of regexp containing syntax error.
217	std::string* tmp_; // Temporary storage, possibly for error_arg_.
218
219	RegexpStatus(const RegexpStatus&) = delete;
220	RegexpStatus& operator=(const RegexpStatus&) = delete;
221	};
222
// Compiled form; see prog.h
// Forward declaration only: this header uses Prog solely through pointers.
class Prog;
225
226	struct RuneRange {
227	RuneRange() : lo(`0`), hi(`0`) { }
228	RuneRange(int l, int h) : lo(l), hi(h) { }
229	Rune lo;
230	Rune hi;
231	};
232
233	// Less-than on RuneRanges treats a == b if they overlap at all.
234	// This lets us look in a set to find the range covering a particular Rune.
235	struct RuneRangeLess {
236	bool operator()(const RuneRange& a, const RuneRange& b) const {
237	return a.hi < b.lo;
238	}
239	};
240
// Forward declaration; CharClass names it as a friend before it is defined.
class CharClassBuilder;
242
243	class CharClass {
244	public:
245	void Delete();
246
247	typedef RuneRange* iterator;
248	iterator begin() { return ranges_; }
249	iterator end() { return ranges_ + nranges_; }
250
251	int size() { return nrunes_; }
252	bool empty() { return nrunes_ == `0`; }
253	bool full() { return nrunes_ == Runemax+`1`; }
254	bool FoldsASCII() { return folds_ascii_; }
255
256	bool Contains(Rune r) const;
257	CharClass* Negate();
258
259	private:
260	CharClass(); // not implemented
261	~CharClass(); // not implemented
262	static CharClass* New(size_t maxranges);
263
264	friend class CharClassBuilder;
265
266	bool folds_ascii_;
267	int nrunes_;
268	RuneRange *ranges_;
269	int nranges_;
270
271	CharClass(const CharClass&) = delete;
272	CharClass& operator=(const CharClass&) = delete;
273	};
274
275	class Regexp {
276	public:
277
278	// Flags for parsing. Can be ORed together.
279	enum ParseFlags {
280	NoParseFlags = `0`,
281	FoldCase = `1`<<`0`, // Fold case during matching (case-insensitive).
282	Literal = `1`<<`1`, // Treat s as literal string instead of a regexp.
283	ClassNL = `1`<<`2`, // Allow char classes like [^a-z] and \D and \s
284	// and [[:space:]] to match newline.
285	DotNL = `1`<<`3`, // Allow . to match newline.
286	MatchNL = ClassNL \| DotNL,
287	OneLine = `1`<<`4`, // Treat ^ and $ as only matching at beginning and
288	// end of text, not around embedded newlines.
289	// (Perl's default)
290	Latin1 = `1`<<`5`, // Regexp and text are in Latin1, not UTF-8.
291	NonGreedy = `1`<<`6`, // Repetition operators are non-greedy by default.
292	PerlClasses = `1`<<`7`, // Allow Perl character classes like \d.
293	PerlB = `1`<<`8`, // Allow Perl's \b and \B.
294	PerlX = `1`<<`9`, // Perl extensions:
295	// non-capturing parens - (?: )
296	// non-greedy operators - ? +? ?? {}?*
297	// flag edits - (?i) (?-i) (?i: )
298	// i - FoldCase
299	// m - !OneLine
300	// s - DotNL
301	// U - NonGreedy
302	// line ends: \A \z
303	// \Q and \E to disable/enable metacharacters
304	// (?P<name>expr) for named captures
305	// \C to match any single byte
306	UnicodeGroups = `1`<<`10`, // Allow \p{Han} for Unicode Han group
307	// and \P{Han} for its negation.
308	NeverNL = `1`<<`11`, // Never match NL, even if the regexp mentions
309	// it explicitly.
310	NeverCapture = `1`<<`12`, // Parse all parens as non-capturing.
311
312	// As close to Perl as we can get.
313	LikePerl = ClassNL \| OneLine \| PerlClasses \| PerlB \| PerlX \|
314	UnicodeGroups,
315
316	// Internal use only.
317	WasDollar = `1`<<`13`, // on kRegexpEndText: was $ in regexp text
318	AllParseFlags = (`1`<<`14`)-`1`,
319	};
320
321	// Get. No set, Regexps are logically immutable once created.
322	RegexpOp op() { return static_cast<RegexpOp>(op_); }
323	int nsub() { return nsub_; }
324	bool simple() { return simple_ != `0`; }
325	ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); }
326	int Ref(); // For testing.
327
328	Regexp** sub() {
329	if(nsub_ <= `1`)
330	return &subone_;
331	else
332	return submany_;
333	}
334
335	int min() { DCHECK_EQ(op_, kRegexpRepeat); return min_; }
336	int max() { DCHECK_EQ(op_, kRegexpRepeat); return max_; }
337	Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; }
338	CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; }
339	int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; }
340	const std::string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; }
341	Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; }
342	int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; }
343	int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; }
344
345	// Increments reference count, returns object as convenience.
346	Regexp* Incref();
347
348	// Decrements reference count and deletes this object if count reaches 0.
349	void Decref();
350
351	// Parses string s to produce regular expression, returned.
352	// Caller must release return value with re->Decref().
353	// On failure, sets status (if status != NULL) and returns NULL.*
354	static Regexp* Parse(absl::string_view s, ParseFlags flags,
355	RegexpStatus* status);
356
357	// Returns a _new_ simplified version of the current regexp.
358	// Does not edit the current regexp.
359	// Caller must release return value with re->Decref().
360	// Simplified means that counted repetition has been rewritten
361	// into simpler terms and all Perl/POSIX features have been
362	// removed. The result will capture exactly the same
363	// subexpressions the original did, unless formatted with ToString.
364	Regexp* Simplify();
365	friend class CoalesceWalker;
366	friend class SimplifyWalker;
367
368	// Parses the regexp src and then simplifies it and sets dst to the*
369	// string representation of the simplified form. Returns true on success.
370	// Returns false and sets status (if status != NULL) on parse error.*
371	static bool SimplifyRegexp(absl::string_view src, ParseFlags flags,
372	std::string* dst, RegexpStatus* status);
373
374	// Returns the number of capturing groups in the regexp.
375	int NumCaptures();
376	friend class NumCapturesWalker;
377
378	// Returns a map from names to capturing group indices,
379	// or NULL if the regexp contains no named capture groups.
380	// The caller is responsible for deleting the map.
381	std::map<std::string, int>* NamedCaptures();
382
383	// Returns a map from capturing group indices to capturing group
384	// names or NULL if the regexp contains no named capture groups. The
385	// caller is responsible for deleting the map.
386	std::map<int, std::string>* CaptureNames();
387
388	// Returns a string representation of the current regexp,
389	// using as few parentheses as possible.
390	std::string ToString();
391
392	// Convenience functions. They consume the passed reference,
393	// so in many cases you should use, e.g., Plus(re->Incref(), flags).
394	// They do not consume allocated arrays like subs or runes.
395	static Regexp* Plus(Regexp* sub, ParseFlags flags);
396	static Regexp* Star(Regexp* sub, ParseFlags flags);
397	static Regexp* Quest(Regexp* sub, ParseFlags flags);
398	static Regexp* Concat(Regexp** subs, int nsubs, ParseFlags flags);
399	static Regexp* Alternate(Regexp** subs, int nsubs, ParseFlags flags);
400	static Regexp* Capture(Regexp* sub, ParseFlags flags, int cap);
401	static Regexp* Repeat(Regexp* sub, ParseFlags flags, int min, int max);
402	static Regexp* NewLiteral(Rune rune, ParseFlags flags);
403	static Regexp* NewCharClass(CharClass* cc, ParseFlags flags);
404	static Regexp* LiteralString(Rune* runes, int nrunes, ParseFlags flags);
405	static Regexp* HaveMatch(int match_id, ParseFlags flags);
406
407	// Like Alternate but does not factor out common prefixes.
408	static Regexp* AlternateNoFactor(Regexp** subs, int nsubs, ParseFlags flags);
409
410	// Debugging function. Returns string format for regexp
411	// that makes structure clear. Does NOT use regexp syntax.
412	std::string Dump();
413
414	// Helper traversal class, defined fully in walker-inl.h.
415	template<typename T> class Walker;
416
417	// Compile to Prog. See prog.h
418	// Reverse prog expects to be run over text backward.
419	// Construction and execution of prog will
420	// stay within approximately max_mem bytes of memory.
421	// If max_mem <= 0, a reasonable default is used.
422	Prog* CompileToProg(int64_t max_mem);
423	Prog* CompileToReverseProg(int64_t max_mem);
424
425	// Whether to expect this library to find exactly the same answer as PCRE
426	// when running this regexp. Most regexps do mimic PCRE exactly, but a few
427	// obscure cases behave differently. Technically this is more a property
428	// of the Prog than the Regexp, but the computation is much easier to do
429	// on the Regexp. See mimics_pcre.cc for the exact conditions.
430	bool MimicsPCRE();
431
432	// Benchmarking function.
433	void NullWalk();
434
435	// Whether every match of this regexp must be anchored and
436	// begin with a non-empty fixed string (perhaps after ASCII
437	// case-folding). If so, returns the prefix and the sub-regexp that
438	// follows it.
439	// Callers should expect prefix, foldcase and suffix to be "zeroed"*
440	// regardless of the return value.
441	bool RequiredPrefix(std::string* prefix, bool* foldcase,
442	Regexp** suffix);
443
444	// Whether every match of this regexp must be unanchored and
445	// begin with a non-empty fixed string (perhaps after ASCII
446	// case-folding). If so, returns the prefix.
447	// Callers should expect prefix and foldcase to be "zeroed"
448	// regardless of the return value.
449	bool RequiredPrefixForAccel(std::string* prefix, bool* foldcase);
450
451	// Controls the maximum repeat count permitted by the parser.
452	// FOR FUZZING ONLY.
453	static void FUZZING_ONLY_set_maximum_repeat_count(int i);
454
455	private:
456	// Constructor allocates vectors as appropriate for operator.
457	explicit Regexp(RegexpOp op, ParseFlags parse_flags);
458
459	// Use Decref() instead of delete to release Regexps.
460	// This is private to catch deletes at compile time.
461	~Regexp();
462	void Destroy();
463	bool QuickDestroy();
464
465	// Helpers for Parse. Listed here so they can edit Regexps.
466	class ParseState;
467
468	friend class ParseState;
469	friend bool ParseCharClass(absl::string_view* s, Regexp** out_re,
470	RegexpStatus* status);
471
472	// Helper for testing [sic].
473	friend bool RegexpEqualTestingOnly(Regexp, Regexp);
474
475	// Computes whether Regexp is already simple.
476	bool ComputeSimple();
477
478	// Constructor that generates a Star, Plus or Quest,
479	// squashing the pair if sub is also a Star, Plus or Quest.
480	static Regexp* StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags);
481
482	// Constructor that generates a concatenation or alternation,
483	// enforcing the limit on the number of subexpressions for
484	// a particular Regexp.
485	static Regexp* ConcatOrAlternate(RegexpOp op, Regexp** subs, int nsubs,
486	ParseFlags flags, bool can_factor);
487
488	// Returns the leading string that re starts with.
489	// The returned Rune points into a piece of re,*
490	// so it must not be used after the caller calls re->Decref().
491	static Rune* LeadingString(Regexp* re, int* nrune, ParseFlags* flags);
492
493	// Removes the first n leading runes from the beginning of re.
494	// Edits re in place.
495	static void RemoveLeadingString(Regexp* re, int n);
496
497	// Returns the leading regexp in re's top-level concatenation.
498	// The returned Regexp points at re or a sub-expression of re,*
499	// so it must not be used after the caller calls re->Decref().
500	static Regexp* LeadingRegexp(Regexp* re);
501
502	// Removes LeadingRegexp(re) from re and returns the remainder.
503	// Might edit re in place.
504	static Regexp* RemoveLeadingRegexp(Regexp* re);
505
506	// Simplifies an alternation of literal strings by factoring out
507	// common prefixes.
508	static int FactorAlternation(Regexp** sub, int nsub, ParseFlags flags);
509	friend class FactorAlternationImpl;
510
511	// Is a == b? Only efficient on regexps that have not been through
512	// Simplify yet - the expansion of a kRegexpRepeat will make this
513	// take a long time. Do not call on such regexps, hence private.
514	static bool Equal(Regexp* a, Regexp* b);
515
516	// Allocate space for n sub-regexps.
517	void AllocSub(int n) {
518	DCHECK(n >= `0` && static_cast<uint16_t>(n) == n);
519	if (n > `1`)
520	submany_ = new Regexp*[n];
521	nsub_ = static_cast<uint16_t>(n);
522	}
523
524	// Add Rune to LiteralString
525	void AddRuneToString(Rune r);
526
527	// Swaps this with that, in place.
528	void Swap(Regexp *that);
529
530	// Operator. See description of operators above.
531	// uint8_t instead of RegexpOp to control space usage.
532	uint8_t op_;
533
534	// Is this regexp structure already simple
535	// (has it been returned by Simplify)?
536	// uint8_t instead of bool to control space usage.
537	uint8_t simple_;
538
539	// Flags saved from parsing and used during execution.
540	// (Only FoldCase is used.)
541	// uint16_t instead of ParseFlags to control space usage.
542	uint16_t parse_flags_;
543
544	// Reference count. Exists so that SimplifyRegexp can build
545	// regexp structures that are dags rather than trees to avoid
546	// exponential blowup in space requirements.
547	// uint16_t to control space usage.
548	// The standard regexp routines will never generate a
549	// ref greater than the maximum repeat count (kMaxRepeat),
550	// but even so, Incref and Decref consult an overflow map
551	// when ref_ reaches kMaxRef.
552	uint16_t ref_;
553	static const uint16_t kMaxRef = `0xffff`;
554
555	// Subexpressions.
556	// uint16_t to control space usage.
557	// Concat and Alternate handle larger numbers of subexpressions
558	// by building concatenation or alternation trees.
559	// Other routines should call Concat or Alternate instead of
560	// filling in sub() by hand.
561	uint16_t nsub_;
562	static const uint16_t kMaxNsub = `0xffff`;
563	union {
564	Regexp** submany_; // if nsub_ > 1
565	Regexp* subone_; // if nsub_ == 1
566	};
567
568	// Extra space for parse and teardown stacks.
569	Regexp* down_;
570
571	// Arguments to operator. See description of operators above.
572	union {
573	struct { // Repeat
574	int max_;
575	int min_;
576	};
577	struct { // Capture
578	int cap_;
579	std::string* name_;
580	};
581	struct { // LiteralString
582	int nrunes_;
583	Rune* runes_;
584	};
585	struct { // CharClass
586	// These two could be in separate union members,
587	// but it wouldn't save any space (there are other two-word structs)
588	// and keeping them separate avoids confusion during parsing.
589	CharClass* cc_;
590	CharClassBuilder* ccb_;
591	};
592	Rune rune_; // Literal
593	int match_id_; // HaveMatch
594	void the_union_[`2`]; // as big as any other element, for memset*
595	};
596
597	Regexp(const Regexp&) = delete;
598	Regexp& operator=(const Regexp&) = delete;
599	};
600
601	// Character class set: contains non-overlapping, non-abutting RuneRanges.
602	typedef std::set<RuneRange, RuneRangeLess> RuneRangeSet;
603
604	class CharClassBuilder {
605	public:
606	CharClassBuilder();
607
608	typedef RuneRangeSet::iterator iterator;
609	iterator begin() { return ranges_.begin(); }
610	iterator end() { return ranges_.end(); }
611
612	int size() { return nrunes_; }
613	bool empty() { return nrunes_ == `0`; }
614	bool full() { return nrunes_ == Runemax+`1`; }
615
616	bool Contains(Rune r);
617	bool FoldsASCII();
618	bool AddRange(Rune lo, Rune hi); // returns whether class changed
619	CharClassBuilder* Copy();
620	void AddCharClass(CharClassBuilder* cc);
621	void Negate();
622	void RemoveAbove(Rune r);
623	CharClass* GetCharClass();
624	void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags);
625
626	private:
627	static const uint32_t AlphaMask = (`1`<<`26`) - `1`;
628	uint32_t upper_; // bitmap of A-Z
629	uint32_t lower_; // bitmap of a-z
630	int nrunes_;
631	RuneRangeSet ranges_;
632
633	CharClassBuilder(const CharClassBuilder&) = delete;
634	CharClassBuilder& operator=(const CharClassBuilder&) = delete;
635	};
636
637	// Bitwise ops on ParseFlags produce ParseFlags.
638	inline Regexp::ParseFlags operator\|(Regexp::ParseFlags a,
639	Regexp::ParseFlags b) {
640	return static_cast<Regexp::ParseFlags>(
641	static_cast<int>(a) \| static_cast<int>(b));
642	}
643
644	inline Regexp::ParseFlags operator^(Regexp::ParseFlags a,
645	Regexp::ParseFlags b) {
646	return static_cast<Regexp::ParseFlags>(
647	static_cast<int>(a) ^ static_cast<int>(b));
648	}
649
650	inline Regexp::ParseFlags operator&(Regexp::ParseFlags a,
651	Regexp::ParseFlags b) {
652	return static_cast<Regexp::ParseFlags>(
653	static_cast<int>(a) & static_cast<int>(b));
654	}
655
656	inline Regexp::ParseFlags operator~(Regexp::ParseFlags a) {
657	// Attempting to produce a value out of enum's range has undefined behaviour.
658	return static_cast<Regexp::ParseFlags>(
659	~static_cast<int>(a) & static_cast<int>(Regexp::AllParseFlags));
660	}
661
662	} // namespace re2
663
664	#endif // RE2_REGEXP_H_
665

Browse the source code of tensorflow/external/com_googlesource_code_re2/re2/regexp.h