1 | // Copyright 2010 The RE2 Authors. All Rights Reserved. |
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | #include "re2/set.h" |
6 | |
7 | #include <stddef.h> |
8 | #include <algorithm> |
9 | #include <memory> |
10 | #include <utility> |
11 | |
12 | #include "util/logging.h" |
13 | #include "re2/pod_array.h" |
14 | #include "re2/prog.h" |
15 | #include "re2/re2.h" |
16 | #include "re2/regexp.h" |
17 | |
18 | namespace re2 { |
19 | |
20 | RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) |
21 | : options_(options), |
22 | anchor_(anchor), |
23 | compiled_(false), |
24 | size_(0) { |
25 | options_.set_never_capture(true); // might unblock some optimisations |
26 | } |
27 | |
28 | RE2::Set::~Set() { |
29 | for (size_t i = 0; i < elem_.size(); i++) |
30 | elem_[i].second->Decref(); |
31 | } |
32 | |
33 | RE2::Set::Set(Set&& other) |
34 | : options_(other.options_), |
35 | anchor_(other.anchor_), |
36 | elem_(std::move(other.elem_)), |
37 | compiled_(other.compiled_), |
38 | size_(other.size_), |
39 | prog_(std::move(other.prog_)) { |
40 | other.elem_.clear(); |
41 | other.elem_.shrink_to_fit(); |
42 | other.compiled_ = false; |
43 | other.size_ = 0; |
44 | other.prog_.reset(); |
45 | } |
46 | |
47 | RE2::Set& RE2::Set::operator=(Set&& other) { |
48 | this->~Set(); |
49 | (void) new (this) Set(std::move(other)); |
50 | return *this; |
51 | } |
52 | |
53 | int RE2::Set::Add(absl::string_view pattern, std::string* error) { |
54 | if (compiled_) { |
55 | LOG(DFATAL) << "RE2::Set::Add() called after compiling" ; |
56 | return -1; |
57 | } |
58 | |
59 | Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( |
60 | options_.ParseFlags()); |
61 | RegexpStatus status; |
62 | re2::Regexp* re = Regexp::Parse(pattern, pf, &status); |
63 | if (re == NULL) { |
64 | if (error != NULL) |
65 | *error = status.Text(); |
66 | if (options_.log_errors()) |
67 | LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text(); |
68 | return -1; |
69 | } |
70 | |
71 | // Concatenate with match index and push on vector. |
72 | int n = static_cast<int>(elem_.size()); |
73 | re2::Regexp* m = re2::Regexp::HaveMatch(n, pf); |
74 | if (re->op() == kRegexpConcat) { |
75 | int nsub = re->nsub(); |
76 | PODArray<re2::Regexp*> sub(nsub + 1); |
77 | for (int i = 0; i < nsub; i++) |
78 | sub[i] = re->sub()[i]->Incref(); |
79 | sub[nsub] = m; |
80 | re->Decref(); |
81 | re = re2::Regexp::Concat(sub.data(), nsub + 1, pf); |
82 | } else { |
83 | re2::Regexp* sub[2]; |
84 | sub[0] = re; |
85 | sub[1] = m; |
86 | re = re2::Regexp::Concat(sub, 2, pf); |
87 | } |
88 | elem_.emplace_back(std::string(pattern), re); |
89 | return n; |
90 | } |
91 | |
92 | bool RE2::Set::Compile() { |
93 | if (compiled_) { |
94 | LOG(DFATAL) << "RE2::Set::Compile() called more than once" ; |
95 | return false; |
96 | } |
97 | compiled_ = true; |
98 | size_ = static_cast<int>(elem_.size()); |
99 | |
100 | // Sort the elements by their patterns. This is good enough for now |
101 | // until we have a Regexp comparison function. (Maybe someday...) |
102 | std::sort(elem_.begin(), elem_.end(), |
103 | [](const Elem& a, const Elem& b) -> bool { |
104 | return a.first < b.first; |
105 | }); |
106 | |
107 | PODArray<re2::Regexp*> sub(size_); |
108 | for (int i = 0; i < size_; i++) |
109 | sub[i] = elem_[i].second; |
110 | elem_.clear(); |
111 | elem_.shrink_to_fit(); |
112 | |
113 | Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( |
114 | options_.ParseFlags()); |
115 | re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf); |
116 | |
117 | prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem())); |
118 | re->Decref(); |
119 | return prog_ != nullptr; |
120 | } |
121 | |
122 | bool RE2::Set::Match(absl::string_view text, std::vector<int>* v) const { |
123 | return Match(text, v, NULL); |
124 | } |
125 | |
126 | bool RE2::Set::Match(absl::string_view text, std::vector<int>* v, |
127 | ErrorInfo* error_info) const { |
128 | if (!compiled_) { |
129 | LOG(DFATAL) << "RE2::Set::Match() called before compiling" ; |
130 | if (error_info != NULL) |
131 | error_info->kind = kNotCompiled; |
132 | return false; |
133 | } |
134 | #ifdef RE2_HAVE_THREAD_LOCAL |
135 | hooks::context = NULL; |
136 | #endif |
137 | bool dfa_failed = false; |
138 | std::unique_ptr<SparseSet> matches; |
139 | if (v != NULL) { |
140 | matches.reset(new SparseSet(size_)); |
141 | v->clear(); |
142 | } |
143 | bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch, |
144 | NULL, &dfa_failed, matches.get()); |
145 | if (dfa_failed) { |
146 | if (options_.log_errors()) |
147 | LOG(ERROR) << "DFA out of memory: " |
148 | << "program size " << prog_->size() << ", " |
149 | << "list count " << prog_->list_count() << ", " |
150 | << "bytemap range " << prog_->bytemap_range(); |
151 | if (error_info != NULL) |
152 | error_info->kind = kOutOfMemory; |
153 | return false; |
154 | } |
155 | if (ret == false) { |
156 | if (error_info != NULL) |
157 | error_info->kind = kNoError; |
158 | return false; |
159 | } |
160 | if (v != NULL) { |
161 | if (matches->empty()) { |
162 | LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!" ; |
163 | if (error_info != NULL) |
164 | error_info->kind = kInconsistent; |
165 | return false; |
166 | } |
167 | v->assign(matches->begin(), matches->end()); |
168 | } |
169 | if (error_info != NULL) |
170 | error_info->kind = kNoError; |
171 | return true; |
172 | } |
173 | |
174 | } // namespace re2 |
175 | |