1// Copyright 2010 The RE2 Authors. All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "re2/set.h"
6
7#include <stddef.h>
8#include <algorithm>
9#include <memory>
10#include <utility>
11
12#include "util/logging.h"
13#include "re2/pod_array.h"
14#include "re2/prog.h"
15#include "re2/re2.h"
16#include "re2/regexp.h"
17
18namespace re2 {
19
20RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
21 : options_(options),
22 anchor_(anchor),
23 compiled_(false),
24 size_(0) {
25 options_.set_never_capture(true); // might unblock some optimisations
26}
27
28RE2::Set::~Set() {
29 for (size_t i = 0; i < elem_.size(); i++)
30 elem_[i].second->Decref();
31}
32
33RE2::Set::Set(Set&& other)
34 : options_(other.options_),
35 anchor_(other.anchor_),
36 elem_(std::move(other.elem_)),
37 compiled_(other.compiled_),
38 size_(other.size_),
39 prog_(std::move(other.prog_)) {
40 other.elem_.clear();
41 other.elem_.shrink_to_fit();
42 other.compiled_ = false;
43 other.size_ = 0;
44 other.prog_.reset();
45}
46
47RE2::Set& RE2::Set::operator=(Set&& other) {
48 this->~Set();
49 (void) new (this) Set(std::move(other));
50 return *this;
51}
52
53int RE2::Set::Add(absl::string_view pattern, std::string* error) {
54 if (compiled_) {
55 LOG(DFATAL) << "RE2::Set::Add() called after compiling";
56 return -1;
57 }
58
59 Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
60 options_.ParseFlags());
61 RegexpStatus status;
62 re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
63 if (re == NULL) {
64 if (error != NULL)
65 *error = status.Text();
66 if (options_.log_errors())
67 LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
68 return -1;
69 }
70
71 // Concatenate with match index and push on vector.
72 int n = static_cast<int>(elem_.size());
73 re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
74 if (re->op() == kRegexpConcat) {
75 int nsub = re->nsub();
76 PODArray<re2::Regexp*> sub(nsub + 1);
77 for (int i = 0; i < nsub; i++)
78 sub[i] = re->sub()[i]->Incref();
79 sub[nsub] = m;
80 re->Decref();
81 re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
82 } else {
83 re2::Regexp* sub[2];
84 sub[0] = re;
85 sub[1] = m;
86 re = re2::Regexp::Concat(sub, 2, pf);
87 }
88 elem_.emplace_back(std::string(pattern), re);
89 return n;
90}
91
92bool RE2::Set::Compile() {
93 if (compiled_) {
94 LOG(DFATAL) << "RE2::Set::Compile() called more than once";
95 return false;
96 }
97 compiled_ = true;
98 size_ = static_cast<int>(elem_.size());
99
100 // Sort the elements by their patterns. This is good enough for now
101 // until we have a Regexp comparison function. (Maybe someday...)
102 std::sort(elem_.begin(), elem_.end(),
103 [](const Elem& a, const Elem& b) -> bool {
104 return a.first < b.first;
105 });
106
107 PODArray<re2::Regexp*> sub(size_);
108 for (int i = 0; i < size_; i++)
109 sub[i] = elem_[i].second;
110 elem_.clear();
111 elem_.shrink_to_fit();
112
113 Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
114 options_.ParseFlags());
115 re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
116
117 prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem()));
118 re->Decref();
119 return prog_ != nullptr;
120}
121
122bool RE2::Set::Match(absl::string_view text, std::vector<int>* v) const {
123 return Match(text, v, NULL);
124}
125
126bool RE2::Set::Match(absl::string_view text, std::vector<int>* v,
127 ErrorInfo* error_info) const {
128 if (!compiled_) {
129 LOG(DFATAL) << "RE2::Set::Match() called before compiling";
130 if (error_info != NULL)
131 error_info->kind = kNotCompiled;
132 return false;
133 }
134#ifdef RE2_HAVE_THREAD_LOCAL
135 hooks::context = NULL;
136#endif
137 bool dfa_failed = false;
138 std::unique_ptr<SparseSet> matches;
139 if (v != NULL) {
140 matches.reset(new SparseSet(size_));
141 v->clear();
142 }
143 bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
144 NULL, &dfa_failed, matches.get());
145 if (dfa_failed) {
146 if (options_.log_errors())
147 LOG(ERROR) << "DFA out of memory: "
148 << "program size " << prog_->size() << ", "
149 << "list count " << prog_->list_count() << ", "
150 << "bytemap range " << prog_->bytemap_range();
151 if (error_info != NULL)
152 error_info->kind = kOutOfMemory;
153 return false;
154 }
155 if (ret == false) {
156 if (error_info != NULL)
157 error_info->kind = kNoError;
158 return false;
159 }
160 if (v != NULL) {
161 if (matches->empty()) {
162 LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
163 if (error_info != NULL)
164 error_info->kind = kInconsistent;
165 return false;
166 }
167 v->assign(matches->begin(), matches->end());
168 }
169 if (error_info != NULL)
170 error_info->kind = kNoError;
171 return true;
172}
173
174} // namespace re2
175