1 | // Copyright 2006-2007 The RE2 Authors. All Rights Reserved. |
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | // Tested by search_test.cc. |
6 | // |
7 | // Prog::SearchNFA, an NFA search. |
8 | // This is an actual NFA like the theorists talk about, |
9 | // not the pseudo-NFA found in backtracking regexp implementations. |
10 | // |
11 | // IMPLEMENTATION |
12 | // |
13 | // This algorithm is a variant of one that appeared in Rob Pike's sam editor, |
14 | // which is a variant of the one described in Thompson's 1968 CACM paper. |
15 | // See http://swtch.com/~rsc/regexp/ for various history. The main feature |
16 | // over the DFA implementation is that it tracks submatch boundaries. |
17 | // |
18 | // When the choice of submatch boundaries is ambiguous, this particular |
19 | // implementation makes the same choices that traditional backtracking |
20 | // implementations (in particular, Perl and PCRE) do. |
21 | // Note that unlike in Perl and PCRE, this algorithm *cannot* take exponential |
22 | // time in the length of the input. |
23 | // |
24 | // Like Thompson's original machine and like the DFA implementation, this |
25 | // implementation notices a match only once it is one byte past it. |
26 | |
27 | #include <stdio.h> |
28 | #include <string.h> |
29 | #include <algorithm> |
30 | #include <deque> |
31 | #include <string> |
32 | #include <utility> |
33 | #include <vector> |
34 | |
35 | #include "absl/strings/str_format.h" |
36 | #include "util/logging.h" |
37 | #include "re2/pod_array.h" |
38 | #include "re2/prog.h" |
39 | #include "re2/regexp.h" |
40 | #include "re2/sparse_array.h" |
41 | #include "re2/sparse_set.h" |
42 | |
43 | namespace re2 { |
44 | |
45 | static const bool = false; |
46 | |
47 | class NFA { |
48 | public: |
49 | NFA(Prog* prog); |
50 | ~NFA(); |
51 | |
52 | // Searches for a matching string. |
53 | // * If anchored is true, only considers matches starting at offset. |
54 | // Otherwise finds lefmost match at or after offset. |
55 | // * If longest is true, returns the longest match starting |
56 | // at the chosen start point. Otherwise returns the so-called |
57 | // left-biased match, the one traditional backtracking engines |
58 | // (like Perl and PCRE) find. |
59 | // Records submatch boundaries in submatch[1..nsubmatch-1]. |
60 | // Submatch[0] is the entire match. When there is a choice in |
61 | // which text matches each subexpression, the submatch boundaries |
62 | // are chosen to match what a backtracking implementation would choose. |
63 | bool Search(absl::string_view text, absl::string_view context, bool anchored, |
64 | bool longest, absl::string_view* submatch, int nsubmatch); |
65 | |
66 | private: |
67 | struct Thread { |
68 | union { |
69 | int ref; |
70 | Thread* next; // when on free list |
71 | }; |
72 | const char** capture; |
73 | }; |
74 | |
75 | // State for explicit stack in AddToThreadq. |
76 | struct AddState { |
77 | int id; // Inst to process |
78 | Thread* t; // if not null, set t0 = t before processing id |
79 | }; |
80 | |
81 | // Threadq is a list of threads. The list is sorted by the order |
82 | // in which Perl would explore that particular state -- the earlier |
83 | // choices appear earlier in the list. |
84 | typedef SparseArray<Thread*> Threadq; |
85 | |
86 | inline Thread* AllocThread(); |
87 | inline Thread* Incref(Thread* t); |
88 | inline void Decref(Thread* t); |
89 | |
90 | // Follows all empty arrows from id0 and enqueues all the states reached. |
91 | // Enqueues only the ByteRange instructions that match byte c. |
92 | // context is used (with p) for evaluating empty-width specials. |
93 | // p is the current input position, and t0 is the current thread. |
94 | void AddToThreadq(Threadq* q, int id0, int c, absl::string_view context, |
95 | const char* p, Thread* t0); |
96 | |
97 | // Run runq on byte c, appending new states to nextq. |
98 | // Updates matched_ and match_ as new, better matches are found. |
99 | // context is used (with p) for evaluating empty-width specials. |
100 | // p is the position of byte c in the input string for AddToThreadq; |
101 | // p-1 will be used when processing Match instructions. |
102 | // Frees all the threads on runq. |
103 | // If there is a shortcut to the end, returns that shortcut. |
104 | int Step(Threadq* runq, Threadq* nextq, int c, absl::string_view context, |
105 | const char* p); |
106 | |
107 | // Returns text version of capture information, for debugging. |
108 | std::string FormatCapture(const char** capture); |
109 | |
110 | void CopyCapture(const char** dst, const char** src) { |
111 | memmove(dst, src, ncapture_*sizeof src[0]); |
112 | } |
113 | |
114 | Prog* prog_; // underlying program |
115 | int start_; // start instruction in program |
116 | int ncapture_; // number of submatches to track |
117 | bool longest_; // whether searching for longest match |
118 | bool endmatch_; // whether match must end at text.end() |
119 | const char* btext_; // beginning of text (for FormatSubmatch) |
120 | const char* etext_; // end of text (for endmatch_) |
121 | Threadq q0_, q1_; // pre-allocated for Search. |
122 | PODArray<AddState> stack_; // pre-allocated for AddToThreadq |
123 | std::deque<Thread> arena_; // thread arena |
124 | Thread* freelist_; // thread freelist |
125 | const char** match_; // best match so far |
126 | bool matched_; // any match so far? |
127 | |
128 | NFA(const NFA&) = delete; |
129 | NFA& operator=(const NFA&) = delete; |
130 | }; |
131 | |
132 | NFA::NFA(Prog* prog) { |
133 | prog_ = prog; |
134 | start_ = prog_->start(); |
135 | ncapture_ = 0; |
136 | longest_ = false; |
137 | endmatch_ = false; |
138 | btext_ = NULL; |
139 | etext_ = NULL; |
140 | q0_.resize(prog_->size()); |
141 | q1_.resize(prog_->size()); |
142 | // See NFA::AddToThreadq() for why this is so. |
143 | int nstack = 2*prog_->inst_count(kInstCapture) + |
144 | prog_->inst_count(kInstEmptyWidth) + |
145 | prog_->inst_count(kInstNop) + 1; // + 1 for start inst |
146 | stack_ = PODArray<AddState>(nstack); |
147 | freelist_ = NULL; |
148 | match_ = NULL; |
149 | matched_ = false; |
150 | } |
151 | |
152 | NFA::~NFA() { |
153 | delete[] match_; |
154 | for (const Thread& t : arena_) |
155 | delete[] t.capture; |
156 | } |
157 | |
158 | NFA::Thread* NFA::AllocThread() { |
159 | Thread* t = freelist_; |
160 | if (t != NULL) { |
161 | freelist_ = t->next; |
162 | t->ref = 1; |
163 | // We don't need to touch t->capture because |
164 | // the caller will immediately overwrite it. |
165 | return t; |
166 | } |
167 | arena_.emplace_back(); |
168 | t = &arena_.back(); |
169 | t->ref = 1; |
170 | t->capture = new const char*[ncapture_]; |
171 | return t; |
172 | } |
173 | |
174 | NFA::Thread* NFA::Incref(Thread* t) { |
175 | DCHECK(t != NULL); |
176 | t->ref++; |
177 | return t; |
178 | } |
179 | |
180 | void NFA::Decref(Thread* t) { |
181 | DCHECK(t != NULL); |
182 | t->ref--; |
183 | if (t->ref > 0) |
184 | return; |
185 | DCHECK_EQ(t->ref, 0); |
186 | t->next = freelist_; |
187 | freelist_ = t; |
188 | } |
189 | |
190 | // Follows all empty arrows from id0 and enqueues all the states reached. |
191 | // Enqueues only the ByteRange instructions that match byte c. |
192 | // context is used (with p) for evaluating empty-width specials. |
193 | // p is the current input position, and t0 is the current thread. |
194 | void NFA::AddToThreadq(Threadq* q, int id0, int c, absl::string_view context, |
195 | const char* p, Thread* t0) { |
196 | if (id0 == 0) |
197 | return; |
198 | |
199 | // Use stack_ to hold our stack of instructions yet to process. |
200 | // It was preallocated as follows: |
201 | // two entries per Capture; |
202 | // one entry per EmptyWidth; and |
203 | // one entry per Nop. |
204 | // This reflects the maximum number of stack pushes that each can |
205 | // perform. (Each instruction can be processed at most once.) |
206 | AddState* stk = stack_.data(); |
207 | int nstk = 0; |
208 | |
209 | stk[nstk++] = {id0, NULL}; |
210 | while (nstk > 0) { |
211 | DCHECK_LE(nstk, stack_.size()); |
212 | AddState a = stk[--nstk]; |
213 | |
214 | Loop: |
215 | if (a.t != NULL) { |
216 | // t0 was a thread that we allocated and copied in order to |
217 | // record the capture, so we must now decref it. |
218 | Decref(t0); |
219 | t0 = a.t; |
220 | } |
221 | |
222 | int id = a.id; |
223 | if (id == 0) |
224 | continue; |
225 | if (q->has_index(id)) { |
226 | if (ExtraDebug) |
227 | absl::FPrintF(stderr, " [%d%s]\n" , id, FormatCapture(t0->capture)); |
228 | continue; |
229 | } |
230 | |
231 | // Create entry in q no matter what. We might fill it in below, |
232 | // or we might not. Even if not, it is necessary to have it, |
233 | // so that we don't revisit id0 during the recursion. |
234 | q->set_new(id, NULL); |
235 | Thread** tp = &q->get_existing(id); |
236 | int j; |
237 | Thread* t; |
238 | Prog::Inst* ip = prog_->inst(id); |
239 | switch (ip->opcode()) { |
240 | default: |
241 | LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq" ; |
242 | break; |
243 | |
244 | case kInstFail: |
245 | break; |
246 | |
247 | case kInstAltMatch: |
248 | // Save state; will pick up at next byte. |
249 | t = Incref(t0); |
250 | *tp = t; |
251 | |
252 | DCHECK(!ip->last()); |
253 | a = {id+1, NULL}; |
254 | goto Loop; |
255 | |
256 | case kInstNop: |
257 | if (!ip->last()) |
258 | stk[nstk++] = {id+1, NULL}; |
259 | |
260 | // Continue on. |
261 | a = {ip->out(), NULL}; |
262 | goto Loop; |
263 | |
264 | case kInstCapture: |
265 | if (!ip->last()) |
266 | stk[nstk++] = {id+1, NULL}; |
267 | |
268 | if ((j=ip->cap()) < ncapture_) { |
269 | // Push a dummy whose only job is to restore t0 |
270 | // once we finish exploring this possibility. |
271 | stk[nstk++] = {0, t0}; |
272 | |
273 | // Record capture. |
274 | t = AllocThread(); |
275 | CopyCapture(t->capture, t0->capture); |
276 | t->capture[j] = p; |
277 | t0 = t; |
278 | } |
279 | a = {ip->out(), NULL}; |
280 | goto Loop; |
281 | |
282 | case kInstByteRange: |
283 | if (!ip->Matches(c)) |
284 | goto Next; |
285 | |
286 | // Save state; will pick up at next byte. |
287 | t = Incref(t0); |
288 | *tp = t; |
289 | if (ExtraDebug) |
290 | absl::FPrintF(stderr, " + %d%s\n" , id, FormatCapture(t0->capture)); |
291 | |
292 | if (ip->hint() == 0) |
293 | break; |
294 | a = {id+ip->hint(), NULL}; |
295 | goto Loop; |
296 | |
297 | case kInstMatch: |
298 | // Save state; will pick up at next byte. |
299 | t = Incref(t0); |
300 | *tp = t; |
301 | if (ExtraDebug) |
302 | absl::FPrintF(stderr, " ! %d%s\n" , id, FormatCapture(t0->capture)); |
303 | |
304 | Next: |
305 | if (ip->last()) |
306 | break; |
307 | a = {id+1, NULL}; |
308 | goto Loop; |
309 | |
310 | case kInstEmptyWidth: |
311 | if (!ip->last()) |
312 | stk[nstk++] = {id+1, NULL}; |
313 | |
314 | // Continue on if we have all the right flag bits. |
315 | if (ip->empty() & ~Prog::EmptyFlags(context, p)) |
316 | break; |
317 | a = {ip->out(), NULL}; |
318 | goto Loop; |
319 | } |
320 | } |
321 | } |
322 | |
323 | // Run runq on byte c, appending new states to nextq. |
324 | // Updates matched_ and match_ as new, better matches are found. |
325 | // context is used (with p) for evaluating empty-width specials. |
326 | // p is the position of byte c in the input string for AddToThreadq; |
327 | // p-1 will be used when processing Match instructions. |
328 | // Frees all the threads on runq. |
329 | // If there is a shortcut to the end, returns that shortcut. |
330 | int NFA::Step(Threadq* runq, Threadq* nextq, int c, absl::string_view context, |
331 | const char* p) { |
332 | nextq->clear(); |
333 | |
334 | for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { |
335 | Thread* t = i->value(); |
336 | if (t == NULL) |
337 | continue; |
338 | |
339 | if (longest_) { |
340 | // Can skip any threads started after our current best match. |
341 | if (matched_ && match_[0] < t->capture[0]) { |
342 | Decref(t); |
343 | continue; |
344 | } |
345 | } |
346 | |
347 | int id = i->index(); |
348 | Prog::Inst* ip = prog_->inst(id); |
349 | |
350 | switch (ip->opcode()) { |
351 | default: |
352 | // Should only see the values handled below. |
353 | LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step" ; |
354 | break; |
355 | |
356 | case kInstByteRange: |
357 | AddToThreadq(nextq, ip->out(), c, context, p, t); |
358 | break; |
359 | |
360 | case kInstAltMatch: |
361 | if (i != runq->begin()) |
362 | break; |
363 | // The match is ours if we want it. |
364 | if (ip->greedy(prog_) || longest_) { |
365 | CopyCapture(match_, t->capture); |
366 | matched_ = true; |
367 | |
368 | Decref(t); |
369 | for (++i; i != runq->end(); ++i) { |
370 | if (i->value() != NULL) |
371 | Decref(i->value()); |
372 | } |
373 | runq->clear(); |
374 | if (ip->greedy(prog_)) |
375 | return ip->out1(); |
376 | return ip->out(); |
377 | } |
378 | break; |
379 | |
380 | case kInstMatch: { |
381 | // Avoid invoking undefined behavior (arithmetic on a null pointer) |
382 | // by storing p instead of p-1. (What would the latter even mean?!) |
383 | // This complements the special case in NFA::Search(). |
384 | if (p == NULL) { |
385 | CopyCapture(match_, t->capture); |
386 | match_[1] = p; |
387 | matched_ = true; |
388 | break; |
389 | } |
390 | |
391 | if (endmatch_ && p-1 != etext_) |
392 | break; |
393 | |
394 | if (longest_) { |
395 | // Leftmost-longest mode: save this match only if |
396 | // it is either farther to the left or at the same |
397 | // point but longer than an existing match. |
398 | if (!matched_ || t->capture[0] < match_[0] || |
399 | (t->capture[0] == match_[0] && p-1 > match_[1])) { |
400 | CopyCapture(match_, t->capture); |
401 | match_[1] = p-1; |
402 | matched_ = true; |
403 | } |
404 | } else { |
405 | // Leftmost-biased mode: this match is by definition |
406 | // better than what we've already found (see next line). |
407 | CopyCapture(match_, t->capture); |
408 | match_[1] = p-1; |
409 | matched_ = true; |
410 | |
411 | // Cut off the threads that can only find matches |
412 | // worse than the one we just found: don't run the |
413 | // rest of the current Threadq. |
414 | Decref(t); |
415 | for (++i; i != runq->end(); ++i) { |
416 | if (i->value() != NULL) |
417 | Decref(i->value()); |
418 | } |
419 | runq->clear(); |
420 | return 0; |
421 | } |
422 | break; |
423 | } |
424 | } |
425 | Decref(t); |
426 | } |
427 | runq->clear(); |
428 | return 0; |
429 | } |
430 | |
431 | std::string NFA::FormatCapture(const char** capture) { |
432 | std::string s; |
433 | for (int i = 0; i < ncapture_; i+=2) { |
434 | if (capture[i] == NULL) |
435 | s += "(?,?)" ; |
436 | else if (capture[i+1] == NULL) |
437 | s += absl::StrFormat("(%d,?)" , |
438 | capture[i] - btext_); |
439 | else |
440 | s += absl::StrFormat("(%d,%d)" , |
441 | capture[i] - btext_, |
442 | capture[i+1] - btext_); |
443 | } |
444 | return s; |
445 | } |
446 | |
447 | bool NFA::Search(absl::string_view text, absl::string_view context, |
448 | bool anchored, bool longest, absl::string_view* submatch, |
449 | int nsubmatch) { |
450 | if (start_ == 0) |
451 | return false; |
452 | |
453 | if (context.data() == NULL) |
454 | context = text; |
455 | |
456 | // Sanity check: make sure that text lies within context. |
457 | if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) { |
458 | LOG(DFATAL) << "context does not contain text" ; |
459 | return false; |
460 | } |
461 | |
462 | if (prog_->anchor_start() && BeginPtr(context) != BeginPtr(text)) |
463 | return false; |
464 | if (prog_->anchor_end() && EndPtr(context) != EndPtr(text)) |
465 | return false; |
466 | anchored |= prog_->anchor_start(); |
467 | if (prog_->anchor_end()) { |
468 | longest = true; |
469 | endmatch_ = true; |
470 | } |
471 | |
472 | if (nsubmatch < 0) { |
473 | LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch; |
474 | return false; |
475 | } |
476 | |
477 | // Save search parameters. |
478 | ncapture_ = 2*nsubmatch; |
479 | longest_ = longest; |
480 | |
481 | if (nsubmatch == 0) { |
482 | // We need to maintain match[0], both to distinguish the |
483 | // longest match (if longest is true) and also to tell |
484 | // whether we've seen any matches at all. |
485 | ncapture_ = 2; |
486 | } |
487 | |
488 | match_ = new const char*[ncapture_]; |
489 | memset(match_, 0, ncapture_*sizeof match_[0]); |
490 | matched_ = false; |
491 | |
492 | // For debugging prints. |
493 | btext_ = context.data(); |
494 | // For convenience. |
495 | etext_ = text.data() + text.size(); |
496 | |
497 | if (ExtraDebug) |
498 | absl::FPrintF(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n" , |
499 | text, context, anchored, longest); |
500 | |
501 | // Set up search. |
502 | Threadq* runq = &q0_; |
503 | Threadq* nextq = &q1_; |
504 | runq->clear(); |
505 | nextq->clear(); |
506 | |
507 | // Loop over the text, stepping the machine. |
508 | for (const char* p = text.data();; p++) { |
509 | if (ExtraDebug) { |
510 | int c = 0; |
511 | if (p == btext_) |
512 | c = '^'; |
513 | else if (p > etext_) |
514 | c = '$'; |
515 | else if (p < etext_) |
516 | c = p[0] & 0xFF; |
517 | |
518 | absl::FPrintF(stderr, "%c:" , c); |
519 | for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { |
520 | Thread* t = i->value(); |
521 | if (t == NULL) |
522 | continue; |
523 | absl::FPrintF(stderr, " %d%s" , i->index(), FormatCapture(t->capture)); |
524 | } |
525 | absl::FPrintF(stderr, "\n" ); |
526 | } |
527 | |
528 | // This is a no-op the first time around the loop because runq is empty. |
529 | int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p); |
530 | DCHECK_EQ(runq->size(), 0); |
531 | using std::swap; |
532 | swap(nextq, runq); |
533 | nextq->clear(); |
534 | if (id != 0) { |
535 | // We're done: full match ahead. |
536 | p = etext_; |
537 | for (;;) { |
538 | Prog::Inst* ip = prog_->inst(id); |
539 | switch (ip->opcode()) { |
540 | default: |
541 | LOG(DFATAL) << "Unexpected opcode in short circuit: " << ip->opcode(); |
542 | break; |
543 | |
544 | case kInstCapture: |
545 | if (ip->cap() < ncapture_) |
546 | match_[ip->cap()] = p; |
547 | id = ip->out(); |
548 | continue; |
549 | |
550 | case kInstNop: |
551 | id = ip->out(); |
552 | continue; |
553 | |
554 | case kInstMatch: |
555 | match_[1] = p; |
556 | matched_ = true; |
557 | break; |
558 | } |
559 | break; |
560 | } |
561 | break; |
562 | } |
563 | |
564 | if (p > etext_) |
565 | break; |
566 | |
567 | // Start a new thread if there have not been any matches. |
568 | // (No point in starting a new thread if there have been |
569 | // matches, since it would be to the right of the match |
570 | // we already found.) |
571 | if (!matched_ && (!anchored || p == text.data())) { |
572 | // Try to use prefix accel (e.g. memchr) to skip ahead. |
573 | // The search must be unanchored and there must be zero |
574 | // possible matches already. |
575 | if (!anchored && runq->size() == 0 && |
576 | p < etext_ && prog_->can_prefix_accel()) { |
577 | p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p)); |
578 | if (p == NULL) |
579 | p = etext_; |
580 | } |
581 | |
582 | Thread* t = AllocThread(); |
583 | CopyCapture(t->capture, match_); |
584 | t->capture[0] = p; |
585 | AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p, |
586 | t); |
587 | Decref(t); |
588 | } |
589 | |
590 | // If all the threads have died, stop early. |
591 | if (runq->size() == 0) { |
592 | if (ExtraDebug) |
593 | absl::FPrintF(stderr, "dead\n" ); |
594 | break; |
595 | } |
596 | |
597 | // Avoid invoking undefined behavior (arithmetic on a null pointer) |
598 | // by simply not continuing the loop. |
599 | // This complements the special case in NFA::Step(). |
600 | if (p == NULL) { |
601 | (void) Step(runq, nextq, -1, context, p); |
602 | DCHECK_EQ(runq->size(), 0); |
603 | using std::swap; |
604 | swap(nextq, runq); |
605 | nextq->clear(); |
606 | break; |
607 | } |
608 | } |
609 | |
610 | for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { |
611 | if (i->value() != NULL) |
612 | Decref(i->value()); |
613 | } |
614 | |
615 | if (matched_) { |
616 | for (int i = 0; i < nsubmatch; i++) |
617 | submatch[i] = absl::string_view( |
618 | match_[2 * i], |
619 | static_cast<size_t>(match_[2 * i + 1] - match_[2 * i])); |
620 | if (ExtraDebug) |
621 | absl::FPrintF(stderr, "match (%d,%d)\n" , |
622 | match_[0] - btext_, |
623 | match_[1] - btext_); |
624 | return true; |
625 | } |
626 | return false; |
627 | } |
628 | |
629 | bool Prog::SearchNFA(absl::string_view text, absl::string_view context, |
630 | Anchor anchor, MatchKind kind, absl::string_view* match, |
631 | int nmatch) { |
632 | if (ExtraDebug) |
633 | Dump(); |
634 | |
635 | NFA nfa(this); |
636 | absl::string_view sp; |
637 | if (kind == kFullMatch) { |
638 | anchor = kAnchored; |
639 | if (nmatch == 0) { |
640 | match = &sp; |
641 | nmatch = 1; |
642 | } |
643 | } |
644 | if (!nfa.Search(text, context, anchor == kAnchored, kind != kFirstMatch, match, nmatch)) |
645 | return false; |
646 | if (kind == kFullMatch && EndPtr(match[0]) != EndPtr(text)) |
647 | return false; |
648 | return true; |
649 | } |
650 | |
651 | // For each instruction i in the program reachable from the start, compute the |
652 | // number of instructions reachable from i by following only empty transitions |
653 | // and record that count as fanout[i]. |
654 | // |
655 | // fanout holds the results and is also the work queue for the outer iteration. |
656 | // reachable holds the reached nodes for the inner iteration. |
657 | void Prog::Fanout(SparseArray<int>* fanout) { |
658 | DCHECK_EQ(fanout->max_size(), size()); |
659 | SparseSet reachable(size()); |
660 | fanout->clear(); |
661 | fanout->set_new(start(), 0); |
662 | for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) { |
663 | int* count = &i->value(); |
664 | reachable.clear(); |
665 | reachable.insert(i->index()); |
666 | for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) { |
667 | int id = *j; |
668 | Prog::Inst* ip = inst(id); |
669 | switch (ip->opcode()) { |
670 | default: |
671 | LOG(DFATAL) << "unhandled " << ip->opcode() << " in Prog::Fanout()" ; |
672 | break; |
673 | |
674 | case kInstByteRange: |
675 | if (!ip->last()) |
676 | reachable.insert(id+1); |
677 | |
678 | (*count)++; |
679 | if (!fanout->has_index(ip->out())) { |
680 | fanout->set_new(ip->out(), 0); |
681 | } |
682 | break; |
683 | |
684 | case kInstAltMatch: |
685 | DCHECK(!ip->last()); |
686 | reachable.insert(id+1); |
687 | break; |
688 | |
689 | case kInstCapture: |
690 | case kInstEmptyWidth: |
691 | case kInstNop: |
692 | if (!ip->last()) |
693 | reachable.insert(id+1); |
694 | |
695 | reachable.insert(ip->out()); |
696 | break; |
697 | |
698 | case kInstMatch: |
699 | if (!ip->last()) |
700 | reachable.insert(id+1); |
701 | break; |
702 | |
703 | case kInstFail: |
704 | break; |
705 | } |
706 | } |
707 | } |
708 | } |
709 | |
710 | } // namespace re2 |
711 | |