1/* stringlib: fastsearch implementation */
2
3#define STRINGLIB_FASTSEARCH_H
4
5/* fast search/count implementation, based on a mix between boyer-
6 moore and horspool, with a few more bells and whistles on the top.
7 for some more background, see:
8 https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9
10/* note: fastsearch may access s[n], which isn't a problem when using
11 Python's ordinary string types, but may cause problems if you're
12 using this code in other contexts. also, the count mode returns -1
13 if there cannot possibly be a match in the target string, and 0 if
14 it has actually checked for matches, but didn't find any. callers
15 beware! */
16
17/* If the strings are long enough, use Crochemore and Perrin's Two-Way
18 algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19 Also compute a table of shifts to achieve O(n/k) in more cases,
20 and often (data dependent) deduce larger shifts than pure C&P can
21 deduce. */
22
23#define FAST_COUNT 0
24#define FAST_SEARCH 1
25#define FAST_RSEARCH 2
26
27#if LONG_BIT >= 128
28#define STRINGLIB_BLOOM_WIDTH 128
29#elif LONG_BIT >= 64
30#define STRINGLIB_BLOOM_WIDTH 64
31#elif LONG_BIT >= 32
32#define STRINGLIB_BLOOM_WIDTH 32
33#else
34#error "LONG_BIT is smaller than 32"
35#endif
36
37#define STRINGLIB_BLOOM_ADD(mask, ch) \
38 ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
39#define STRINGLIB_BLOOM(mask, ch) \
40 ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
41
42#if STRINGLIB_SIZEOF_CHAR == 1
43# define MEMCHR_CUT_OFF 15
44#else
45# define MEMCHR_CUT_OFF 40
46#endif
47
48Py_LOCAL_INLINE(Py_ssize_t)
49STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
50{
51 const STRINGLIB_CHAR *p, *e;
52
53 p = s;
54 e = s + n;
55 if (n > MEMCHR_CUT_OFF) {
56#if STRINGLIB_SIZEOF_CHAR == 1
57 p = memchr(s, ch, n);
58 if (p != NULL)
59 return (p - s);
60 return -1;
61#else
62 /* use memchr if we can choose a needle without too many likely
63 false positives */
64 const STRINGLIB_CHAR *s1, *e1;
65 unsigned char needle = ch & 0xff;
66 /* If looking for a multiple of 256, we'd have too
67 many false positives looking for the '\0' byte in UCS2
68 and UCS4 representations. */
69 if (needle != 0) {
70 do {
71 void *candidate = memchr(p, needle,
72 (e - p) * sizeof(STRINGLIB_CHAR));
73 if (candidate == NULL)
74 return -1;
75 s1 = p;
76 p = (const STRINGLIB_CHAR *)
77 _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
78 if (*p == ch)
79 return (p - s);
80 /* False positive */
81 p++;
82 if (p - s1 > MEMCHR_CUT_OFF)
83 continue;
84 if (e - p <= MEMCHR_CUT_OFF)
85 break;
86 e1 = p + MEMCHR_CUT_OFF;
87 while (p != e1) {
88 if (*p == ch)
89 return (p - s);
90 p++;
91 }
92 }
93 while (e - p > MEMCHR_CUT_OFF);
94 }
95#endif
96 }
97 while (p < e) {
98 if (*p == ch)
99 return (p - s);
100 p++;
101 }
102 return -1;
103}
104
105Py_LOCAL_INLINE(Py_ssize_t)
106STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
107{
108 const STRINGLIB_CHAR *p;
109#ifdef HAVE_MEMRCHR
110 /* memrchr() is a GNU extension, available since glibc 2.1.91.
111 it doesn't seem as optimized as memchr(), but is still quite
112 faster than our hand-written loop below */
113
114 if (n > MEMCHR_CUT_OFF) {
115#if STRINGLIB_SIZEOF_CHAR == 1
116 p = memrchr(s, ch, n);
117 if (p != NULL)
118 return (p - s);
119 return -1;
120#else
121 /* use memrchr if we can choose a needle without too many likely
122 false positives */
123 const STRINGLIB_CHAR *s1;
124 Py_ssize_t n1;
125 unsigned char needle = ch & 0xff;
126 /* If looking for a multiple of 256, we'd have too
127 many false positives looking for the '\0' byte in UCS2
128 and UCS4 representations. */
129 if (needle != 0) {
130 do {
131 void *candidate = memrchr(s, needle,
132 n * sizeof(STRINGLIB_CHAR));
133 if (candidate == NULL)
134 return -1;
135 n1 = n;
136 p = (const STRINGLIB_CHAR *)
137 _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
138 n = p - s;
139 if (*p == ch)
140 return n;
141 /* False positive */
142 if (n1 - n > MEMCHR_CUT_OFF)
143 continue;
144 if (n <= MEMCHR_CUT_OFF)
145 break;
146 s1 = p - MEMCHR_CUT_OFF;
147 while (p > s1) {
148 p--;
149 if (*p == ch)
150 return (p - s);
151 }
152 n = p - s;
153 }
154 while (n > MEMCHR_CUT_OFF);
155 }
156#endif
157 }
158#endif /* HAVE_MEMRCHR */
159 p = s + n;
160 while (p > s) {
161 p--;
162 if (*p == ch)
163 return (p - s);
164 }
165 return -1;
166}
167
168#undef MEMCHR_CUT_OFF
169
170/* Change to a 1 to see logging comments walk through the algorithm. */
171#if 0 && STRINGLIB_SIZEOF_CHAR == 1
172# define LOG(...) printf(__VA_ARGS__)
173# define LOG_STRING(s, n) printf("\"%.*s\"", n, s)
174#else
175# define LOG(...)
176# define LOG_STRING(s, n)
177#endif
178
179Py_LOCAL_INLINE(Py_ssize_t)
180STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
181 Py_ssize_t *return_period, int invert_alphabet)
182{
183 /* Do a lexicographic search. Essentially this:
184 >>> max(needle[i:] for i in range(len(needle)+1))
185 Also find the period of the right half. */
186 Py_ssize_t max_suffix = 0;
187 Py_ssize_t candidate = 1;
188 Py_ssize_t k = 0;
189 // The period of the right half.
190 Py_ssize_t period = 1;
191
192 while (candidate + k < len_needle) {
193 // each loop increases candidate + k + max_suffix
194 STRINGLIB_CHAR a = needle[candidate + k];
195 STRINGLIB_CHAR b = needle[max_suffix + k];
196 // check if the suffix at candidate is better than max_suffix
197 if (invert_alphabet ? (b < a) : (a < b)) {
198 // Fell short of max_suffix.
199 // The next k + 1 characters are non-increasing
200 // from candidate, so they won't start a maximal suffix.
201 candidate += k + 1;
202 k = 0;
203 // We've ruled out any period smaller than what's
204 // been scanned since max_suffix.
205 period = candidate - max_suffix;
206 }
207 else if (a == b) {
208 if (k + 1 != period) {
209 // Keep scanning the equal strings
210 k++;
211 }
212 else {
213 // Matched a whole period.
214 // Start matching the next period.
215 candidate += period;
216 k = 0;
217 }
218 }
219 else {
220 // Did better than max_suffix, so replace it.
221 max_suffix = candidate;
222 candidate++;
223 k = 0;
224 period = 1;
225 }
226 }
227 *return_period = period;
228 return max_suffix;
229}
230
231Py_LOCAL_INLINE(Py_ssize_t)
232STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
233 Py_ssize_t len_needle,
234 Py_ssize_t *return_period)
235{
236 /* Do a "critical factorization", making it so that:
237 >>> needle = (left := needle[:cut]) + (right := needle[cut:])
238 where the "local period" of the cut is maximal.
239
240 The local period of the cut is the minimal length of a string w
241 such that (left endswith w or w endswith left)
242 and (right startswith w or w startswith left).
243
244 The Critical Factorization Theorem says that this maximal local
245 period is the global period of the string.
246
247 Crochemore and Perrin (1991) show that this cut can be computed
248 as the later of two cuts: one that gives a lexicographically
249 maximal right half, and one that gives the same with the
250 with respect to a reversed alphabet-ordering.
251
252 This is what we want to happen:
253 >>> x = "GCAGAGAG"
254 >>> cut, period = factorize(x)
255 >>> x[:cut], (right := x[cut:])
256 ('GC', 'AGAGAG')
257 >>> period # right half period
258 2
259 >>> right[period:] == right[:-period]
260 True
261
262 This is how the local period lines up in the above example:
263 GC | AGAGAG
264 AGAGAGC = AGAGAGC
265 The length of this minimal repetition is 7, which is indeed the
266 period of the original string. */
267
268 Py_ssize_t cut1, period1, cut2, period2, cut, period;
269 cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
270 cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
271
272 // Take the later cut.
273 if (cut1 > cut2) {
274 period = period1;
275 cut = cut1;
276 }
277 else {
278 period = period2;
279 cut = cut2;
280 }
281
282 LOG("split: "); LOG_STRING(needle, cut);
283 LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
284 LOG("\n");
285
286 *return_period = period;
287 return cut;
288}
289
290#define SHIFT_TYPE uint8_t
291#define NOT_FOUND ((1U<<(8*sizeof(SHIFT_TYPE))) - 1U)
292#define SHIFT_OVERFLOW (NOT_FOUND - 1U)
293
294#define TABLE_SIZE_BITS 6
295#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
296#define TABLE_MASK (TABLE_SIZE - 1U)
297
298typedef struct STRINGLIB(_pre) {
299 const STRINGLIB_CHAR *needle;
300 Py_ssize_t len_needle;
301 Py_ssize_t cut;
302 Py_ssize_t period;
303 int is_periodic;
304 SHIFT_TYPE table[TABLE_SIZE];
305} STRINGLIB(prework);
306
307
308Py_LOCAL_INLINE(void)
309STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
310 STRINGLIB(prework) *p)
311{
312 p->needle = needle;
313 p->len_needle = len_needle;
314 p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
315 assert(p->period + p->cut <= len_needle);
316 p->is_periodic = (0 == memcmp(needle,
317 needle + p->period,
318 p->cut * STRINGLIB_SIZEOF_CHAR));
319 if (p->is_periodic) {
320 assert(p->cut <= len_needle/2);
321 assert(p->cut < p->period);
322 }
323 else {
324 // A lower bound on the period
325 p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
326 }
327 // Now fill up a table
328 memset(&(p->table[0]), 0xff, TABLE_SIZE*sizeof(SHIFT_TYPE));
329 assert(p->table[0] == NOT_FOUND);
330 assert(p->table[TABLE_MASK] == NOT_FOUND);
331 for (Py_ssize_t i = 0; i < len_needle; i++) {
332 Py_ssize_t shift = len_needle - i;
333 if (shift > SHIFT_OVERFLOW) {
334 shift = SHIFT_OVERFLOW;
335 }
336 p->table[needle[i] & TABLE_MASK] = Py_SAFE_DOWNCAST(shift,
337 Py_ssize_t,
338 SHIFT_TYPE);
339 }
340}
341
342Py_LOCAL_INLINE(Py_ssize_t)
343STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
344 STRINGLIB(prework) *p)
345{
346 // Crochemore and Perrin's (1991) Two-Way algorithm.
347 // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
348 Py_ssize_t len_needle = p->len_needle;
349 Py_ssize_t cut = p->cut;
350 Py_ssize_t period = p->period;
351 const STRINGLIB_CHAR *needle = p->needle;
352 const STRINGLIB_CHAR *window = haystack;
353 const STRINGLIB_CHAR *last_window = haystack + len_haystack - len_needle;
354 SHIFT_TYPE *table = p->table;
355 LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
356
357 if (p->is_periodic) {
358 LOG("Needle is periodic.\n");
359 Py_ssize_t memory = 0;
360 periodicwindowloop:
361 while (window <= last_window) {
362 Py_ssize_t i = Py_MAX(cut, memory);
363
364 // Visualize the line-up:
365 LOG("> "); LOG_STRING(haystack, len_haystack);
366 LOG("\n> "); LOG("%*s", window - haystack, "");
367 LOG_STRING(needle, len_needle);
368 LOG("\n> "); LOG("%*s", window - haystack + i, "");
369 LOG(" ^ <-- cut\n");
370
371 if (window[i] != needle[i]) {
372 // Sunday's trick: if we're going to jump, we might
373 // as well jump to line up the character *after* the
374 // current window.
375 STRINGLIB_CHAR first_outside = window[len_needle];
376 SHIFT_TYPE shift = table[first_outside & TABLE_MASK];
377 if (shift == NOT_FOUND) {
378 LOG("\"%c\" not found. Skipping entirely.\n",
379 first_outside);
380 window += len_needle + 1;
381 }
382 else {
383 LOG("Shifting to line up \"%c\".\n", first_outside);
384 Py_ssize_t memory_shift = i - cut + 1;
385 window += Py_MAX(shift, memory_shift);
386 }
387 memory = 0;
388 goto periodicwindowloop;
389 }
390 for (i = i + 1; i < len_needle; i++) {
391 if (needle[i] != window[i]) {
392 LOG("Right half does not match. Jump ahead by %d.\n",
393 i - cut + 1);
394 window += i - cut + 1;
395 memory = 0;
396 goto periodicwindowloop;
397 }
398 }
399 for (i = memory; i < cut; i++) {
400 if (needle[i] != window[i]) {
401 LOG("Left half does not match. Jump ahead by period %d.\n",
402 period);
403 window += period;
404 memory = len_needle - period;
405 goto periodicwindowloop;
406 }
407 }
408 LOG("Left half matches. Returning %d.\n",
409 window - haystack);
410 return window - haystack;
411 }
412 }
413 else {
414 LOG("Needle is not periodic.\n");
415 assert(cut < len_needle);
416 STRINGLIB_CHAR needle_cut = needle[cut];
417 windowloop:
418 while (window <= last_window) {
419
420 // Visualize the line-up:
421 LOG("> "); LOG_STRING(haystack, len_haystack);
422 LOG("\n> "); LOG("%*s", window - haystack, "");
423 LOG_STRING(needle, len_needle);
424 LOG("\n> "); LOG("%*s", window - haystack + cut, "");
425 LOG(" ^ <-- cut\n");
426
427 if (window[cut] != needle_cut) {
428 // Sunday's trick: if we're going to jump, we might
429 // as well jump to line up the character *after* the
430 // current window.
431 STRINGLIB_CHAR first_outside = window[len_needle];
432 SHIFT_TYPE shift = table[first_outside & TABLE_MASK];
433 if (shift == NOT_FOUND) {
434 LOG("\"%c\" not found. Skipping entirely.\n",
435 first_outside);
436 window += len_needle + 1;
437 }
438 else {
439 LOG("Shifting to line up \"%c\".\n", first_outside);
440 window += shift;
441 }
442 goto windowloop;
443 }
444 for (Py_ssize_t i = cut + 1; i < len_needle; i++) {
445 if (needle[i] != window[i]) {
446 LOG("Right half does not match. Advance by %d.\n",
447 i - cut + 1);
448 window += i - cut + 1;
449 goto windowloop;
450 }
451 }
452 for (Py_ssize_t i = 0; i < cut; i++) {
453 if (needle[i] != window[i]) {
454 LOG("Left half does not match. Advance by period %d.\n",
455 period);
456 window += period;
457 goto windowloop;
458 }
459 }
460 LOG("Left half matches. Returning %d.\n", window - haystack);
461 return window - haystack;
462 }
463 }
464 LOG("Not found. Returning -1.\n");
465 return -1;
466}
467
468Py_LOCAL_INLINE(Py_ssize_t)
469STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
470 Py_ssize_t len_haystack,
471 const STRINGLIB_CHAR *needle,
472 Py_ssize_t len_needle)
473{
474 LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
475 STRINGLIB(prework) p;
476 STRINGLIB(_preprocess)(needle, len_needle, &p);
477 return STRINGLIB(_two_way)(haystack, len_haystack, &p);
478}
479
480Py_LOCAL_INLINE(Py_ssize_t)
481STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
482 Py_ssize_t len_haystack,
483 const STRINGLIB_CHAR *needle,
484 Py_ssize_t len_needle,
485 Py_ssize_t maxcount)
486{
487 LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
488 STRINGLIB(prework) p;
489 STRINGLIB(_preprocess)(needle, len_needle, &p);
490 Py_ssize_t index = 0, count = 0;
491 while (1) {
492 Py_ssize_t result;
493 result = STRINGLIB(_two_way)(haystack + index,
494 len_haystack - index, &p);
495 if (result == -1) {
496 return count;
497 }
498 count++;
499 if (count == maxcount) {
500 return maxcount;
501 }
502 index += result + len_needle;
503 }
504 return count;
505}
506
507#undef SHIFT_TYPE
508#undef NOT_FOUND
509#undef SHIFT_OVERFLOW
510#undef TABLE_SIZE_BITS
511#undef TABLE_SIZE
512#undef TABLE_MASK
513
514#undef LOG
515#undef LOG_STRING
516
517Py_LOCAL_INLINE(Py_ssize_t)
518FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
519 const STRINGLIB_CHAR* p, Py_ssize_t m,
520 Py_ssize_t maxcount, int mode)
521{
522 unsigned long mask;
523 Py_ssize_t skip, count = 0;
524 Py_ssize_t i, j, mlast, w;
525
526 w = n - m;
527
528 if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
529 return -1;
530
531 /* look for special cases */
532 if (m <= 1) {
533 if (m <= 0)
534 return -1;
535 /* use special case for 1-character strings */
536 if (mode == FAST_SEARCH)
537 return STRINGLIB(find_char)(s, n, p[0]);
538 else if (mode == FAST_RSEARCH)
539 return STRINGLIB(rfind_char)(s, n, p[0]);
540 else { /* FAST_COUNT */
541 for (i = 0; i < n; i++)
542 if (s[i] == p[0]) {
543 count++;
544 if (count == maxcount)
545 return maxcount;
546 }
547 return count;
548 }
549 }
550
551 mlast = m - 1;
552 skip = mlast;
553 mask = 0;
554
555 if (mode != FAST_RSEARCH) {
556 if (m >= 100 && w >= 2000 && w / m >= 5) {
557 /* For larger problems where the needle isn't a huge
558 percentage of the size of the haystack, the relatively
559 expensive O(m) startup cost of the two-way algorithm
560 will surely pay off. */
561 if (mode == FAST_SEARCH) {
562 return STRINGLIB(_two_way_find)(s, n, p, m);
563 }
564 else {
565 return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
566 }
567 }
568 const STRINGLIB_CHAR *ss = s + m - 1;
569 const STRINGLIB_CHAR *pp = p + m - 1;
570
571 /* create compressed boyer-moore delta 1 table */
572
573 /* process pattern[:-1] */
574 for (i = 0; i < mlast; i++) {
575 STRINGLIB_BLOOM_ADD(mask, p[i]);
576 if (p[i] == p[mlast]) {
577 skip = mlast - i - 1;
578 }
579 }
580 /* process pattern[-1] outside the loop */
581 STRINGLIB_BLOOM_ADD(mask, p[mlast]);
582
583 if (m >= 100 && w >= 8000) {
584 /* To ensure that we have good worst-case behavior,
585 here's an adaptive version of the algorithm, where if
586 we match O(m) characters without any matches of the
587 entire needle, then we predict that the startup cost of
588 the two-way algorithm will probably be worth it. */
589 Py_ssize_t hits = 0;
590 for (i = 0; i <= w; i++) {
591 if (ss[i] == pp[0]) {
592 /* candidate match */
593 for (j = 0; j < mlast; j++) {
594 if (s[i+j] != p[j]) {
595 break;
596 }
597 }
598 if (j == mlast) {
599 /* got a match! */
600 if (mode != FAST_COUNT) {
601 return i;
602 }
603 count++;
604 if (count == maxcount) {
605 return maxcount;
606 }
607 i = i + mlast;
608 continue;
609 }
610 /* miss: check if next character is part of pattern */
611 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
612 i = i + m;
613 }
614 else {
615 i = i + skip;
616 }
617 hits += j + 1;
618 if (hits >= m / 4 && i < w - 1000) {
619 /* We've done O(m) fruitless comparisons
620 anyway, so spend the O(m) cost on the
621 setup for the two-way algorithm. */
622 Py_ssize_t res;
623 if (mode == FAST_COUNT) {
624 res = STRINGLIB(_two_way_count)(
625 s+i, n-i, p, m, maxcount-count);
626 return count + res;
627 }
628 else {
629 res = STRINGLIB(_two_way_find)(s+i, n-i, p, m);
630 if (res == -1) {
631 return -1;
632 }
633 return i + res;
634 }
635 }
636 }
637 else {
638 /* skip: check if next character is part of pattern */
639 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
640 i = i + m;
641 }
642 }
643 }
644 if (mode != FAST_COUNT) {
645 return -1;
646 }
647 return count;
648 }
649 /* The standard, non-adaptive version of the algorithm. */
650 for (i = 0; i <= w; i++) {
651 /* note: using mlast in the skip path slows things down on x86 */
652 if (ss[i] == pp[0]) {
653 /* candidate match */
654 for (j = 0; j < mlast; j++) {
655 if (s[i+j] != p[j]) {
656 break;
657 }
658 }
659 if (j == mlast) {
660 /* got a match! */
661 if (mode != FAST_COUNT) {
662 return i;
663 }
664 count++;
665 if (count == maxcount) {
666 return maxcount;
667 }
668 i = i + mlast;
669 continue;
670 }
671 /* miss: check if next character is part of pattern */
672 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
673 i = i + m;
674 }
675 else {
676 i = i + skip;
677 }
678 }
679 else {
680 /* skip: check if next character is part of pattern */
681 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
682 i = i + m;
683 }
684 }
685 }
686 }
687 else { /* FAST_RSEARCH */
688
689 /* create compressed boyer-moore delta 1 table */
690
691 /* process pattern[0] outside the loop */
692 STRINGLIB_BLOOM_ADD(mask, p[0]);
693 /* process pattern[:0:-1] */
694 for (i = mlast; i > 0; i--) {
695 STRINGLIB_BLOOM_ADD(mask, p[i]);
696 if (p[i] == p[0]) {
697 skip = i - 1;
698 }
699 }
700
701 for (i = w; i >= 0; i--) {
702 if (s[i] == p[0]) {
703 /* candidate match */
704 for (j = mlast; j > 0; j--) {
705 if (s[i+j] != p[j]) {
706 break;
707 }
708 }
709 if (j == 0) {
710 /* got a match! */
711 return i;
712 }
713 /* miss: check if previous character is part of pattern */
714 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
715 i = i - m;
716 }
717 else {
718 i = i - skip;
719 }
720 }
721 else {
722 /* skip: check if previous character is part of pattern */
723 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
724 i = i - m;
725 }
726 }
727 }
728 }
729
730 if (mode != FAST_COUNT)
731 return -1;
732 return count;
733}
734
735