1 | /* stringlib: fastsearch implementation */ |
2 | |
3 | #define STRINGLIB_FASTSEARCH_H |
4 | |
5 | /* fast search/count implementation, based on a mix between boyer- |
6 | moore and horspool, with a few more bells and whistles on the top. |
7 | for some more background, see: |
8 | https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */ |
9 | |
10 | /* note: fastsearch may access s[n], which isn't a problem when using |
11 | Python's ordinary string types, but may cause problems if you're |
12 | using this code in other contexts. also, the count mode returns -1 |
13 | if there cannot possibly be a match in the target string, and 0 if |
14 | it has actually checked for matches, but didn't find any. callers |
15 | beware! */ |
16 | |
17 | /* If the strings are long enough, use Crochemore and Perrin's Two-Way |
18 | algorithm, which has worst-case O(n) runtime and best-case O(n/k). |
19 | Also compute a table of shifts to achieve O(n/k) in more cases, |
20 | and often (data dependent) deduce larger shifts than pure C&P can |
21 | deduce. */ |
22 | |
23 | #define FAST_COUNT 0 |
24 | #define FAST_SEARCH 1 |
25 | #define FAST_RSEARCH 2 |
26 | |
27 | #if LONG_BIT >= 128 |
28 | #define STRINGLIB_BLOOM_WIDTH 128 |
29 | #elif LONG_BIT >= 64 |
30 | #define STRINGLIB_BLOOM_WIDTH 64 |
31 | #elif LONG_BIT >= 32 |
32 | #define STRINGLIB_BLOOM_WIDTH 32 |
33 | #else |
34 | #error "LONG_BIT is smaller than 32" |
35 | #endif |
36 | |
37 | #define STRINGLIB_BLOOM_ADD(mask, ch) \ |
38 | ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) |
39 | #define STRINGLIB_BLOOM(mask, ch) \ |
40 | ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) |
41 | |
42 | #if STRINGLIB_SIZEOF_CHAR == 1 |
43 | # define MEMCHR_CUT_OFF 15 |
44 | #else |
45 | # define MEMCHR_CUT_OFF 40 |
46 | #endif |
47 | |
48 | Py_LOCAL_INLINE(Py_ssize_t) |
49 | STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) |
50 | { |
51 | const STRINGLIB_CHAR *p, *e; |
52 | |
53 | p = s; |
54 | e = s + n; |
55 | if (n > MEMCHR_CUT_OFF) { |
56 | #if STRINGLIB_SIZEOF_CHAR == 1 |
57 | p = memchr(s, ch, n); |
58 | if (p != NULL) |
59 | return (p - s); |
60 | return -1; |
61 | #else |
62 | /* use memchr if we can choose a needle without too many likely |
63 | false positives */ |
64 | const STRINGLIB_CHAR *s1, *e1; |
65 | unsigned char needle = ch & 0xff; |
66 | /* If looking for a multiple of 256, we'd have too |
67 | many false positives looking for the '\0' byte in UCS2 |
68 | and UCS4 representations. */ |
69 | if (needle != 0) { |
70 | do { |
71 | void *candidate = memchr(p, needle, |
72 | (e - p) * sizeof(STRINGLIB_CHAR)); |
73 | if (candidate == NULL) |
74 | return -1; |
75 | s1 = p; |
76 | p = (const STRINGLIB_CHAR *) |
77 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); |
78 | if (*p == ch) |
79 | return (p - s); |
80 | /* False positive */ |
81 | p++; |
82 | if (p - s1 > MEMCHR_CUT_OFF) |
83 | continue; |
84 | if (e - p <= MEMCHR_CUT_OFF) |
85 | break; |
86 | e1 = p + MEMCHR_CUT_OFF; |
87 | while (p != e1) { |
88 | if (*p == ch) |
89 | return (p - s); |
90 | p++; |
91 | } |
92 | } |
93 | while (e - p > MEMCHR_CUT_OFF); |
94 | } |
95 | #endif |
96 | } |
97 | while (p < e) { |
98 | if (*p == ch) |
99 | return (p - s); |
100 | p++; |
101 | } |
102 | return -1; |
103 | } |
104 | |
105 | Py_LOCAL_INLINE(Py_ssize_t) |
106 | STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) |
107 | { |
108 | const STRINGLIB_CHAR *p; |
109 | #ifdef HAVE_MEMRCHR |
110 | /* memrchr() is a GNU extension, available since glibc 2.1.91. |
111 | it doesn't seem as optimized as memchr(), but is still quite |
112 | faster than our hand-written loop below */ |
113 | |
114 | if (n > MEMCHR_CUT_OFF) { |
115 | #if STRINGLIB_SIZEOF_CHAR == 1 |
116 | p = memrchr(s, ch, n); |
117 | if (p != NULL) |
118 | return (p - s); |
119 | return -1; |
120 | #else |
121 | /* use memrchr if we can choose a needle without too many likely |
122 | false positives */ |
123 | const STRINGLIB_CHAR *s1; |
124 | Py_ssize_t n1; |
125 | unsigned char needle = ch & 0xff; |
126 | /* If looking for a multiple of 256, we'd have too |
127 | many false positives looking for the '\0' byte in UCS2 |
128 | and UCS4 representations. */ |
129 | if (needle != 0) { |
130 | do { |
131 | void *candidate = memrchr(s, needle, |
132 | n * sizeof(STRINGLIB_CHAR)); |
133 | if (candidate == NULL) |
134 | return -1; |
135 | n1 = n; |
136 | p = (const STRINGLIB_CHAR *) |
137 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); |
138 | n = p - s; |
139 | if (*p == ch) |
140 | return n; |
141 | /* False positive */ |
142 | if (n1 - n > MEMCHR_CUT_OFF) |
143 | continue; |
144 | if (n <= MEMCHR_CUT_OFF) |
145 | break; |
146 | s1 = p - MEMCHR_CUT_OFF; |
147 | while (p > s1) { |
148 | p--; |
149 | if (*p == ch) |
150 | return (p - s); |
151 | } |
152 | n = p - s; |
153 | } |
154 | while (n > MEMCHR_CUT_OFF); |
155 | } |
156 | #endif |
157 | } |
158 | #endif /* HAVE_MEMRCHR */ |
159 | p = s + n; |
160 | while (p > s) { |
161 | p--; |
162 | if (*p == ch) |
163 | return (p - s); |
164 | } |
165 | return -1; |
166 | } |
167 | |
168 | #undef MEMCHR_CUT_OFF |
169 | |
170 | /* Change to a 1 to see logging comments walk through the algorithm. */ |
171 | #if 0 && STRINGLIB_SIZEOF_CHAR == 1 |
172 | # define LOG(...) printf(__VA_ARGS__) |
173 | # define LOG_STRING(s, n) printf("\"%.*s\"", n, s) |
174 | #else |
175 | # define LOG(...) |
176 | # define LOG_STRING(s, n) |
177 | #endif |
178 | |
179 | Py_LOCAL_INLINE(Py_ssize_t) |
180 | STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle, |
181 | Py_ssize_t *return_period, int invert_alphabet) |
182 | { |
183 | /* Do a lexicographic search. Essentially this: |
184 | >>> max(needle[i:] for i in range(len(needle)+1)) |
185 | Also find the period of the right half. */ |
186 | Py_ssize_t max_suffix = 0; |
187 | Py_ssize_t candidate = 1; |
188 | Py_ssize_t k = 0; |
189 | // The period of the right half. |
190 | Py_ssize_t period = 1; |
191 | |
192 | while (candidate + k < len_needle) { |
193 | // each loop increases candidate + k + max_suffix |
194 | STRINGLIB_CHAR a = needle[candidate + k]; |
195 | STRINGLIB_CHAR b = needle[max_suffix + k]; |
196 | // check if the suffix at candidate is better than max_suffix |
197 | if (invert_alphabet ? (b < a) : (a < b)) { |
198 | // Fell short of max_suffix. |
199 | // The next k + 1 characters are non-increasing |
200 | // from candidate, so they won't start a maximal suffix. |
201 | candidate += k + 1; |
202 | k = 0; |
203 | // We've ruled out any period smaller than what's |
204 | // been scanned since max_suffix. |
205 | period = candidate - max_suffix; |
206 | } |
207 | else if (a == b) { |
208 | if (k + 1 != period) { |
209 | // Keep scanning the equal strings |
210 | k++; |
211 | } |
212 | else { |
213 | // Matched a whole period. |
214 | // Start matching the next period. |
215 | candidate += period; |
216 | k = 0; |
217 | } |
218 | } |
219 | else { |
220 | // Did better than max_suffix, so replace it. |
221 | max_suffix = candidate; |
222 | candidate++; |
223 | k = 0; |
224 | period = 1; |
225 | } |
226 | } |
227 | *return_period = period; |
228 | return max_suffix; |
229 | } |
230 | |
231 | Py_LOCAL_INLINE(Py_ssize_t) |
232 | STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle, |
233 | Py_ssize_t len_needle, |
234 | Py_ssize_t *return_period) |
235 | { |
236 | /* Do a "critical factorization", making it so that: |
237 | >>> needle = (left := needle[:cut]) + (right := needle[cut:]) |
238 | where the "local period" of the cut is maximal. |
239 | |
240 | The local period of the cut is the minimal length of a string w |
241 | such that (left endswith w or w endswith left) |
242 | and (right startswith w or w startswith left). |
243 | |
244 | The Critical Factorization Theorem says that this maximal local |
245 | period is the global period of the string. |
246 | |
247 | Crochemore and Perrin (1991) show that this cut can be computed |
248 | as the later of two cuts: one that gives a lexicographically |
249 | maximal right half, and one that gives the same with the |
250 | with respect to a reversed alphabet-ordering. |
251 | |
252 | This is what we want to happen: |
253 | >>> x = "GCAGAGAG" |
254 | >>> cut, period = factorize(x) |
255 | >>> x[:cut], (right := x[cut:]) |
256 | ('GC', 'AGAGAG') |
257 | >>> period # right half period |
258 | 2 |
259 | >>> right[period:] == right[:-period] |
260 | True |
261 | |
262 | This is how the local period lines up in the above example: |
263 | GC | AGAGAG |
264 | AGAGAGC = AGAGAGC |
265 | The length of this minimal repetition is 7, which is indeed the |
266 | period of the original string. */ |
267 | |
268 | Py_ssize_t cut1, period1, cut2, period2, cut, period; |
269 | cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0); |
270 | cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1); |
271 | |
272 | // Take the later cut. |
273 | if (cut1 > cut2) { |
274 | period = period1; |
275 | cut = cut1; |
276 | } |
277 | else { |
278 | period = period2; |
279 | cut = cut2; |
280 | } |
281 | |
282 | LOG("split: " ); LOG_STRING(needle, cut); |
283 | LOG(" + " ); LOG_STRING(needle + cut, len_needle - cut); |
284 | LOG("\n" ); |
285 | |
286 | *return_period = period; |
287 | return cut; |
288 | } |
289 | |
290 | #define SHIFT_TYPE uint8_t |
291 | #define NOT_FOUND ((1U<<(8*sizeof(SHIFT_TYPE))) - 1U) |
292 | #define SHIFT_OVERFLOW (NOT_FOUND - 1U) |
293 | |
294 | #define TABLE_SIZE_BITS 6 |
295 | #define TABLE_SIZE (1U << TABLE_SIZE_BITS) |
296 | #define TABLE_MASK (TABLE_SIZE - 1U) |
297 | |
298 | typedef struct STRINGLIB(_pre) { |
299 | const STRINGLIB_CHAR *needle; |
300 | Py_ssize_t len_needle; |
301 | Py_ssize_t cut; |
302 | Py_ssize_t period; |
303 | int is_periodic; |
304 | SHIFT_TYPE table[TABLE_SIZE]; |
305 | } STRINGLIB(prework); |
306 | |
307 | |
308 | Py_LOCAL_INLINE(void) |
309 | STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle, |
310 | STRINGLIB(prework) *p) |
311 | { |
312 | p->needle = needle; |
313 | p->len_needle = len_needle; |
314 | p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period)); |
315 | assert(p->period + p->cut <= len_needle); |
316 | p->is_periodic = (0 == memcmp(needle, |
317 | needle + p->period, |
318 | p->cut * STRINGLIB_SIZEOF_CHAR)); |
319 | if (p->is_periodic) { |
320 | assert(p->cut <= len_needle/2); |
321 | assert(p->cut < p->period); |
322 | } |
323 | else { |
324 | // A lower bound on the period |
325 | p->period = Py_MAX(p->cut, len_needle - p->cut) + 1; |
326 | } |
327 | // Now fill up a table |
328 | memset(&(p->table[0]), 0xff, TABLE_SIZE*sizeof(SHIFT_TYPE)); |
329 | assert(p->table[0] == NOT_FOUND); |
330 | assert(p->table[TABLE_MASK] == NOT_FOUND); |
331 | for (Py_ssize_t i = 0; i < len_needle; i++) { |
332 | Py_ssize_t shift = len_needle - i; |
333 | if (shift > SHIFT_OVERFLOW) { |
334 | shift = SHIFT_OVERFLOW; |
335 | } |
336 | p->table[needle[i] & TABLE_MASK] = Py_SAFE_DOWNCAST(shift, |
337 | Py_ssize_t, |
338 | SHIFT_TYPE); |
339 | } |
340 | } |
341 | |
342 | Py_LOCAL_INLINE(Py_ssize_t) |
343 | STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, |
344 | STRINGLIB(prework) *p) |
345 | { |
346 | // Crochemore and Perrin's (1991) Two-Way algorithm. |
347 | // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 |
348 | Py_ssize_t len_needle = p->len_needle; |
349 | Py_ssize_t cut = p->cut; |
350 | Py_ssize_t period = p->period; |
351 | const STRINGLIB_CHAR *needle = p->needle; |
352 | const STRINGLIB_CHAR *window = haystack; |
353 | const STRINGLIB_CHAR *last_window = haystack + len_haystack - len_needle; |
354 | SHIFT_TYPE *table = p->table; |
355 | LOG("===== Two-way: \"%s\" in \"%s\". =====\n" , needle, haystack); |
356 | |
357 | if (p->is_periodic) { |
358 | LOG("Needle is periodic.\n" ); |
359 | Py_ssize_t memory = 0; |
360 | periodicwindowloop: |
361 | while (window <= last_window) { |
362 | Py_ssize_t i = Py_MAX(cut, memory); |
363 | |
364 | // Visualize the line-up: |
365 | LOG("> " ); LOG_STRING(haystack, len_haystack); |
366 | LOG("\n> " ); LOG("%*s" , window - haystack, "" ); |
367 | LOG_STRING(needle, len_needle); |
368 | LOG("\n> " ); LOG("%*s" , window - haystack + i, "" ); |
369 | LOG(" ^ <-- cut\n" ); |
370 | |
371 | if (window[i] != needle[i]) { |
372 | // Sunday's trick: if we're going to jump, we might |
373 | // as well jump to line up the character *after* the |
374 | // current window. |
375 | STRINGLIB_CHAR first_outside = window[len_needle]; |
376 | SHIFT_TYPE shift = table[first_outside & TABLE_MASK]; |
377 | if (shift == NOT_FOUND) { |
378 | LOG("\"%c\" not found. Skipping entirely.\n" , |
379 | first_outside); |
380 | window += len_needle + 1; |
381 | } |
382 | else { |
383 | LOG("Shifting to line up \"%c\".\n" , first_outside); |
384 | Py_ssize_t memory_shift = i - cut + 1; |
385 | window += Py_MAX(shift, memory_shift); |
386 | } |
387 | memory = 0; |
388 | goto periodicwindowloop; |
389 | } |
390 | for (i = i + 1; i < len_needle; i++) { |
391 | if (needle[i] != window[i]) { |
392 | LOG("Right half does not match. Jump ahead by %d.\n" , |
393 | i - cut + 1); |
394 | window += i - cut + 1; |
395 | memory = 0; |
396 | goto periodicwindowloop; |
397 | } |
398 | } |
399 | for (i = memory; i < cut; i++) { |
400 | if (needle[i] != window[i]) { |
401 | LOG("Left half does not match. Jump ahead by period %d.\n" , |
402 | period); |
403 | window += period; |
404 | memory = len_needle - period; |
405 | goto periodicwindowloop; |
406 | } |
407 | } |
408 | LOG("Left half matches. Returning %d.\n" , |
409 | window - haystack); |
410 | return window - haystack; |
411 | } |
412 | } |
413 | else { |
414 | LOG("Needle is not periodic.\n" ); |
415 | assert(cut < len_needle); |
416 | STRINGLIB_CHAR needle_cut = needle[cut]; |
417 | windowloop: |
418 | while (window <= last_window) { |
419 | |
420 | // Visualize the line-up: |
421 | LOG("> " ); LOG_STRING(haystack, len_haystack); |
422 | LOG("\n> " ); LOG("%*s" , window - haystack, "" ); |
423 | LOG_STRING(needle, len_needle); |
424 | LOG("\n> " ); LOG("%*s" , window - haystack + cut, "" ); |
425 | LOG(" ^ <-- cut\n" ); |
426 | |
427 | if (window[cut] != needle_cut) { |
428 | // Sunday's trick: if we're going to jump, we might |
429 | // as well jump to line up the character *after* the |
430 | // current window. |
431 | STRINGLIB_CHAR first_outside = window[len_needle]; |
432 | SHIFT_TYPE shift = table[first_outside & TABLE_MASK]; |
433 | if (shift == NOT_FOUND) { |
434 | LOG("\"%c\" not found. Skipping entirely.\n" , |
435 | first_outside); |
436 | window += len_needle + 1; |
437 | } |
438 | else { |
439 | LOG("Shifting to line up \"%c\".\n" , first_outside); |
440 | window += shift; |
441 | } |
442 | goto windowloop; |
443 | } |
444 | for (Py_ssize_t i = cut + 1; i < len_needle; i++) { |
445 | if (needle[i] != window[i]) { |
446 | LOG("Right half does not match. Advance by %d.\n" , |
447 | i - cut + 1); |
448 | window += i - cut + 1; |
449 | goto windowloop; |
450 | } |
451 | } |
452 | for (Py_ssize_t i = 0; i < cut; i++) { |
453 | if (needle[i] != window[i]) { |
454 | LOG("Left half does not match. Advance by period %d.\n" , |
455 | period); |
456 | window += period; |
457 | goto windowloop; |
458 | } |
459 | } |
460 | LOG("Left half matches. Returning %d.\n" , window - haystack); |
461 | return window - haystack; |
462 | } |
463 | } |
464 | LOG("Not found. Returning -1.\n" ); |
465 | return -1; |
466 | } |
467 | |
468 | Py_LOCAL_INLINE(Py_ssize_t) |
469 | STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack, |
470 | Py_ssize_t len_haystack, |
471 | const STRINGLIB_CHAR *needle, |
472 | Py_ssize_t len_needle) |
473 | { |
474 | LOG("###### Finding \"%s\" in \"%s\".\n" , needle, haystack); |
475 | STRINGLIB(prework) p; |
476 | STRINGLIB(_preprocess)(needle, len_needle, &p); |
477 | return STRINGLIB(_two_way)(haystack, len_haystack, &p); |
478 | } |
479 | |
480 | Py_LOCAL_INLINE(Py_ssize_t) |
481 | STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack, |
482 | Py_ssize_t len_haystack, |
483 | const STRINGLIB_CHAR *needle, |
484 | Py_ssize_t len_needle, |
485 | Py_ssize_t maxcount) |
486 | { |
487 | LOG("###### Counting \"%s\" in \"%s\".\n" , needle, haystack); |
488 | STRINGLIB(prework) p; |
489 | STRINGLIB(_preprocess)(needle, len_needle, &p); |
490 | Py_ssize_t index = 0, count = 0; |
491 | while (1) { |
492 | Py_ssize_t result; |
493 | result = STRINGLIB(_two_way)(haystack + index, |
494 | len_haystack - index, &p); |
495 | if (result == -1) { |
496 | return count; |
497 | } |
498 | count++; |
499 | if (count == maxcount) { |
500 | return maxcount; |
501 | } |
502 | index += result + len_needle; |
503 | } |
504 | return count; |
505 | } |
506 | |
507 | #undef SHIFT_TYPE |
508 | #undef NOT_FOUND |
509 | #undef SHIFT_OVERFLOW |
510 | #undef TABLE_SIZE_BITS |
511 | #undef TABLE_SIZE |
512 | #undef TABLE_MASK |
513 | |
514 | #undef LOG |
515 | #undef LOG_STRING |
516 | |
517 | Py_LOCAL_INLINE(Py_ssize_t) |
518 | FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, |
519 | const STRINGLIB_CHAR* p, Py_ssize_t m, |
520 | Py_ssize_t maxcount, int mode) |
521 | { |
522 | unsigned long mask; |
523 | Py_ssize_t skip, count = 0; |
524 | Py_ssize_t i, j, mlast, w; |
525 | |
526 | w = n - m; |
527 | |
528 | if (w < 0 || (mode == FAST_COUNT && maxcount == 0)) |
529 | return -1; |
530 | |
531 | /* look for special cases */ |
532 | if (m <= 1) { |
533 | if (m <= 0) |
534 | return -1; |
535 | /* use special case for 1-character strings */ |
536 | if (mode == FAST_SEARCH) |
537 | return STRINGLIB(find_char)(s, n, p[0]); |
538 | else if (mode == FAST_RSEARCH) |
539 | return STRINGLIB(rfind_char)(s, n, p[0]); |
540 | else { /* FAST_COUNT */ |
541 | for (i = 0; i < n; i++) |
542 | if (s[i] == p[0]) { |
543 | count++; |
544 | if (count == maxcount) |
545 | return maxcount; |
546 | } |
547 | return count; |
548 | } |
549 | } |
550 | |
551 | mlast = m - 1; |
552 | skip = mlast; |
553 | mask = 0; |
554 | |
555 | if (mode != FAST_RSEARCH) { |
556 | if (m >= 100 && w >= 2000 && w / m >= 5) { |
557 | /* For larger problems where the needle isn't a huge |
558 | percentage of the size of the haystack, the relatively |
559 | expensive O(m) startup cost of the two-way algorithm |
560 | will surely pay off. */ |
561 | if (mode == FAST_SEARCH) { |
562 | return STRINGLIB(_two_way_find)(s, n, p, m); |
563 | } |
564 | else { |
565 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); |
566 | } |
567 | } |
568 | const STRINGLIB_CHAR *ss = s + m - 1; |
569 | const STRINGLIB_CHAR *pp = p + m - 1; |
570 | |
571 | /* create compressed boyer-moore delta 1 table */ |
572 | |
573 | /* process pattern[:-1] */ |
574 | for (i = 0; i < mlast; i++) { |
575 | STRINGLIB_BLOOM_ADD(mask, p[i]); |
576 | if (p[i] == p[mlast]) { |
577 | skip = mlast - i - 1; |
578 | } |
579 | } |
580 | /* process pattern[-1] outside the loop */ |
581 | STRINGLIB_BLOOM_ADD(mask, p[mlast]); |
582 | |
583 | if (m >= 100 && w >= 8000) { |
584 | /* To ensure that we have good worst-case behavior, |
585 | here's an adaptive version of the algorithm, where if |
586 | we match O(m) characters without any matches of the |
587 | entire needle, then we predict that the startup cost of |
588 | the two-way algorithm will probably be worth it. */ |
589 | Py_ssize_t hits = 0; |
590 | for (i = 0; i <= w; i++) { |
591 | if (ss[i] == pp[0]) { |
592 | /* candidate match */ |
593 | for (j = 0; j < mlast; j++) { |
594 | if (s[i+j] != p[j]) { |
595 | break; |
596 | } |
597 | } |
598 | if (j == mlast) { |
599 | /* got a match! */ |
600 | if (mode != FAST_COUNT) { |
601 | return i; |
602 | } |
603 | count++; |
604 | if (count == maxcount) { |
605 | return maxcount; |
606 | } |
607 | i = i + mlast; |
608 | continue; |
609 | } |
610 | /* miss: check if next character is part of pattern */ |
611 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { |
612 | i = i + m; |
613 | } |
614 | else { |
615 | i = i + skip; |
616 | } |
617 | hits += j + 1; |
618 | if (hits >= m / 4 && i < w - 1000) { |
619 | /* We've done O(m) fruitless comparisons |
620 | anyway, so spend the O(m) cost on the |
621 | setup for the two-way algorithm. */ |
622 | Py_ssize_t res; |
623 | if (mode == FAST_COUNT) { |
624 | res = STRINGLIB(_two_way_count)( |
625 | s+i, n-i, p, m, maxcount-count); |
626 | return count + res; |
627 | } |
628 | else { |
629 | res = STRINGLIB(_two_way_find)(s+i, n-i, p, m); |
630 | if (res == -1) { |
631 | return -1; |
632 | } |
633 | return i + res; |
634 | } |
635 | } |
636 | } |
637 | else { |
638 | /* skip: check if next character is part of pattern */ |
639 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { |
640 | i = i + m; |
641 | } |
642 | } |
643 | } |
644 | if (mode != FAST_COUNT) { |
645 | return -1; |
646 | } |
647 | return count; |
648 | } |
649 | /* The standard, non-adaptive version of the algorithm. */ |
650 | for (i = 0; i <= w; i++) { |
651 | /* note: using mlast in the skip path slows things down on x86 */ |
652 | if (ss[i] == pp[0]) { |
653 | /* candidate match */ |
654 | for (j = 0; j < mlast; j++) { |
655 | if (s[i+j] != p[j]) { |
656 | break; |
657 | } |
658 | } |
659 | if (j == mlast) { |
660 | /* got a match! */ |
661 | if (mode != FAST_COUNT) { |
662 | return i; |
663 | } |
664 | count++; |
665 | if (count == maxcount) { |
666 | return maxcount; |
667 | } |
668 | i = i + mlast; |
669 | continue; |
670 | } |
671 | /* miss: check if next character is part of pattern */ |
672 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { |
673 | i = i + m; |
674 | } |
675 | else { |
676 | i = i + skip; |
677 | } |
678 | } |
679 | else { |
680 | /* skip: check if next character is part of pattern */ |
681 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { |
682 | i = i + m; |
683 | } |
684 | } |
685 | } |
686 | } |
687 | else { /* FAST_RSEARCH */ |
688 | |
689 | /* create compressed boyer-moore delta 1 table */ |
690 | |
691 | /* process pattern[0] outside the loop */ |
692 | STRINGLIB_BLOOM_ADD(mask, p[0]); |
693 | /* process pattern[:0:-1] */ |
694 | for (i = mlast; i > 0; i--) { |
695 | STRINGLIB_BLOOM_ADD(mask, p[i]); |
696 | if (p[i] == p[0]) { |
697 | skip = i - 1; |
698 | } |
699 | } |
700 | |
701 | for (i = w; i >= 0; i--) { |
702 | if (s[i] == p[0]) { |
703 | /* candidate match */ |
704 | for (j = mlast; j > 0; j--) { |
705 | if (s[i+j] != p[j]) { |
706 | break; |
707 | } |
708 | } |
709 | if (j == 0) { |
710 | /* got a match! */ |
711 | return i; |
712 | } |
713 | /* miss: check if previous character is part of pattern */ |
714 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { |
715 | i = i - m; |
716 | } |
717 | else { |
718 | i = i - skip; |
719 | } |
720 | } |
721 | else { |
722 | /* skip: check if previous character is part of pattern */ |
723 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { |
724 | i = i - m; |
725 | } |
726 | } |
727 | } |
728 | } |
729 | |
730 | if (mode != FAST_COUNT) |
731 | return -1; |
732 | return count; |
733 | } |
734 | |
735 | |