1 | #define PY_SSIZE_T_CLEAN |
2 | #include "Python.h" |
3 | #include "pycore_abstract.h" // _PyIndex_Check() |
4 | #include "pycore_bytes_methods.h" |
5 | |
6 | PyDoc_STRVAR_shared(_Py_isspace__doc__, |
7 | "B.isspace() -> bool\n\ |
8 | \n\ |
9 | Return True if all characters in B are whitespace\n\ |
10 | and there is at least one character in B, False otherwise." ); |
11 | |
12 | PyObject* |
13 | _Py_bytes_isspace(const char *cptr, Py_ssize_t len) |
14 | { |
15 | const unsigned char *p |
16 | = (const unsigned char *) cptr; |
17 | const unsigned char *e; |
18 | |
19 | /* Shortcut for single character strings */ |
20 | if (len == 1 && Py_ISSPACE(*p)) |
21 | Py_RETURN_TRUE; |
22 | |
23 | /* Special case for empty strings */ |
24 | if (len == 0) |
25 | Py_RETURN_FALSE; |
26 | |
27 | e = p + len; |
28 | for (; p < e; p++) { |
29 | if (!Py_ISSPACE(*p)) |
30 | Py_RETURN_FALSE; |
31 | } |
32 | Py_RETURN_TRUE; |
33 | } |
34 | |
35 | |
36 | PyDoc_STRVAR_shared(_Py_isalpha__doc__, |
37 | "B.isalpha() -> bool\n\ |
38 | \n\ |
39 | Return True if all characters in B are alphabetic\n\ |
40 | and there is at least one character in B, False otherwise." ); |
41 | |
42 | PyObject* |
43 | _Py_bytes_isalpha(const char *cptr, Py_ssize_t len) |
44 | { |
45 | const unsigned char *p |
46 | = (const unsigned char *) cptr; |
47 | const unsigned char *e; |
48 | |
49 | /* Shortcut for single character strings */ |
50 | if (len == 1 && Py_ISALPHA(*p)) |
51 | Py_RETURN_TRUE; |
52 | |
53 | /* Special case for empty strings */ |
54 | if (len == 0) |
55 | Py_RETURN_FALSE; |
56 | |
57 | e = p + len; |
58 | for (; p < e; p++) { |
59 | if (!Py_ISALPHA(*p)) |
60 | Py_RETURN_FALSE; |
61 | } |
62 | Py_RETURN_TRUE; |
63 | } |
64 | |
65 | |
66 | PyDoc_STRVAR_shared(_Py_isalnum__doc__, |
67 | "B.isalnum() -> bool\n\ |
68 | \n\ |
69 | Return True if all characters in B are alphanumeric\n\ |
70 | and there is at least one character in B, False otherwise." ); |
71 | |
72 | PyObject* |
73 | _Py_bytes_isalnum(const char *cptr, Py_ssize_t len) |
74 | { |
75 | const unsigned char *p |
76 | = (const unsigned char *) cptr; |
77 | const unsigned char *e; |
78 | |
79 | /* Shortcut for single character strings */ |
80 | if (len == 1 && Py_ISALNUM(*p)) |
81 | Py_RETURN_TRUE; |
82 | |
83 | /* Special case for empty strings */ |
84 | if (len == 0) |
85 | Py_RETURN_FALSE; |
86 | |
87 | e = p + len; |
88 | for (; p < e; p++) { |
89 | if (!Py_ISALNUM(*p)) |
90 | Py_RETURN_FALSE; |
91 | } |
92 | Py_RETURN_TRUE; |
93 | } |
94 | |
95 | |
96 | PyDoc_STRVAR_shared(_Py_isascii__doc__, |
97 | "B.isascii() -> bool\n\ |
98 | \n\ |
99 | Return True if B is empty or all characters in B are ASCII,\n\ |
100 | False otherwise." ); |
101 | |
102 | // Optimization is copied from ascii_decode in unicodeobject.c |
103 | /* Mask to quickly check whether a C 'size_t' contains a |
104 | non-ASCII, UTF8-encoded char. */ |
105 | #if (SIZEOF_SIZE_T == 8) |
106 | # define ASCII_CHAR_MASK 0x8080808080808080ULL |
107 | #elif (SIZEOF_SIZE_T == 4) |
108 | # define ASCII_CHAR_MASK 0x80808080U |
109 | #else |
110 | # error C 'size_t' size should be either 4 or 8! |
111 | #endif |
112 | |
113 | PyObject* |
114 | _Py_bytes_isascii(const char *cptr, Py_ssize_t len) |
115 | { |
116 | const char *p = cptr; |
117 | const char *end = p + len; |
118 | |
119 | while (p < end) { |
120 | /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h |
121 | for an explanation. */ |
122 | if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) { |
123 | /* Help allocation */ |
124 | const char *_p = p; |
125 | while (_p + SIZEOF_SIZE_T <= end) { |
126 | size_t value = *(const size_t *) _p; |
127 | if (value & ASCII_CHAR_MASK) { |
128 | Py_RETURN_FALSE; |
129 | } |
130 | _p += SIZEOF_SIZE_T; |
131 | } |
132 | p = _p; |
133 | if (_p == end) |
134 | break; |
135 | } |
136 | if ((unsigned char)*p & 0x80) { |
137 | Py_RETURN_FALSE; |
138 | } |
139 | p++; |
140 | } |
141 | Py_RETURN_TRUE; |
142 | } |
143 | |
144 | #undef ASCII_CHAR_MASK |
145 | |
146 | |
147 | PyDoc_STRVAR_shared(_Py_isdigit__doc__, |
148 | "B.isdigit() -> bool\n\ |
149 | \n\ |
150 | Return True if all characters in B are digits\n\ |
151 | and there is at least one character in B, False otherwise." ); |
152 | |
153 | PyObject* |
154 | _Py_bytes_isdigit(const char *cptr, Py_ssize_t len) |
155 | { |
156 | const unsigned char *p |
157 | = (const unsigned char *) cptr; |
158 | const unsigned char *e; |
159 | |
160 | /* Shortcut for single character strings */ |
161 | if (len == 1 && Py_ISDIGIT(*p)) |
162 | Py_RETURN_TRUE; |
163 | |
164 | /* Special case for empty strings */ |
165 | if (len == 0) |
166 | Py_RETURN_FALSE; |
167 | |
168 | e = p + len; |
169 | for (; p < e; p++) { |
170 | if (!Py_ISDIGIT(*p)) |
171 | Py_RETURN_FALSE; |
172 | } |
173 | Py_RETURN_TRUE; |
174 | } |
175 | |
176 | |
177 | PyDoc_STRVAR_shared(_Py_islower__doc__, |
178 | "B.islower() -> bool\n\ |
179 | \n\ |
180 | Return True if all cased characters in B are lowercase and there is\n\ |
181 | at least one cased character in B, False otherwise." ); |
182 | |
183 | PyObject* |
184 | _Py_bytes_islower(const char *cptr, Py_ssize_t len) |
185 | { |
186 | const unsigned char *p |
187 | = (const unsigned char *) cptr; |
188 | const unsigned char *e; |
189 | int cased; |
190 | |
191 | /* Shortcut for single character strings */ |
192 | if (len == 1) |
193 | return PyBool_FromLong(Py_ISLOWER(*p)); |
194 | |
195 | /* Special case for empty strings */ |
196 | if (len == 0) |
197 | Py_RETURN_FALSE; |
198 | |
199 | e = p + len; |
200 | cased = 0; |
201 | for (; p < e; p++) { |
202 | if (Py_ISUPPER(*p)) |
203 | Py_RETURN_FALSE; |
204 | else if (!cased && Py_ISLOWER(*p)) |
205 | cased = 1; |
206 | } |
207 | return PyBool_FromLong(cased); |
208 | } |
209 | |
210 | |
211 | PyDoc_STRVAR_shared(_Py_isupper__doc__, |
212 | "B.isupper() -> bool\n\ |
213 | \n\ |
214 | Return True if all cased characters in B are uppercase and there is\n\ |
215 | at least one cased character in B, False otherwise." ); |
216 | |
217 | PyObject* |
218 | _Py_bytes_isupper(const char *cptr, Py_ssize_t len) |
219 | { |
220 | const unsigned char *p |
221 | = (const unsigned char *) cptr; |
222 | const unsigned char *e; |
223 | int cased; |
224 | |
225 | /* Shortcut for single character strings */ |
226 | if (len == 1) |
227 | return PyBool_FromLong(Py_ISUPPER(*p)); |
228 | |
229 | /* Special case for empty strings */ |
230 | if (len == 0) |
231 | Py_RETURN_FALSE; |
232 | |
233 | e = p + len; |
234 | cased = 0; |
235 | for (; p < e; p++) { |
236 | if (Py_ISLOWER(*p)) |
237 | Py_RETURN_FALSE; |
238 | else if (!cased && Py_ISUPPER(*p)) |
239 | cased = 1; |
240 | } |
241 | return PyBool_FromLong(cased); |
242 | } |
243 | |
244 | |
245 | PyDoc_STRVAR_shared(_Py_istitle__doc__, |
246 | "B.istitle() -> bool\n\ |
247 | \n\ |
248 | Return True if B is a titlecased string and there is at least one\n\ |
249 | character in B, i.e. uppercase characters may only follow uncased\n\ |
250 | characters and lowercase characters only cased ones. Return False\n\ |
251 | otherwise." ); |
252 | |
253 | PyObject* |
254 | _Py_bytes_istitle(const char *cptr, Py_ssize_t len) |
255 | { |
256 | const unsigned char *p |
257 | = (const unsigned char *) cptr; |
258 | const unsigned char *e; |
259 | int cased, previous_is_cased; |
260 | |
261 | /* Shortcut for single character strings */ |
262 | if (len == 1) |
263 | return PyBool_FromLong(Py_ISUPPER(*p)); |
264 | |
265 | /* Special case for empty strings */ |
266 | if (len == 0) |
267 | Py_RETURN_FALSE; |
268 | |
269 | e = p + len; |
270 | cased = 0; |
271 | previous_is_cased = 0; |
272 | for (; p < e; p++) { |
273 | const unsigned char ch = *p; |
274 | |
275 | if (Py_ISUPPER(ch)) { |
276 | if (previous_is_cased) |
277 | Py_RETURN_FALSE; |
278 | previous_is_cased = 1; |
279 | cased = 1; |
280 | } |
281 | else if (Py_ISLOWER(ch)) { |
282 | if (!previous_is_cased) |
283 | Py_RETURN_FALSE; |
284 | previous_is_cased = 1; |
285 | cased = 1; |
286 | } |
287 | else |
288 | previous_is_cased = 0; |
289 | } |
290 | return PyBool_FromLong(cased); |
291 | } |
292 | |
293 | |
294 | PyDoc_STRVAR_shared(_Py_lower__doc__, |
295 | "B.lower() -> copy of B\n\ |
296 | \n\ |
297 | Return a copy of B with all ASCII characters converted to lowercase." ); |
298 | |
299 | void |
300 | _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len) |
301 | { |
302 | Py_ssize_t i; |
303 | |
304 | for (i = 0; i < len; i++) { |
305 | result[i] = Py_TOLOWER((unsigned char) cptr[i]); |
306 | } |
307 | } |
308 | |
309 | |
310 | PyDoc_STRVAR_shared(_Py_upper__doc__, |
311 | "B.upper() -> copy of B\n\ |
312 | \n\ |
313 | Return a copy of B with all ASCII characters converted to uppercase." ); |
314 | |
315 | void |
316 | _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len) |
317 | { |
318 | Py_ssize_t i; |
319 | |
320 | for (i = 0; i < len; i++) { |
321 | result[i] = Py_TOUPPER((unsigned char) cptr[i]); |
322 | } |
323 | } |
324 | |
325 | |
326 | PyDoc_STRVAR_shared(_Py_title__doc__, |
327 | "B.title() -> copy of B\n\ |
328 | \n\ |
329 | Return a titlecased version of B, i.e. ASCII words start with uppercase\n\ |
330 | characters, all remaining cased characters have lowercase." ); |
331 | |
332 | void |
333 | _Py_bytes_title(char *result, const char *s, Py_ssize_t len) |
334 | { |
335 | Py_ssize_t i; |
336 | int previous_is_cased = 0; |
337 | |
338 | for (i = 0; i < len; i++) { |
339 | int c = Py_CHARMASK(*s++); |
340 | if (Py_ISLOWER(c)) { |
341 | if (!previous_is_cased) |
342 | c = Py_TOUPPER(c); |
343 | previous_is_cased = 1; |
344 | } else if (Py_ISUPPER(c)) { |
345 | if (previous_is_cased) |
346 | c = Py_TOLOWER(c); |
347 | previous_is_cased = 1; |
348 | } else |
349 | previous_is_cased = 0; |
350 | *result++ = c; |
351 | } |
352 | } |
353 | |
354 | |
355 | PyDoc_STRVAR_shared(_Py_capitalize__doc__, |
356 | "B.capitalize() -> copy of B\n\ |
357 | \n\ |
358 | Return a copy of B with only its first character capitalized (ASCII)\n\ |
359 | and the rest lower-cased." ); |
360 | |
361 | void |
362 | _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len) |
363 | { |
364 | if (len > 0) { |
365 | *result = Py_TOUPPER(*s); |
366 | _Py_bytes_lower(result + 1, s + 1, len - 1); |
367 | } |
368 | } |
369 | |
370 | |
371 | PyDoc_STRVAR_shared(_Py_swapcase__doc__, |
372 | "B.swapcase() -> copy of B\n\ |
373 | \n\ |
374 | Return a copy of B with uppercase ASCII characters converted\n\ |
375 | to lowercase ASCII and vice versa." ); |
376 | |
377 | void |
378 | _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len) |
379 | { |
380 | Py_ssize_t i; |
381 | |
382 | for (i = 0; i < len; i++) { |
383 | int c = Py_CHARMASK(*s++); |
384 | if (Py_ISLOWER(c)) { |
385 | *result = Py_TOUPPER(c); |
386 | } |
387 | else if (Py_ISUPPER(c)) { |
388 | *result = Py_TOLOWER(c); |
389 | } |
390 | else |
391 | *result = c; |
392 | result++; |
393 | } |
394 | } |
395 | |
396 | |
397 | PyDoc_STRVAR_shared(_Py_maketrans__doc__, |
398 | "B.maketrans(frm, to) -> translation table\n\ |
399 | \n\ |
400 | Return a translation table (a bytes object of length 256) suitable\n\ |
401 | for use in the bytes or bytearray translate method where each byte\n\ |
402 | in frm is mapped to the byte at the same position in to.\n\ |
403 | The bytes objects frm and to must be of the same length." ); |
404 | |
405 | PyObject * |
406 | _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to) |
407 | { |
408 | PyObject *res = NULL; |
409 | Py_ssize_t i; |
410 | char *p; |
411 | |
412 | if (frm->len != to->len) { |
413 | PyErr_Format(PyExc_ValueError, |
414 | "maketrans arguments must have same length" ); |
415 | return NULL; |
416 | } |
417 | res = PyBytes_FromStringAndSize(NULL, 256); |
418 | if (!res) |
419 | return NULL; |
420 | p = PyBytes_AS_STRING(res); |
421 | for (i = 0; i < 256; i++) |
422 | p[i] = (char) i; |
423 | for (i = 0; i < frm->len; i++) { |
424 | p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i]; |
425 | } |
426 | |
427 | return res; |
428 | } |
429 | |
/* Configuration for the stringlib headers included below.
   STRINGLIB(F) pastes a "stringlib_" prefix onto F, which matches the
   stringlib_* helpers called later in this file; the element type is set
   to plain `char` with a size of 1.
   NOTE(review): exactly how each header consumes these macros is defined
   inside stringlib/ and is not visible from here. */
#define FASTSEARCH fastsearch
#define STRINGLIB(F) stringlib_##F
#define STRINGLIB_CHAR char
#define STRINGLIB_SIZEOF_CHAR 1

#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
438 | |
439 | /* |
440 | Wraps stringlib_parse_args_finds() and additionally checks the first |
441 | argument type. |
442 | |
443 | In case the first argument is a bytes-like object, sets it to subobj, |
444 | and doesn't touch the byte parameter. |
445 | In case it is an integer in range(0, 256), writes the integer value |
446 | to byte, and sets subobj to NULL. |
447 | |
448 | The other parameters are similar to those of |
449 | stringlib_parse_args_finds(). |
450 | */ |
451 | |
452 | Py_LOCAL_INLINE(int) |
453 | parse_args_finds_byte(const char *function_name, PyObject *args, |
454 | PyObject **subobj, char *byte, |
455 | Py_ssize_t *start, Py_ssize_t *end) |
456 | { |
457 | PyObject *tmp_subobj; |
458 | Py_ssize_t ival; |
459 | |
460 | if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj, |
461 | start, end)) |
462 | return 0; |
463 | |
464 | if (PyObject_CheckBuffer(tmp_subobj)) { |
465 | *subobj = tmp_subobj; |
466 | return 1; |
467 | } |
468 | |
469 | if (!_PyIndex_Check(tmp_subobj)) { |
470 | PyErr_Format(PyExc_TypeError, |
471 | "argument should be integer or bytes-like object, " |
472 | "not '%.200s'" , |
473 | Py_TYPE(tmp_subobj)->tp_name); |
474 | return 0; |
475 | } |
476 | |
477 | ival = PyNumber_AsSsize_t(tmp_subobj, NULL); |
478 | if (ival == -1 && PyErr_Occurred()) { |
479 | return 0; |
480 | } |
481 | if (ival < 0 || ival > 255) { |
482 | PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)" ); |
483 | return 0; |
484 | } |
485 | |
486 | *subobj = NULL; |
487 | *byte = (char)ival; |
488 | return 1; |
489 | } |
490 | |
/* Helper macro to fix up start/end slice values in place against a buffer
   of length `len`:
     - `end` larger than `len` is clamped to `len`;
     - a negative `end` or `start` has `len` added, then is clamped at 0.
   `start` is not clamped to `len`.

   `start` and `end` must be modifiable lvalues.  Every argument is
   evaluated more than once, so pass only side-effect-free expressions.
   The body is wrapped in do { } while (0) so that the expansion is one
   statement and remains correct inside an unbraced if/else. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
505 | |
506 | Py_LOCAL_INLINE(Py_ssize_t) |
507 | find_internal(const char *str, Py_ssize_t len, |
508 | const char *function_name, PyObject *args, int dir) |
509 | { |
510 | PyObject *subobj; |
511 | char byte; |
512 | Py_buffer subbuf; |
513 | const char *sub; |
514 | Py_ssize_t sub_len; |
515 | Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; |
516 | Py_ssize_t res; |
517 | |
518 | if (!parse_args_finds_byte(function_name, args, |
519 | &subobj, &byte, &start, &end)) |
520 | return -2; |
521 | |
522 | if (subobj) { |
523 | if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0) |
524 | return -2; |
525 | |
526 | sub = subbuf.buf; |
527 | sub_len = subbuf.len; |
528 | } |
529 | else { |
530 | sub = &byte; |
531 | sub_len = 1; |
532 | } |
533 | |
534 | ADJUST_INDICES(start, end, len); |
535 | if (end - start < sub_len) |
536 | res = -1; |
537 | else if (sub_len == 1) { |
538 | if (dir > 0) |
539 | res = stringlib_find_char( |
540 | str + start, end - start, |
541 | *sub); |
542 | else |
543 | res = stringlib_rfind_char( |
544 | str + start, end - start, |
545 | *sub); |
546 | if (res >= 0) |
547 | res += start; |
548 | } |
549 | else { |
550 | if (dir > 0) |
551 | res = stringlib_find_slice( |
552 | str, len, |
553 | sub, sub_len, start, end); |
554 | else |
555 | res = stringlib_rfind_slice( |
556 | str, len, |
557 | sub, sub_len, start, end); |
558 | } |
559 | |
560 | if (subobj) |
561 | PyBuffer_Release(&subbuf); |
562 | |
563 | return res; |
564 | } |
565 | |
566 | PyDoc_STRVAR_shared(_Py_find__doc__, |
567 | "B.find(sub[, start[, end]]) -> int\n\ |
568 | \n\ |
569 | Return the lowest index in B where subsection sub is found,\n\ |
570 | such that sub is contained within B[start,end]. Optional\n\ |
571 | arguments start and end are interpreted as in slice notation.\n\ |
572 | \n\ |
573 | Return -1 on failure." ); |
574 | |
575 | PyObject * |
576 | _Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args) |
577 | { |
578 | Py_ssize_t result = find_internal(str, len, "find" , args, +1); |
579 | if (result == -2) |
580 | return NULL; |
581 | return PyLong_FromSsize_t(result); |
582 | } |
583 | |
584 | PyDoc_STRVAR_shared(_Py_index__doc__, |
585 | "B.index(sub[, start[, end]]) -> int\n\ |
586 | \n\ |
587 | Return the lowest index in B where subsection sub is found,\n\ |
588 | such that sub is contained within B[start,end]. Optional\n\ |
589 | arguments start and end are interpreted as in slice notation.\n\ |
590 | \n\ |
591 | Raises ValueError when the subsection is not found." ); |
592 | |
593 | PyObject * |
594 | _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args) |
595 | { |
596 | Py_ssize_t result = find_internal(str, len, "index" , args, +1); |
597 | if (result == -2) |
598 | return NULL; |
599 | if (result == -1) { |
600 | PyErr_SetString(PyExc_ValueError, |
601 | "subsection not found" ); |
602 | return NULL; |
603 | } |
604 | return PyLong_FromSsize_t(result); |
605 | } |
606 | |
607 | PyDoc_STRVAR_shared(_Py_rfind__doc__, |
608 | "B.rfind(sub[, start[, end]]) -> int\n\ |
609 | \n\ |
610 | Return the highest index in B where subsection sub is found,\n\ |
611 | such that sub is contained within B[start,end]. Optional\n\ |
612 | arguments start and end are interpreted as in slice notation.\n\ |
613 | \n\ |
614 | Return -1 on failure." ); |
615 | |
616 | PyObject * |
617 | _Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args) |
618 | { |
619 | Py_ssize_t result = find_internal(str, len, "rfind" , args, -1); |
620 | if (result == -2) |
621 | return NULL; |
622 | return PyLong_FromSsize_t(result); |
623 | } |
624 | |
625 | PyDoc_STRVAR_shared(_Py_rindex__doc__, |
626 | "B.rindex(sub[, start[, end]]) -> int\n\ |
627 | \n\ |
628 | Return the highest index in B where subsection sub is found,\n\ |
629 | such that sub is contained within B[start,end]. Optional\n\ |
630 | arguments start and end are interpreted as in slice notation.\n\ |
631 | \n\ |
632 | Raise ValueError when the subsection is not found." ); |
633 | |
634 | PyObject * |
635 | _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args) |
636 | { |
637 | Py_ssize_t result = find_internal(str, len, "rindex" , args, -1); |
638 | if (result == -2) |
639 | return NULL; |
640 | if (result == -1) { |
641 | PyErr_SetString(PyExc_ValueError, |
642 | "subsection not found" ); |
643 | return NULL; |
644 | } |
645 | return PyLong_FromSsize_t(result); |
646 | } |
647 | |
648 | PyDoc_STRVAR_shared(_Py_count__doc__, |
649 | "B.count(sub[, start[, end]]) -> int\n\ |
650 | \n\ |
651 | Return the number of non-overlapping occurrences of subsection sub in\n\ |
652 | bytes B[start:end]. Optional arguments start and end are interpreted\n\ |
653 | as in slice notation." ); |
654 | |
655 | PyObject * |
656 | _Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args) |
657 | { |
658 | PyObject *sub_obj; |
659 | const char *sub; |
660 | Py_ssize_t sub_len; |
661 | char byte; |
662 | Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; |
663 | |
664 | Py_buffer vsub; |
665 | PyObject *count_obj; |
666 | |
667 | if (!parse_args_finds_byte("count" , args, |
668 | &sub_obj, &byte, &start, &end)) |
669 | return NULL; |
670 | |
671 | if (sub_obj) { |
672 | if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0) |
673 | return NULL; |
674 | |
675 | sub = vsub.buf; |
676 | sub_len = vsub.len; |
677 | } |
678 | else { |
679 | sub = &byte; |
680 | sub_len = 1; |
681 | } |
682 | |
683 | ADJUST_INDICES(start, end, len); |
684 | |
685 | count_obj = PyLong_FromSsize_t( |
686 | stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX) |
687 | ); |
688 | |
689 | if (sub_obj) |
690 | PyBuffer_Release(&vsub); |
691 | |
692 | return count_obj; |
693 | } |
694 | |
695 | int |
696 | _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg) |
697 | { |
698 | Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL); |
699 | if (ival == -1 && PyErr_Occurred()) { |
700 | Py_buffer varg; |
701 | Py_ssize_t pos; |
702 | PyErr_Clear(); |
703 | if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0) |
704 | return -1; |
705 | pos = stringlib_find(str, len, |
706 | varg.buf, varg.len, 0); |
707 | PyBuffer_Release(&varg); |
708 | return pos >= 0; |
709 | } |
710 | if (ival < 0 || ival >= 256) { |
711 | PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)" ); |
712 | return -1; |
713 | } |
714 | |
715 | return memchr(str, (int) ival, len) != NULL; |
716 | } |
717 | |
718 | |
719 | /* Matches the end (direction >= 0) or start (direction < 0) of the buffer |
720 | * against substr, using the start and end arguments. Returns |
721 | * -1 on error, 0 if not found and 1 if found. |
722 | */ |
723 | static int |
724 | tailmatch(const char *str, Py_ssize_t len, PyObject *substr, |
725 | Py_ssize_t start, Py_ssize_t end, int direction) |
726 | { |
727 | Py_buffer sub_view = {NULL, NULL}; |
728 | const char *sub; |
729 | Py_ssize_t slen; |
730 | |
731 | if (PyBytes_Check(substr)) { |
732 | sub = PyBytes_AS_STRING(substr); |
733 | slen = PyBytes_GET_SIZE(substr); |
734 | } |
735 | else { |
736 | if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0) |
737 | return -1; |
738 | sub = sub_view.buf; |
739 | slen = sub_view.len; |
740 | } |
741 | |
742 | ADJUST_INDICES(start, end, len); |
743 | |
744 | if (direction < 0) { |
745 | /* startswith */ |
746 | if (start > len - slen) |
747 | goto notfound; |
748 | } else { |
749 | /* endswith */ |
750 | if (end - start < slen || start > len) |
751 | goto notfound; |
752 | |
753 | if (end - slen > start) |
754 | start = end - slen; |
755 | } |
756 | if (end - start < slen) |
757 | goto notfound; |
758 | if (memcmp(str + start, sub, slen) != 0) |
759 | goto notfound; |
760 | |
761 | PyBuffer_Release(&sub_view); |
762 | return 1; |
763 | |
764 | notfound: |
765 | PyBuffer_Release(&sub_view); |
766 | return 0; |
767 | } |
768 | |
769 | static PyObject * |
770 | _Py_bytes_tailmatch(const char *str, Py_ssize_t len, |
771 | const char *function_name, PyObject *args, |
772 | int direction) |
773 | { |
774 | Py_ssize_t start = 0; |
775 | Py_ssize_t end = PY_SSIZE_T_MAX; |
776 | PyObject *subobj; |
777 | int result; |
778 | |
779 | if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end)) |
780 | return NULL; |
781 | if (PyTuple_Check(subobj)) { |
782 | Py_ssize_t i; |
783 | for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { |
784 | result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i), |
785 | start, end, direction); |
786 | if (result == -1) |
787 | return NULL; |
788 | else if (result) { |
789 | Py_RETURN_TRUE; |
790 | } |
791 | } |
792 | Py_RETURN_FALSE; |
793 | } |
794 | result = tailmatch(str, len, subobj, start, end, direction); |
795 | if (result == -1) { |
796 | if (PyErr_ExceptionMatches(PyExc_TypeError)) |
797 | PyErr_Format(PyExc_TypeError, |
798 | "%s first arg must be bytes or a tuple of bytes, " |
799 | "not %s" , |
800 | function_name, Py_TYPE(subobj)->tp_name); |
801 | return NULL; |
802 | } |
803 | else |
804 | return PyBool_FromLong(result); |
805 | } |
806 | |
807 | PyDoc_STRVAR_shared(_Py_startswith__doc__, |
808 | "B.startswith(prefix[, start[, end]]) -> bool\n\ |
809 | \n\ |
810 | Return True if B starts with the specified prefix, False otherwise.\n\ |
811 | With optional start, test B beginning at that position.\n\ |
812 | With optional end, stop comparing B at that position.\n\ |
813 | prefix can also be a tuple of bytes to try." ); |
814 | |
815 | PyObject * |
816 | _Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args) |
817 | { |
818 | return _Py_bytes_tailmatch(str, len, "startswith" , args, -1); |
819 | } |
820 | |
821 | PyDoc_STRVAR_shared(_Py_endswith__doc__, |
822 | "B.endswith(suffix[, start[, end]]) -> bool\n\ |
823 | \n\ |
824 | Return True if B ends with the specified suffix, False otherwise.\n\ |
825 | With optional start, test B beginning at that position.\n\ |
826 | With optional end, stop comparing B at that position.\n\ |
827 | suffix can also be a tuple of bytes to try." ); |
828 | |
829 | PyObject * |
830 | _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args) |
831 | { |
832 | return _Py_bytes_tailmatch(str, len, "endswith" , args, +1); |
833 | } |
834 | |