1#if STRINGLIB_IS_UNICODE
2# error "transmogrify.h only compatible with byte-wise strings"
3#endif
4
/* The more complicated methods.  Parts of these should be pulled out into the
   shared code in bytes_methods.c to cut down on duplicate code bloat. */
7
8/*[clinic input]
9class B "PyObject *" "&PyType_Type"
10[clinic start generated code]*/
11/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2935558188d97c76]*/
12
13#include "clinic/transmogrify.h.h"
14
15static inline PyObject *
16return_self(PyObject *self)
17{
18#if !STRINGLIB_MUTABLE
19 if (STRINGLIB_CHECK_EXACT(self)) {
20 Py_INCREF(self);
21 return self;
22 }
23#endif
24 return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
25}
26
27/*[clinic input]
28B.expandtabs as stringlib_expandtabs
29
30 tabsize: int = 8
31
32Return a copy where all tab characters are expanded using spaces.
33
34If tabsize is not given, a tab size of 8 characters is assumed.
35[clinic start generated code]*/
36
37static PyObject *
38stringlib_expandtabs_impl(PyObject *self, int tabsize)
39/*[clinic end generated code: output=069cb7fae72e4c2b input=3c6d3b12aa3ccbea]*/
40{
41 const char *e, *p;
42 char *q;
43 Py_ssize_t i, j;
44 PyObject *u;
45
46 /* First pass: determine size of output string */
47 i = j = 0;
48 e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
49 for (p = STRINGLIB_STR(self); p < e; p++) {
50 if (*p == '\t') {
51 if (tabsize > 0) {
52 Py_ssize_t incr = tabsize - (j % tabsize);
53 if (j > PY_SSIZE_T_MAX - incr)
54 goto overflow;
55 j += incr;
56 }
57 }
58 else {
59 if (j > PY_SSIZE_T_MAX - 1)
60 goto overflow;
61 j++;
62 if (*p == '\n' || *p == '\r') {
63 if (i > PY_SSIZE_T_MAX - j)
64 goto overflow;
65 i += j;
66 j = 0;
67 }
68 }
69 }
70
71 if (i > PY_SSIZE_T_MAX - j)
72 goto overflow;
73
74 /* Second pass: create output string and fill it */
75 u = STRINGLIB_NEW(NULL, i + j);
76 if (!u)
77 return NULL;
78
79 j = 0;
80 q = STRINGLIB_STR(u);
81
82 for (p = STRINGLIB_STR(self); p < e; p++) {
83 if (*p == '\t') {
84 if (tabsize > 0) {
85 i = tabsize - (j % tabsize);
86 j += i;
87 while (i--)
88 *q++ = ' ';
89 }
90 }
91 else {
92 j++;
93 *q++ = *p;
94 if (*p == '\n' || *p == '\r')
95 j = 0;
96 }
97 }
98
99 return u;
100 overflow:
101 PyErr_SetString(PyExc_OverflowError, "result too long");
102 return NULL;
103}
104
105static inline PyObject *
106pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
107{
108 PyObject *u;
109
110 if (left < 0)
111 left = 0;
112 if (right < 0)
113 right = 0;
114
115 if (left == 0 && right == 0) {
116 return return_self(self);
117 }
118
119 u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);
120 if (u) {
121 if (left)
122 memset(STRINGLIB_STR(u), fill, left);
123 memcpy(STRINGLIB_STR(u) + left,
124 STRINGLIB_STR(self),
125 STRINGLIB_LEN(self));
126 if (right)
127 memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
128 fill, right);
129 }
130
131 return u;
132}
133
134/*[clinic input]
135B.ljust as stringlib_ljust
136
137 width: Py_ssize_t
138 fillchar: char = b' '
139 /
140
141Return a left-justified string of length width.
142
143Padding is done using the specified fill character.
144[clinic start generated code]*/
145
146static PyObject *
147stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar)
148/*[clinic end generated code: output=c79ca173c5ff8337 input=eff2d014bc7d80df]*/
149{
150 if (STRINGLIB_LEN(self) >= width) {
151 return return_self(self);
152 }
153
154 return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
155}
156
157
158/*[clinic input]
159B.rjust as stringlib_rjust
160
161 width: Py_ssize_t
162 fillchar: char = b' '
163 /
164
165Return a right-justified string of length width.
166
167Padding is done using the specified fill character.
168[clinic start generated code]*/
169
170static PyObject *
171stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar)
172/*[clinic end generated code: output=7df5d728a5439570 input=218b0bd31308955d]*/
173{
174 if (STRINGLIB_LEN(self) >= width) {
175 return return_self(self);
176 }
177
178 return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
179}
180
181
182/*[clinic input]
183B.center as stringlib_center
184
185 width: Py_ssize_t
186 fillchar: char = b' '
187 /
188
189Return a centered string of length width.
190
191Padding is done using the specified fill character.
192[clinic start generated code]*/
193
194static PyObject *
195stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar)
196/*[clinic end generated code: output=d8da2e055288b4c2 input=3776fd278765d89b]*/
197{
198 Py_ssize_t marg, left;
199
200 if (STRINGLIB_LEN(self) >= width) {
201 return return_self(self);
202 }
203
204 marg = width - STRINGLIB_LEN(self);
205 left = marg / 2 + (marg & width & 1);
206
207 return pad(self, left, marg - left, fillchar);
208}
209
210/*[clinic input]
211B.zfill as stringlib_zfill
212
213 width: Py_ssize_t
214 /
215
216Pad a numeric string with zeros on the left, to fill a field of the given width.
217
218The original string is never truncated.
219[clinic start generated code]*/
220
221static PyObject *
222stringlib_zfill_impl(PyObject *self, Py_ssize_t width)
223/*[clinic end generated code: output=0b3c684a7f1b2319 input=2da6d7b8e9bcb19a]*/
224{
225 Py_ssize_t fill;
226 PyObject *s;
227 char *p;
228
229 if (STRINGLIB_LEN(self) >= width) {
230 return return_self(self);
231 }
232
233 fill = width - STRINGLIB_LEN(self);
234
235 s = pad(self, fill, 0, '0');
236
237 if (s == NULL)
238 return NULL;
239
240 p = STRINGLIB_STR(s);
241 if (p[fill] == '+' || p[fill] == '-') {
242 /* move sign to beginning of string */
243 p[0] = p[fill];
244 p[fill] = '0';
245 }
246
247 return s;
248}
249
250
251/* find and count characters and substrings */
252
/* findchar(target, target_len, c): look for the byte value `c` within the
   first `target_len` bytes starting at `target`, using memchr().  Evaluates
   to a `char *` pointing at the first match, or NULL when there is none.
   The result is cast to a non-const `char *` regardless of how `target` is
   qualified.  `c` and `target_len` are substituted without parentheses. */
#define findchar(target, target_len, c) \
 ((char *)memchr((const void *)(target), c, target_len))
255
256
257static Py_ssize_t
258countchar(const char *target, Py_ssize_t target_len, char c,
259 Py_ssize_t maxcount)
260{
261 Py_ssize_t count = 0;
262 const char *start = target;
263 const char *end = target + target_len;
264
265 while ((start = findchar(start, end - start, c)) != NULL) {
266 count++;
267 if (count >= maxcount)
268 break;
269 start += 1;
270 }
271 return count;
272}
273
274
275/* Algorithms for different cases of string replacement */
276
277/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
278static PyObject *
279stringlib_replace_interleave(PyObject *self,
280 const char *to_s, Py_ssize_t to_len,
281 Py_ssize_t maxcount)
282{
283 const char *self_s;
284 char *result_s;
285 Py_ssize_t self_len, result_len;
286 Py_ssize_t count, i;
287 PyObject *result;
288
289 self_len = STRINGLIB_LEN(self);
290
291 /* 1 at the end plus 1 after every character;
292 count = min(maxcount, self_len + 1) */
293 if (maxcount <= self_len) {
294 count = maxcount;
295 }
296 else {
297 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
298 count = self_len + 1;
299 }
300
301 /* Check for overflow */
302 /* result_len = count * to_len + self_len; */
303 assert(count > 0);
304 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
305 PyErr_SetString(PyExc_OverflowError,
306 "replace bytes is too long");
307 return NULL;
308 }
309 result_len = count * to_len + self_len;
310 result = STRINGLIB_NEW(NULL, result_len);
311 if (result == NULL) {
312 return NULL;
313 }
314
315 self_s = STRINGLIB_STR(self);
316 result_s = STRINGLIB_STR(result);
317
318 if (to_len > 1) {
319 /* Lay the first one down (guaranteed this will occur) */
320 memcpy(result_s, to_s, to_len);
321 result_s += to_len;
322 count -= 1;
323
324 for (i = 0; i < count; i++) {
325 *result_s++ = *self_s++;
326 memcpy(result_s, to_s, to_len);
327 result_s += to_len;
328 }
329 }
330 else {
331 result_s[0] = to_s[0];
332 result_s += to_len;
333 count -= 1;
334 for (i = 0; i < count; i++) {
335 *result_s++ = *self_s++;
336 result_s[0] = to_s[0];
337 result_s += to_len;
338 }
339 }
340
341 /* Copy the rest of the original string */
342 memcpy(result_s, self_s, self_len - i);
343
344 return result;
345}
346
347/* Special case for deleting a single character */
348/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
349static PyObject *
350stringlib_replace_delete_single_character(PyObject *self,
351 char from_c, Py_ssize_t maxcount)
352{
353 const char *self_s, *start, *next, *end;
354 char *result_s;
355 Py_ssize_t self_len, result_len;
356 Py_ssize_t count;
357 PyObject *result;
358
359 self_len = STRINGLIB_LEN(self);
360 self_s = STRINGLIB_STR(self);
361
362 count = countchar(self_s, self_len, from_c, maxcount);
363 if (count == 0) {
364 return return_self(self);
365 }
366
367 result_len = self_len - count; /* from_len == 1 */
368 assert(result_len>=0);
369
370 result = STRINGLIB_NEW(NULL, result_len);
371 if (result == NULL) {
372 return NULL;
373 }
374 result_s = STRINGLIB_STR(result);
375
376 start = self_s;
377 end = self_s + self_len;
378 while (count-- > 0) {
379 next = findchar(start, end - start, from_c);
380 if (next == NULL)
381 break;
382 memcpy(result_s, start, next - start);
383 result_s += (next - start);
384 start = next + 1;
385 }
386 memcpy(result_s, start, end - start);
387
388 return result;
389}
390
391/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
392
393static PyObject *
394stringlib_replace_delete_substring(PyObject *self,
395 const char *from_s, Py_ssize_t from_len,
396 Py_ssize_t maxcount)
397{
398 const char *self_s, *start, *next, *end;
399 char *result_s;
400 Py_ssize_t self_len, result_len;
401 Py_ssize_t count, offset;
402 PyObject *result;
403
404 self_len = STRINGLIB_LEN(self);
405 self_s = STRINGLIB_STR(self);
406
407 count = stringlib_count(self_s, self_len,
408 from_s, from_len,
409 maxcount);
410
411 if (count == 0) {
412 /* no matches */
413 return return_self(self);
414 }
415
416 result_len = self_len - (count * from_len);
417 assert (result_len>=0);
418
419 result = STRINGLIB_NEW(NULL, result_len);
420 if (result == NULL) {
421 return NULL;
422 }
423 result_s = STRINGLIB_STR(result);
424
425 start = self_s;
426 end = self_s + self_len;
427 while (count-- > 0) {
428 offset = stringlib_find(start, end - start,
429 from_s, from_len,
430 0);
431 if (offset == -1)
432 break;
433 next = start + offset;
434
435 memcpy(result_s, start, next - start);
436
437 result_s += (next - start);
438 start = next + from_len;
439 }
440 memcpy(result_s, start, end - start);
441 return result;
442}
443
444/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
445static PyObject *
446stringlib_replace_single_character_in_place(PyObject *self,
447 char from_c, char to_c,
448 Py_ssize_t maxcount)
449{
450 const char *self_s, *end;
451 char *result_s, *start, *next;
452 Py_ssize_t self_len;
453 PyObject *result;
454
455 /* The result string will be the same size */
456 self_s = STRINGLIB_STR(self);
457 self_len = STRINGLIB_LEN(self);
458
459 next = findchar(self_s, self_len, from_c);
460
461 if (next == NULL) {
462 /* No matches; return the original bytes */
463 return return_self(self);
464 }
465
466 /* Need to make a new bytes */
467 result = STRINGLIB_NEW(NULL, self_len);
468 if (result == NULL) {
469 return NULL;
470 }
471 result_s = STRINGLIB_STR(result);
472 memcpy(result_s, self_s, self_len);
473
474 /* change everything in-place, starting with this one */
475 start = result_s + (next - self_s);
476 *start = to_c;
477 start++;
478 end = result_s + self_len;
479
480 while (--maxcount > 0) {
481 next = findchar(start, end - start, from_c);
482 if (next == NULL)
483 break;
484 *next = to_c;
485 start = next + 1;
486 }
487
488 return result;
489}
490
491/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
492static PyObject *
493stringlib_replace_substring_in_place(PyObject *self,
494 const char *from_s, Py_ssize_t from_len,
495 const char *to_s, Py_ssize_t to_len,
496 Py_ssize_t maxcount)
497{
498 const char *self_s, *end;
499 char *result_s, *start;
500 Py_ssize_t self_len, offset;
501 PyObject *result;
502
503 /* The result bytes will be the same size */
504
505 self_s = STRINGLIB_STR(self);
506 self_len = STRINGLIB_LEN(self);
507
508 offset = stringlib_find(self_s, self_len,
509 from_s, from_len,
510 0);
511 if (offset == -1) {
512 /* No matches; return the original bytes */
513 return return_self(self);
514 }
515
516 /* Need to make a new bytes */
517 result = STRINGLIB_NEW(NULL, self_len);
518 if (result == NULL) {
519 return NULL;
520 }
521 result_s = STRINGLIB_STR(result);
522 memcpy(result_s, self_s, self_len);
523
524 /* change everything in-place, starting with this one */
525 start = result_s + offset;
526 memcpy(start, to_s, from_len);
527 start += from_len;
528 end = result_s + self_len;
529
530 while ( --maxcount > 0) {
531 offset = stringlib_find(start, end - start,
532 from_s, from_len,
533 0);
534 if (offset == -1)
535 break;
536 memcpy(start + offset, to_s, from_len);
537 start += offset + from_len;
538 }
539
540 return result;
541}
542
543/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
544static PyObject *
545stringlib_replace_single_character(PyObject *self,
546 char from_c,
547 const char *to_s, Py_ssize_t to_len,
548 Py_ssize_t maxcount)
549{
550 const char *self_s, *start, *next, *end;
551 char *result_s;
552 Py_ssize_t self_len, result_len;
553 Py_ssize_t count;
554 PyObject *result;
555
556 self_s = STRINGLIB_STR(self);
557 self_len = STRINGLIB_LEN(self);
558
559 count = countchar(self_s, self_len, from_c, maxcount);
560 if (count == 0) {
561 /* no matches, return unchanged */
562 return return_self(self);
563 }
564
565 /* use the difference between current and new, hence the "-1" */
566 /* result_len = self_len + count * (to_len-1) */
567 assert(count > 0);
568 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
569 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
570 return NULL;
571 }
572 result_len = self_len + count * (to_len - 1);
573
574 result = STRINGLIB_NEW(NULL, result_len);
575 if (result == NULL) {
576 return NULL;
577 }
578 result_s = STRINGLIB_STR(result);
579
580 start = self_s;
581 end = self_s + self_len;
582 while (count-- > 0) {
583 next = findchar(start, end - start, from_c);
584 if (next == NULL)
585 break;
586
587 if (next == start) {
588 /* replace with the 'to' */
589 memcpy(result_s, to_s, to_len);
590 result_s += to_len;
591 start += 1;
592 } else {
593 /* copy the unchanged old then the 'to' */
594 memcpy(result_s, start, next - start);
595 result_s += (next - start);
596 memcpy(result_s, to_s, to_len);
597 result_s += to_len;
598 start = next + 1;
599 }
600 }
601 /* Copy the remainder of the remaining bytes */
602 memcpy(result_s, start, end - start);
603
604 return result;
605}
606
607/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
608static PyObject *
609stringlib_replace_substring(PyObject *self,
610 const char *from_s, Py_ssize_t from_len,
611 const char *to_s, Py_ssize_t to_len,
612 Py_ssize_t maxcount)
613{
614 const char *self_s, *start, *next, *end;
615 char *result_s;
616 Py_ssize_t self_len, result_len;
617 Py_ssize_t count, offset;
618 PyObject *result;
619
620 self_s = STRINGLIB_STR(self);
621 self_len = STRINGLIB_LEN(self);
622
623 count = stringlib_count(self_s, self_len,
624 from_s, from_len,
625 maxcount);
626
627 if (count == 0) {
628 /* no matches, return unchanged */
629 return return_self(self);
630 }
631
632 /* Check for overflow */
633 /* result_len = self_len + count * (to_len-from_len) */
634 assert(count > 0);
635 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
636 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
637 return NULL;
638 }
639 result_len = self_len + count * (to_len - from_len);
640
641 result = STRINGLIB_NEW(NULL, result_len);
642 if (result == NULL) {
643 return NULL;
644 }
645 result_s = STRINGLIB_STR(result);
646
647 start = self_s;
648 end = self_s + self_len;
649 while (count-- > 0) {
650 offset = stringlib_find(start, end - start,
651 from_s, from_len,
652 0);
653 if (offset == -1)
654 break;
655 next = start + offset;
656 if (next == start) {
657 /* replace with the 'to' */
658 memcpy(result_s, to_s, to_len);
659 result_s += to_len;
660 start += from_len;
661 } else {
662 /* copy the unchanged old then the 'to' */
663 memcpy(result_s, start, next - start);
664 result_s += (next - start);
665 memcpy(result_s, to_s, to_len);
666 result_s += to_len;
667 start = next + from_len;
668 }
669 }
670 /* Copy the remainder of the remaining bytes */
671 memcpy(result_s, start, end - start);
672
673 return result;
674}
675
676
677static PyObject *
678stringlib_replace(PyObject *self,
679 const char *from_s, Py_ssize_t from_len,
680 const char *to_s, Py_ssize_t to_len,
681 Py_ssize_t maxcount)
682{
683 if (STRINGLIB_LEN(self) < from_len) {
684 /* nothing to do; return the original bytes */
685 return return_self(self);
686 }
687 if (maxcount < 0) {
688 maxcount = PY_SSIZE_T_MAX;
689 } else if (maxcount == 0) {
690 /* nothing to do; return the original bytes */
691 return return_self(self);
692 }
693
694 /* Handle zero-length special cases */
695 if (from_len == 0) {
696 if (to_len == 0) {
697 /* nothing to do; return the original bytes */
698 return return_self(self);
699 }
700 /* insert the 'to' bytes everywhere. */
701 /* >>> b"Python".replace(b"", b".") */
702 /* b'.P.y.t.h.o.n.' */
703 return stringlib_replace_interleave(self, to_s, to_len, maxcount);
704 }
705
706 if (to_len == 0) {
707 /* delete all occurrences of 'from' bytes */
708 if (from_len == 1) {
709 return stringlib_replace_delete_single_character(
710 self, from_s[0], maxcount);
711 } else {
712 return stringlib_replace_delete_substring(
713 self, from_s, from_len, maxcount);
714 }
715 }
716
717 /* Handle special case where both bytes have the same length */
718
719 if (from_len == to_len) {
720 if (from_len == 1) {
721 return stringlib_replace_single_character_in_place(
722 self, from_s[0], to_s[0], maxcount);
723 } else {
724 return stringlib_replace_substring_in_place(
725 self, from_s, from_len, to_s, to_len, maxcount);
726 }
727 }
728
729 /* Otherwise use the more generic algorithms */
730 if (from_len == 1) {
731 return stringlib_replace_single_character(
732 self, from_s[0], to_s, to_len, maxcount);
733 } else {
734 /* len('from')>=2, len('to')>=1 */
735 return stringlib_replace_substring(
736 self, from_s, from_len, to_s, to_len, maxcount);
737 }
738}
739
740#undef findchar
741