bytesobject.c source code [python/Objects/bytesobject.c]

/* bytes object implementation */
2
3	#define PY_SSIZE_T_CLEAN
4
5	#include "Python.h"
6	#include "pycore_abstract.h" // _PyIndex_Check()
7	#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
8	#include "pycore_format.h" // F_LJUST
9	#include "pycore_initconfig.h" // _PyStatus_OK()
10	#include "pycore_object.h" // _PyObject_GC_TRACK
11	#include "pycore_pymem.h" // PYMEM_CLEANBYTE
12
13	#include "pystrhex.h"
14	#include <stddef.h>
15
/*[clinic input]
class bytes "PyBytesObject *" "&PyBytes_Type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
20
21	#include "clinic/bytesobject.c.h"
22
23	_Py_IDENTIFIER(__bytes__);
24
25	/ PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation*
26	for a bytes object of length n should request PyBytesObject_SIZE + n bytes.
27
28	Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
29	3 or 7 bytes per bytes object allocation on a typical system.
30	*/
31	#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
33	/ Forward declaration /
34	Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35	char *str);
36
37
38	static struct _Py_bytes_state*
39	get_bytes_state(void)
40	{
41	PyInterpreterState *interp = _PyInterpreterState_GET();
42	return &interp->bytes;
43	}
44
45
46	// Return a borrowed reference to the empty bytes string singleton.
47	static inline PyObject* bytes_get_empty(void)
48	{
49	struct _Py_bytes_state *state = get_bytes_state();
50	// bytes_get_empty() must not be called before _PyBytes_Init()
51	// or after _PyBytes_Fini()
52	assert(state->empty_string != NULL);
53	return state->empty_string;
54	}
55
56
57	// Return a strong reference to the empty bytes string singleton.
58	static inline PyObject* bytes_new_empty(void)
59	{
60	PyObject *empty = bytes_get_empty();
61	Py_INCREF(empty);
62	return (PyObject *)empty;
63	}
64
65
66	static int
67	bytes_create_empty_string_singleton(struct _Py_bytes_state *state)
68	{
69	// Create the empty bytes string singleton
70	PyBytesObject op = (PyBytesObject )PyObject_Malloc(PyBytesObject_SIZE);
71	if (op == NULL) {
72	return -`1`;
73	}
74	_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, `0`);
75	op->ob_shash = -`1`;
76	op->ob_sval[`0`] = `'\0'`;
77
78	assert(state->empty_string == NULL);
79	state->empty_string = (PyObject *)op;
80	return `0`;
81	}
82
83
84	/*
85	For PyBytes_FromString(), the parameter `str' points to a null-terminated
86	string containing exactly `size' bytes.
87
88	For PyBytes_FromStringAndSize(), the parameter `str' is
89	either NULL or else points to a string containing at least `size' bytes.
90	For PyBytes_FromStringAndSize(), the string in the `str' parameter does
91	not have to be null-terminated. (Therefore it is safe to construct a
92	substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
93	If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
94	bytes (setting the last byte to the null terminating character) and you can
95	fill in the data yourself. If `str' is non-NULL then the resulting
96	PyBytes object must be treated as immutable and you must not fill in nor
97	alter the data yourself, since the strings may be shared.
98
99	The PyObject member `op->ob_size', which denotes the number of "extra
100	items" in a variable-size object, will contain the number of bytes
101	allocated for string data, not counting the null terminating character.
102	It is therefore equal to the `size' parameter (for
103	PyBytes_FromStringAndSize()) or the length of the string in the `str'
104	parameter (for PyBytes_FromString()).
105	*/
106	static PyObject *
107	_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
108	{
109	PyBytesObject *op;
110	assert(size >= `0`);
111
112	if (size == `0`) {
113	return bytes_new_empty();
114	}
115
116	if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
117	PyErr_SetString(PyExc_OverflowError,
118	"byte string is too large");
119	return NULL;
120	}
121
122	/ Inline PyObject_NewVar /
123	if (use_calloc)
124	op = (PyBytesObject *)PyObject_Calloc(`1`, PyBytesObject_SIZE + size);
125	else
126	op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
127	if (op == NULL) {
128	return PyErr_NoMemory();
129	}
130	_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
131	op->ob_shash = -`1`;
132	if (!use_calloc) {
133	op->ob_sval[size] = `'\0'`;
134	}
135	return (PyObject *) op;
136	}
137
138	PyObject *
139	PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
140	{
141	PyBytesObject *op;
142	if (size < `0`) {
143	PyErr_SetString(PyExc_SystemError,
144	"Negative size passed to PyBytes_FromStringAndSize");
145	return NULL;
146	}
147	if (size == `1` && str != NULL) {
148	struct _Py_bytes_state *state = get_bytes_state();
149	op = state->characters[*str & UCHAR_MAX];
150	if (op != NULL) {
151	Py_INCREF(op);
152	return (PyObject *)op;
153	}
154	}
155	if (size == `0`) {
156	return bytes_new_empty();
157	}
158
159	op = (PyBytesObject *)_PyBytes_FromSize(size, `0`);
160	if (op == NULL)
161	return NULL;
162	if (str == NULL)
163	return (PyObject *) op;
164
165	memcpy(op->ob_sval, str, size);
166	/ share short strings /
167	if (size == `1`) {
168	struct _Py_bytes_state *state = get_bytes_state();
169	Py_INCREF(op);
170	state->characters[*str & UCHAR_MAX] = op;
171	}
172	return (PyObject *) op;
173	}
174
175	PyObject *
176	PyBytes_FromString(const char *str)
177	{
178	size_t size;
179	PyBytesObject *op;
180
181	assert(str != NULL);
182	size = strlen(str);
183	if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
184	PyErr_SetString(PyExc_OverflowError,
185	"byte string is too long");
186	return NULL;
187	}
188
189	struct _Py_bytes_state *state = get_bytes_state();
190	if (size == `0`) {
191	return bytes_new_empty();
192	}
193	else if (size == `1`) {
194	op = state->characters[*str & UCHAR_MAX];
195	if (op != NULL) {
196	Py_INCREF(op);
197	return (PyObject *)op;
198	}
199	}
200
201	/ Inline PyObject_NewVar /
202	op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
203	if (op == NULL) {
204	return PyErr_NoMemory();
205	}
206	_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
207	op->ob_shash = -`1`;
208	memcpy(op->ob_sval, str, size+`1`);
209	/ share short strings /
210	if (size == `1`) {
211	assert(state->characters[*str & UCHAR_MAX] == NULL);
212	Py_INCREF(op);
213	state->characters[*str & UCHAR_MAX] = op;
214	}
215	return (PyObject *) op;
216	}
217
218	PyObject *
219	PyBytes_FromFormatV(const char *format, va_list vargs)
220	{
221	char *s;
222	const char *f;
223	const char *p;
224	Py_ssize_t prec;
225	int longflag;
226	int size_tflag;
227	/ Longest 64-bit formatted numbers:*
228	- "18446744073709551615\0" (21 bytes)
229	- "-9223372036854775808\0" (21 bytes)
230	Decimal takes the most space (it isn't enough for octal.)
231
232	Longest 64-bit pointer representation:
233	"0xffffffffffffffff\0" (19 bytes). /*
234	char buffer[`21`];
235	_PyBytesWriter writer;
236
237	_PyBytesWriter_Init(&writer);
238
239	s = _PyBytesWriter_Alloc(&writer, strlen(format));
240	if (s == NULL)
241	return NULL;
242	writer.overallocate = `1`;
243
244	#define WRITE_BYTES(str) \
245	do { \
246	s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
247	if (s == NULL) \
248	goto error; \
249	} while (0)
250
251	for (f = format; *f; f++) {
252	if (*f != `'%'`) {
253	s++ = f;
254	continue;
255	}
256
257	p = f++;
258
259	/ ignore the width (ex: 10 in "%10s") /
260	while (Py_ISDIGIT(*f))
261	f++;
262
263	/ parse the precision (ex: 10 in "%.10s") /
264	prec = `0`;
265	if (*f == `'.'`) {
266	f++;
267	for (; Py_ISDIGIT(*f); f++) {
268	prec = (prec * `10`) + (*f - `'0'`);
269	}
270	}
271
272	while (f && f != `'%'` && !Py_ISALPHA(*f))
273	f++;
274
275	/ handle the long flag ('l'), but only for %ld and %lu.*
276	others can be added when necessary. /*
277	longflag = `0`;
278	if (*f == `'l'` && (f[`1`] == `'d'` \|\| f[`1`] == `'u'`)) {
279	longflag = `1`;
280	++f;
281	}
282
283	/ handle the size_t flag ('z'). /
284	size_tflag = `0`;
285	if (*f == `'z'` && (f[`1`] == `'d'` \|\| f[`1`] == `'u'`)) {
286	size_tflag = `1`;
287	++f;
288	}
289
290	/ subtract bytes preallocated for the format string*
291	(ex: 2 for "%s") /*
292	writer.min_size -= (f - p + `1`);
293
294	switch (*f) {
295	case `'c'`:
296	{
297	int c = va_arg(vargs, int);
298	if (c < `0` \|\| c > `255`) {
299	PyErr_SetString(PyExc_OverflowError,
300	"PyBytes_FromFormatV(): %c format "
301	"expects an integer in range [0; 255]");
302	goto error;
303	}
304	writer.min_size++;
305	s++ = (unsigned* char)c;
306	break;
307	}
308
309	case `'d'`:
310	if (longflag) {
311	sprintf(buffer, "%ld", va_arg(vargs, long));
312	}
313	else if (size_tflag) {
314	sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
315	}
316	else {
317	sprintf(buffer, "%d", va_arg(vargs, int));
318	}
319	assert(strlen(buffer) < sizeof(buffer));
320	WRITE_BYTES(buffer);
321	break;
322
323	case `'u'`:
324	if (longflag) {
325	sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
326	}
327	else if (size_tflag) {
328	sprintf(buffer, "%zu", va_arg(vargs, size_t));
329	}
330	else {
331	sprintf(buffer, "%u", va_arg(vargs, unsigned int));
332	}
333	assert(strlen(buffer) < sizeof(buffer));
334	WRITE_BYTES(buffer);
335	break;
336
337	case `'i'`:
338	sprintf(buffer, "%i", va_arg(vargs, int));
339	assert(strlen(buffer) < sizeof(buffer));
340	WRITE_BYTES(buffer);
341	break;
342
343	case `'x'`:
344	sprintf(buffer, "%x", va_arg(vargs, int));
345	assert(strlen(buffer) < sizeof(buffer));
346	WRITE_BYTES(buffer);
347	break;
348
349	case `'s'`:
350	{
351	Py_ssize_t i;
352
353	p = va_arg(vargs, const char*);
354	if (prec <= `0`) {
355	i = strlen(p);
356	}
357	else {
358	i = `0`;
359	while (i < prec && p[i]) {
360	i++;
361	}
362	}
363	s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
364	if (s == NULL)
365	goto error;
366	break;
367	}
368
369	case `'p'`:
370	sprintf(buffer, "%p", va_arg(vargs, void*));
371	assert(strlen(buffer) < sizeof(buffer));
372	/ %p is ill-defined: ensure leading 0x. /
373	if (buffer[`1`] == `'X'`)
374	buffer[`1`] = `'x'`;
375	else if (buffer[`1`] != `'x'`) {
376	memmove(buffer+`2`, buffer, strlen(buffer)+`1`);
377	buffer[`0`] = `'0'`;
378	buffer[`1`] = `'x'`;
379	}
380	WRITE_BYTES(buffer);
381	break;
382
383	case `'%'`:
384	writer.min_size++;
385	*s++ = `'%'`;
386	break;
387
388	default:
389	if (*f == `0`) {
390	/ fix min_size if we reached the end of the format string /
391	writer.min_size++;
392	}
393
394	/ invalid format string: copy unformatted string and exit /
395	WRITE_BYTES(p);
396	return _PyBytesWriter_Finish(&writer, s);
397	}
398	}
399
400	#undef WRITE_BYTES
401
402	return _PyBytesWriter_Finish(&writer, s);
403
404	error:
405	_PyBytesWriter_Dealloc(&writer);
406	return NULL;
407	}
408
409	PyObject *
410	PyBytes_FromFormat(const char *format, ...)
411	{
412	PyObject* ret;
413	va_list vargs;
414
415	#ifdef HAVE_STDARG_PROTOTYPES
416	va_start(vargs, format);
417	#else
418	va_start(vargs);
419	#endif
420	ret = PyBytes_FromFormatV(format, vargs);
421	va_end(vargs);
422	return ret;
423	}
424
/* Helpers for formatstring */
426
427	Py_LOCAL_INLINE(PyObject *)
428	getnextarg(PyObject args, Py_ssize_t arglen, Py_ssize_t p_argidx)
429	{
430	Py_ssize_t argidx = *p_argidx;
431	if (argidx < arglen) {
432	(*p_argidx)++;
433	if (arglen < `0`)
434	return args;
435	else
436	return PyTuple_GetItem(args, argidx);
437	}
438	PyErr_SetString(PyExc_TypeError,
439	"not enough arguments for format string");
440	return NULL;
441	}
442
443	/ Returns a new reference to a PyBytes object, or NULL on failure. /
444
445	static char*
446	formatfloat(PyObject v, int* flags, int prec, int type,
447	PyObject *p_result, _PyBytesWriter writer, char *str)
448	{
449	char *p;
450	PyObject *result;
451	double x;
452	size_t len;
453
454	x = PyFloat_AsDouble(v);
455	if (x == -`1.0` && PyErr_Occurred()) {
456	PyErr_Format(PyExc_TypeError, "float argument required, "
457	"not %.200s", Py_TYPE(v)->tp_name);
458	return NULL;
459	}
460
461	if (prec < `0`)
462	prec = `6`;
463
464	p = PyOS_double_to_string(x, type, prec,
465	(flags & F_ALT) ? Py_DTSF_ALT : `0`, NULL);
466
467	if (p == NULL)
468	return NULL;
469
470	len = strlen(p);
471	if (writer != NULL) {
472	str = _PyBytesWriter_Prepare(writer, str, len);
473	if (str == NULL)
474	return NULL;
475	memcpy(str, p, len);
476	PyMem_Free(p);
477	str += len;
478	return str;
479	}
480
481	result = PyBytes_FromStringAndSize(p, len);
482	PyMem_Free(p);
483	*p_result = result;
484	return result != NULL ? str : NULL;
485	}
486
487	static PyObject *
488	formatlong(PyObject v, int* flags, int prec, int type)
489	{
490	PyObject result, iobj;
491	if (type == `'i'`)
492	type = `'d'`;
493	if (PyLong_Check(v))
494	return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
495	if (PyNumber_Check(v)) {
496	/ make sure number is a type of integer for o, x, and X /
497	if (type == `'o'` \|\| type == `'x'` \|\| type == `'X'`)
498	iobj = _PyNumber_Index(v);
499	else
500	iobj = PyNumber_Long(v);
501	if (iobj != NULL) {
502	assert(PyLong_Check(iobj));
503	result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
504	Py_DECREF(iobj);
505	return result;
506	}
507	if (!PyErr_ExceptionMatches(PyExc_TypeError))
508	return NULL;
509	}
510	PyErr_Format(PyExc_TypeError,
511	"%%%c format: %s is required, not %.200s", type,
512	(type == `'o'` \|\| type == `'x'` \|\| type == `'X'`) ? "an integer"
513	: "a real number",
514	Py_TYPE(v)->tp_name);
515	return NULL;
516	}
517
518	static int
519	byte_converter(PyObject arg, char* *p)
520	{
521	if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == `1`) {
522	*p = PyBytes_AS_STRING(arg)[`0`];
523	return `1`;
524	}
525	else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == `1`) {
526	*p = PyByteArray_AS_STRING(arg)[`0`];
527	return `1`;
528	}
529	else {
530	int overflow;
531	long ival = PyLong_AsLongAndOverflow(arg, &overflow);
532	if (ival == -`1` && PyErr_Occurred()) {
533	if (PyErr_ExceptionMatches(PyExc_TypeError)) {
534	goto onError;
535	}
536	return `0`;
537	}
538	if (!(`0` <= ival && ival <= `255`)) {
539	/ this includes an overflow in converting to C long /
540	PyErr_SetString(PyExc_OverflowError,
541	"%c arg not in range(256)");
542	return `0`;
543	}
544	p = (char*)ival;
545	return `1`;
546	}
547	onError:
548	PyErr_SetString(PyExc_TypeError,
549	"%c requires an integer in range(256) or a single byte");
550	return `0`;
551	}
552
553	static PyObject _PyBytes_FromBuffer(PyObject x);
554
555	static PyObject *
556	format_obj(PyObject v, const* char *pbuf, Py_ssize_t plen)
557	{
558	PyObject func, result;
559	/ is it a bytes object? /
560	if (PyBytes_Check(v)) {
561	*pbuf = PyBytes_AS_STRING(v);
562	*plen = PyBytes_GET_SIZE(v);
563	Py_INCREF(v);
564	return v;
565	}
566	if (PyByteArray_Check(v)) {
567	*pbuf = PyByteArray_AS_STRING(v);
568	*plen = PyByteArray_GET_SIZE(v);
569	Py_INCREF(v);
570	return v;
571	}
572	/ does it support __bytes__? /
573	func = _PyObject_LookupSpecial(v, &PyId___bytes__);
574	if (func != NULL) {
575	result = _PyObject_CallNoArg(func);
576	Py_DECREF(func);
577	if (result == NULL)
578	return NULL;
579	if (!PyBytes_Check(result)) {
580	PyErr_Format(PyExc_TypeError,
581	"__bytes__ returned non-bytes (type %.200s)",
582	Py_TYPE(result)->tp_name);
583	Py_DECREF(result);
584	return NULL;
585	}
586	*pbuf = PyBytes_AS_STRING(result);
587	*plen = PyBytes_GET_SIZE(result);
588	return result;
589	}
590	/ does it support buffer protocol? /
591	if (PyObject_CheckBuffer(v)) {
592	/ maybe we can avoid making a copy of the buffer object here? /
593	result = _PyBytes_FromBuffer(v);
594	if (result == NULL)
595	return NULL;
596	*pbuf = PyBytes_AS_STRING(result);
597	*plen = PyBytes_GET_SIZE(result);
598	return result;
599	}
600	PyErr_Format(PyExc_TypeError,
601	"%%b requires a bytes-like object, "
602	"or an object that implements __bytes__, not '%.100s'",
603	Py_TYPE(v)->tp_name);
604	return NULL;
605	}
606
607	/ fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) /
608
609	PyObject *
610	_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
611	PyObject args, int* use_bytearray)
612	{
613	const char *fmt;
614	char *res;
615	Py_ssize_t arglen, argidx;
616	Py_ssize_t fmtcnt;
617	int args_owned = `0`;
618	PyObject *dict = NULL;
619	_PyBytesWriter writer;
620
621	if (args == NULL) {
622	PyErr_BadInternalCall();
623	return NULL;
624	}
625	fmt = format;
626	fmtcnt = format_len;
627
628	_PyBytesWriter_Init(&writer);
629	writer.use_bytearray = use_bytearray;
630
631	res = _PyBytesWriter_Alloc(&writer, fmtcnt);
632	if (res == NULL)
633	return NULL;
634	if (!use_bytearray)
635	writer.overallocate = `1`;
636
637	if (PyTuple_Check(args)) {
638	arglen = PyTuple_GET_SIZE(args);
639	argidx = `0`;
640	}
641	else {
642	arglen = -`1`;
643	argidx = -`2`;
644	}
645	if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
646	!PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
647	!PyByteArray_Check(args)) {
648	dict = args;
649	}
650
651	while (--fmtcnt >= `0`) {
652	if (*fmt != `'%'`) {
653	Py_ssize_t len;
654	char *pos;
655
656	pos = (char *)memchr(fmt + `1`, `'%'`, fmtcnt);
657	if (pos != NULL)
658	len = pos - fmt;
659	else
660	len = fmtcnt + `1`;
661	assert(len != `0`);
662
663	memcpy(res, fmt, len);
664	res += len;
665	fmt += len;
666	fmtcnt -= (len - `1`);
667	}
668	else {
669	/ Got a format specifier /
670	int flags = `0`;
671	Py_ssize_t width = -`1`;
672	int prec = -`1`;
673	int c = `'\0'`;
674	int fill;
675	PyObject *v = NULL;
676	PyObject *temp = NULL;
677	const char *pbuf = NULL;
678	int sign;
679	Py_ssize_t len = `0`;
680	char onechar; / For byte_converter() /
681	Py_ssize_t alloc;
682
683	fmt++;
684	if (*fmt == `'%'`) {
685	*res++ = `'%'`;
686	fmt++;
687	fmtcnt--;
688	continue;
689	}
690	if (*fmt == `'('`) {
691	const char *keystart;
692	Py_ssize_t keylen;
693	PyObject *key;
694	int pcount = `1`;
695
696	if (dict == NULL) {
697	PyErr_SetString(PyExc_TypeError,
698	"format requires a mapping");
699	goto error;
700	}
701	++fmt;
702	--fmtcnt;
703	keystart = fmt;
704	/ Skip over balanced parentheses /
705	while (pcount > `0` && --fmtcnt >= `0`) {
706	if (*fmt == `')'`)
707	--pcount;
708	else if (*fmt == `'('`)
709	++pcount;
710	fmt++;
711	}
712	keylen = fmt - keystart - `1`;
713	if (fmtcnt < `0` \|\| pcount > `0`) {
714	PyErr_SetString(PyExc_ValueError,
715	"incomplete format key");
716	goto error;
717	}
718	key = PyBytes_FromStringAndSize(keystart,
719	keylen);
720	if (key == NULL)
721	goto error;
722	if (args_owned) {
723	Py_DECREF(args);
724	args_owned = `0`;
725	}
726	args = PyObject_GetItem(dict, key);
727	Py_DECREF(key);
728	if (args == NULL) {
729	goto error;
730	}
731	args_owned = `1`;
732	arglen = -`1`;
733	argidx = -`2`;
734	}
735
736	/ Parse flags. Example: "%+i" => flags=F_SIGN. /
737	while (--fmtcnt >= `0`) {
738	switch (c = *fmt++) {
739	case `'-'`: flags \|= F_LJUST; continue;
740	case `'+'`: flags \|= F_SIGN; continue;
741	case `' '`: flags \|= F_BLANK; continue;
742	case `'#'`: flags \|= F_ALT; continue;
743	case `'0'`: flags \|= F_ZERO; continue;
744	}
745	break;
746	}
747
748	/ Parse width. Example: "%10s" => width=10 /
749	if (c == `'*'`) {
750	v = getnextarg(args, arglen, &argidx);
751	if (v == NULL)
752	goto error;
753	if (!PyLong_Check(v)) {
754	PyErr_SetString(PyExc_TypeError,
755	"* wants int");
756	goto error;
757	}
758	width = PyLong_AsSsize_t(v);
759	if (width == -`1` && PyErr_Occurred())
760	goto error;
761	if (width < `0`) {
762	flags \|= F_LJUST;
763	width = -width;
764	}
765	if (--fmtcnt >= `0`)
766	c = *fmt++;
767	}
768	else if (c >= `0` && isdigit(c)) {
769	width = c - `'0'`;
770	while (--fmtcnt >= `0`) {
771	c = Py_CHARMASK(*fmt++);
772	if (!isdigit(c))
773	break;
774	if (width > (PY_SSIZE_T_MAX - ((int)c - `'0'`)) / `10`) {
775	PyErr_SetString(
776	PyExc_ValueError,
777	"width too big");
778	goto error;
779	}
780	width = width*`10` + (c - `'0'`);
781	}
782	}
783
784	/ Parse precision. Example: "%.3f" => prec=3 /
785	if (c == `'.'`) {
786	prec = `0`;
787	if (--fmtcnt >= `0`)
788	c = *fmt++;
789	if (c == `'*'`) {
790	v = getnextarg(args, arglen, &argidx);
791	if (v == NULL)
792	goto error;
793	if (!PyLong_Check(v)) {
794	PyErr_SetString(
795	PyExc_TypeError,
796	"* wants int");
797	goto error;
798	}
799	prec = _PyLong_AsInt(v);
800	if (prec == -`1` && PyErr_Occurred())
801	goto error;
802	if (prec < `0`)
803	prec = `0`;
804	if (--fmtcnt >= `0`)
805	c = *fmt++;
806	}
807	else if (c >= `0` && isdigit(c)) {
808	prec = c - `'0'`;
809	while (--fmtcnt >= `0`) {
810	c = Py_CHARMASK(*fmt++);
811	if (!isdigit(c))
812	break;
813	if (prec > (INT_MAX - ((int)c - `'0'`)) / `10`) {
814	PyErr_SetString(
815	PyExc_ValueError,
816	"prec too big");
817	goto error;
818	}
819	prec = prec*`10` + (c - `'0'`);
820	}
821	}
822	} / prec /
823	if (fmtcnt >= `0`) {
824	if (c == `'h'` \|\| c == `'l'` \|\| c == `'L'`) {
825	if (--fmtcnt >= `0`)
826	c = *fmt++;
827	}
828	}
829	if (fmtcnt < `0`) {
830	PyErr_SetString(PyExc_ValueError,
831	"incomplete format");
832	goto error;
833	}
834	v = getnextarg(args, arglen, &argidx);
835	if (v == NULL)
836	goto error;
837
838	if (fmtcnt == `0`) {
839	/ last write: disable writer overallocation /
840	writer.overallocate = `0`;
841	}
842
843	sign = `0`;
844	fill = `' '`;
845	switch (c) {
846	case `'r'`:
847	// %r is only for 2/3 code; 3 only code should use %a
848	case `'a'`:
849	temp = PyObject_ASCII(v);
850	if (temp == NULL)
851	goto error;
852	assert(PyUnicode_IS_ASCII(temp));
853	pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
854	len = PyUnicode_GET_LENGTH(temp);
855	if (prec >= `0` && len > prec)
856	len = prec;
857	break;
858
859	case `'s'`:
860	// %s is only for 2/3 code; 3 only code should use %b
861	case `'b'`:
862	temp = format_obj(v, &pbuf, &len);
863	if (temp == NULL)
864	goto error;
865	if (prec >= `0` && len > prec)
866	len = prec;
867	break;
868
869	case `'i'`:
870	case `'d'`:
871	case `'u'`:
872	case `'o'`:
873	case `'x'`:
874	case `'X'`:
875	if (PyLong_CheckExact(v)
876	&& width == -`1` && prec == -`1`
877	&& !(flags & (F_SIGN \| F_BLANK))
878	&& c != `'X'`)
879	{
880	/ Fast path /
881	int alternate = flags & F_ALT;
882	int base;
883
884	switch(c)
885	{
886	default:
887	Py_UNREACHABLE();
888	case `'d'`:
889	case `'i'`:
890	case `'u'`:
891	base = `10`;
892	break;
893	case `'o'`:
894	base = `8`;
895	break;
896	case `'x'`:
897	case `'X'`:
898	base = `16`;
899	break;
900	}
901
902	/ Fast path /
903	writer.min_size -= `2`; / size preallocated for "%d" /
904	res = _PyLong_FormatBytesWriter(&writer, res,
905	v, base, alternate);
906	if (res == NULL)
907	goto error;
908	continue;
909	}
910
911	temp = formatlong(v, flags, prec, c);
912	if (!temp)
913	goto error;
914	assert(PyUnicode_IS_ASCII(temp));
915	pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
916	len = PyUnicode_GET_LENGTH(temp);
917	sign = `1`;
918	if (flags & F_ZERO)
919	fill = `'0'`;
920	break;
921
922	case `'e'`:
923	case `'E'`:
924	case `'f'`:
925	case `'F'`:
926	case `'g'`:
927	case `'G'`:
928	if (width == -`1` && prec == -`1`
929	&& !(flags & (F_SIGN \| F_BLANK)))
930	{
931	/ Fast path /
932	writer.min_size -= `2`; / size preallocated for "%f" /
933	res = formatfloat(v, flags, prec, c, NULL, &writer, res);
934	if (res == NULL)
935	goto error;
936	continue;
937	}
938
939	if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
940	goto error;
941	pbuf = PyBytes_AS_STRING(temp);
942	len = PyBytes_GET_SIZE(temp);
943	sign = `1`;
944	if (flags & F_ZERO)
945	fill = `'0'`;
946	break;
947
948	case `'c'`:
949	pbuf = &onechar;
950	len = byte_converter(v, &onechar);
951	if (!len)
952	goto error;
953	if (width == -`1`) {
954	/ Fast path /
955	*res++ = onechar;
956	continue;
957	}
958	break;
959
960	default:
961	PyErr_Format(PyExc_ValueError,
962	"unsupported format character '%c' (0x%x) "
963	"at index %zd",
964	c, c,
965	(Py_ssize_t)(fmt - `1` - format));
966	goto error;
967	}
968
969	if (sign) {
970	if (pbuf == `'-'` \|\| pbuf == `'+'`) {
971	sign = *pbuf++;
972	len--;
973	}
974	else if (flags & F_SIGN)
975	sign = `'+'`;
976	else if (flags & F_BLANK)
977	sign = `' '`;
978	else
979	sign = `0`;
980	}
981	if (width < len)
982	width = len;
983
984	alloc = width;
985	if (sign != `0` && len == width)
986	alloc++;
987	/ 2: size preallocated for %s /
988	if (alloc > `2`) {
989	res = _PyBytesWriter_Prepare(&writer, res, alloc - `2`);
990	if (res == NULL)
991	goto error;
992	}
993	#ifndef NDEBUG
994	char *before = res;
995	#endif
996
997	/ Write the sign if needed /
998	if (sign) {
999	if (fill != `' '`)
1000	*res++ = sign;
1001	if (width > len)
1002	width--;
1003	}
1004
1005	/ Write the numeric prefix for "x", "X" and "o" formats*
1006	if the alternate form is used.
1007	For example, write "0x" for the "%#x" format. /*
1008	if ((flags & F_ALT) && (c == `'o'` \|\| c == `'x'` \|\| c == `'X'`)) {
1009	assert(pbuf[`0`] == `'0'`);
1010	assert(pbuf[`1`] == c);
1011	if (fill != `' '`) {
1012	res++ = pbuf++;
1013	res++ = pbuf++;
1014	}
1015	width -= `2`;
1016	if (width < `0`)
1017	width = `0`;
1018	len -= `2`;
1019	}
1020
1021	/ Pad left with the fill character if needed /
1022	if (width > len && !(flags & F_LJUST)) {
1023	memset(res, fill, width - len);
1024	res += (width - len);
1025	width = len;
1026	}
1027
1028	/ If padding with spaces: write sign if needed and/or numeric*
1029	prefix if the alternate form is used /*
1030	if (fill == `' '`) {
1031	if (sign)
1032	*res++ = sign;
1033	if ((flags & F_ALT) && (c == `'o'` \|\| c == `'x'` \|\| c == `'X'`)) {
1034	assert(pbuf[`0`] == `'0'`);
1035	assert(pbuf[`1`] == c);
1036	res++ = pbuf++;
1037	res++ = pbuf++;
1038	}
1039	}
1040
1041	/ Copy bytes /
1042	memcpy(res, pbuf, len);
1043	res += len;
1044
1045	/ Pad right with the fill character if needed /
1046	if (width > len) {
1047	memset(res, `' '`, width - len);
1048	res += (width - len);
1049	}
1050
1051	if (dict && (argidx < arglen)) {
1052	PyErr_SetString(PyExc_TypeError,
1053	"not all arguments converted during bytes formatting");
1054	Py_XDECREF(temp);
1055	goto error;
1056	}
1057	Py_XDECREF(temp);
1058
1059	#ifndef NDEBUG
1060	/ check that we computed the exact size for this write /
1061	assert((res - before) == alloc);
1062	#endif
1063	} / '%' /
1064
1065	/ If overallocation was disabled, ensure that it was the last*
1066	write. Otherwise, we missed an optimization /*
1067	assert(writer.overallocate \|\| fmtcnt == `0` \|\| use_bytearray);
1068	} / until end /
1069
1070	if (argidx < arglen && !dict) {
1071	PyErr_SetString(PyExc_TypeError,
1072	"not all arguments converted during bytes formatting");
1073	goto error;
1074	}
1075
1076	if (args_owned) {
1077	Py_DECREF(args);
1078	}
1079	return _PyBytesWriter_Finish(&writer, res);
1080
1081	error:
1082	_PyBytesWriter_Dealloc(&writer);
1083	if (args_owned) {
1084	Py_DECREF(args);
1085	}
1086	return NULL;
1087	}
1088
1089	/ Unescape a backslash-escaped string. /
1090	PyObject _PyBytes_DecodeEscape(const* char *s,
1091	Py_ssize_t len,
1092	const char *errors,
1093	const char **first_invalid_escape)
1094	{
1095	int c;
1096	char *p;
1097	const char *end;
1098	_PyBytesWriter writer;
1099
1100	_PyBytesWriter_Init(&writer);
1101
1102	p = _PyBytesWriter_Alloc(&writer, len);
1103	if (p == NULL)
1104	return NULL;
1105	writer.overallocate = `1`;
1106
1107	*first_invalid_escape = NULL;
1108
1109	end = s + len;
1110	while (s < end) {
1111	if (*s != `'\\'`) {
1112	p++ = s++;
1113	continue;
1114	}
1115
1116	s++;
1117	if (s == end) {
1118	PyErr_SetString(PyExc_ValueError,
1119	"Trailing \\ in string");
1120	goto failed;
1121	}
1122
1123	switch (*s++) {
1124	/ XXX This assumes ASCII! /
1125	case `'\n'`: break;
1126	case `'\\'`: p++ = `'\\'`; break*;
1127	case `'\''`: p++ = `'\''`; break*;
1128	case `'\"'`: p++ = `'\"'`; break*;
1129	case `'b'`: p++ = `'\b'`; break*;
1130	case `'f'`: p++ = `'\014'`; break; /* FF /
1131	case `'t'`: p++ = `'\t'`; break*;
1132	case `'n'`: p++ = `'\n'`; break*;
1133	case `'r'`: p++ = `'\r'`; break*;
1134	case `'v'`: p++ = `'\013'`; break; /* VT /
1135	case `'a'`: p++ = `'\007'`; break; /* BEL, not classic C /
1136	case `'0'`: case `'1'`: case `'2'`: case `'3'`:
1137	case `'4'`: case `'5'`: case `'6'`: case `'7'`:
1138	c = s[-`1`] - `'0'`;
1139	if (s < end && `'0'` <= s && s <= `'7'`) {
1140	c = (c<<`3`) + *s++ - `'0'`;
1141	if (s < end && `'0'` <= s && s <= `'7'`)
1142	c = (c<<`3`) + *s++ - `'0'`;
1143	}
1144	*p++ = c;
1145	break;
1146	case `'x'`:
1147	if (s+`1` < end) {
1148	int digit1, digit2;
1149	digit1 = _PyLong_DigitValue[Py_CHARMASK(s[`0`])];
1150	digit2 = _PyLong_DigitValue[Py_CHARMASK(s[`1`])];
1151	if (digit1 < `16` && digit2 < `16`) {
1152	p++ = (unsigned* char)((digit1 << `4`) + digit2);
1153	s += `2`;
1154	break;
1155	}
1156	}
1157	/ invalid hexadecimal digits /
1158
1159	if (!errors \|\| strcmp(errors, "strict") == `0`) {
1160	PyErr_Format(PyExc_ValueError,
1161	"invalid \\x escape at position %zd",
1162	s - `2` - (end - len));
1163	goto failed;
1164	}
1165	if (strcmp(errors, "replace") == `0`) {
1166	*p++ = `'?'`;
1167	} else if (strcmp(errors, "ignore") == `0`)
1168	/ do nothing /;
1169	else {
1170	PyErr_Format(PyExc_ValueError,
1171	"decoding error; unknown "
1172	"error handling code: %.400s",
1173	errors);
1174	goto failed;
1175	}
1176	/ skip \x /
1177	if (s < end && Py_ISXDIGIT(s[`0`]))
1178	s++; / and a hexdigit /
1179	break;
1180
1181	default:
1182	if (*first_invalid_escape == NULL) {
1183	first_invalid_escape = s-`1`; /* Back up one char, since we've*
1184	already incremented s. /*
1185	}
1186	*p++ = `'\\'`;
1187	s--;
1188	}
1189	}
1190
1191	return _PyBytesWriter_Finish(&writer, p);
1192
1193	failed:
1194	_PyBytesWriter_Dealloc(&writer);
1195	return NULL;
1196	}
1197
1198	PyObject PyBytes_DecodeEscape(const* char *s,
1199	Py_ssize_t len,
1200	const char *errors,
1201	Py_ssize_t Py_UNUSED(unicode),
1202	const char *Py_UNUSED(recode_encoding))
1203	{
1204	const char* first_invalid_escape;
1205	PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
1206	&first_invalid_escape);
1207	if (result == NULL)
1208	return NULL;
1209	if (first_invalid_escape != NULL) {
1210	if (PyErr_WarnFormat(PyExc_DeprecationWarning, `1`,
1211	"invalid escape sequence '\\%c'",
1212	(unsigned char)*first_invalid_escape) < `0`) {
1213	Py_DECREF(result);
1214	return NULL;
1215	}
1216	}
1217	return result;
1218
1219	}
/* -------------------------------------------------------------------- */
/* object api */
1222
1223	Py_ssize_t
1224	PyBytes_Size(PyObject *op)
1225	{
1226	if (!PyBytes_Check(op)) {
1227	PyErr_Format(PyExc_TypeError,
1228	"expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1229	return -`1`;
1230	}
1231	return Py_SIZE(op);
1232	}
1233
1234	char *
1235	PyBytes_AsString(PyObject *op)
1236	{
1237	if (!PyBytes_Check(op)) {
1238	PyErr_Format(PyExc_TypeError,
1239	"expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1240	return NULL;
1241	}
1242	return ((PyBytesObject *)op)->ob_sval;
1243	}
1244
1245	int
1246	PyBytes_AsStringAndSize(PyObject *obj,
1247	char **s,
1248	Py_ssize_t *len)
1249	{
1250	if (s == NULL) {
1251	PyErr_BadInternalCall();
1252	return -`1`;
1253	}
1254
1255	if (!PyBytes_Check(obj)) {
1256	PyErr_Format(PyExc_TypeError,
1257	"expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1258	return -`1`;
1259	}
1260
1261	*s = PyBytes_AS_STRING(obj);
1262	if (len != NULL)
1263	*len = PyBytes_GET_SIZE(obj);
1264	else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1265	PyErr_SetString(PyExc_ValueError,
1266	"embedded null byte");
1267	return -`1`;
1268	}
1269	return `0`;
1270	}
1271
/* -------------------------------------------------------------------- */
/* Methods */
1274
1275	#define STRINGLIB_GET_EMPTY() bytes_get_empty()
1276
1277	#include "stringlib/stringdefs.h"
1278
1279	#include "stringlib/fastsearch.h"
1280	#include "stringlib/count.h"
1281	#include "stringlib/find.h"
1282	#include "stringlib/join.h"
1283	#include "stringlib/partition.h"
1284	#include "stringlib/split.h"
1285	#include "stringlib/ctype.h"
1286
1287	#include "stringlib/transmogrify.h"
1288
1289	#undef STRINGLIB_GET_EMPTY
1290
1291	PyObject *
1292	PyBytes_Repr(PyObject obj, int* smartquotes)
1293	{
1294	PyBytesObject* op = (PyBytesObject*) obj;
1295	Py_ssize_t i, length = Py_SIZE(op);
1296	Py_ssize_t newsize, squotes, dquotes;
1297	PyObject *v;
1298	unsigned char quote;
1299	const unsigned char *s;
1300	Py_UCS1 *p;
1301
1302	/ Compute size of output string /
1303	squotes = dquotes = `0`;
1304	newsize = `3`; / b'' /
1305	s = (const unsigned char*)op->ob_sval;
1306	for (i = `0`; i < length; i++) {
1307	Py_ssize_t incr = `1`;
1308	switch(s[i]) {
1309	case `'\''`: squotes++; break;
1310	case `'"'`: dquotes++; break;
1311	case `'\\'`: case `'\t'`: case `'\n'`: case `'\r'`:
1312	incr = `2`; break; / \C /
1313	default:
1314	if (s[i] < `' '` \|\| s[i] >= `0x7f`)
1315	incr = `4`; / \xHH /
1316	}
1317	if (newsize > PY_SSIZE_T_MAX - incr)
1318	goto overflow;
1319	newsize += incr;
1320	}
1321	quote = `'\''`;
1322	if (smartquotes && squotes && !dquotes)
1323	quote = `'"'`;
1324	if (squotes && quote == `'\''`) {
1325	if (newsize > PY_SSIZE_T_MAX - squotes)
1326	goto overflow;
1327	newsize += squotes;
1328	}
1329
1330	v = PyUnicode_New(newsize, `127`);
1331	if (v == NULL) {
1332	return NULL;
1333	}
1334	p = PyUnicode_1BYTE_DATA(v);
1335
1336	p++ = `'b'`, p++ = quote;
1337	for (i = `0`; i < length; i++) {
1338	unsigned char c = op->ob_sval[i];
1339	if (c == quote \|\| c == `'\\'`)
1340	p++ = `'\\'`, p++ = c;
1341	else if (c == `'\t'`)
1342	p++ = `'\\'`, p++ = `'t'`;
1343	else if (c == `'\n'`)
1344	p++ = `'\\'`, p++ = `'n'`;
1345	else if (c == `'\r'`)
1346	p++ = `'\\'`, p++ = `'r'`;
1347	else if (c < `' '` \|\| c >= `0x7f`) {
1348	*p++ = `'\\'`;
1349	*p++ = `'x'`;
1350	*p++ = Py_hexdigits[(c & `0xf0`) >> `4`];
1351	*p++ = Py_hexdigits[c & `0xf`];
1352	}
1353	else
1354	*p++ = c;
1355	}
1356	*p++ = quote;
1357	assert(_PyUnicode_CheckConsistency(v, `1`));
1358	return v;
1359
1360	overflow:
1361	PyErr_SetString(PyExc_OverflowError,
1362	"bytes object is too large to make repr");
1363	return NULL;
1364	}
1365
1366	static PyObject *
1367	bytes_repr(PyObject *op)
1368	{
1369	return PyBytes_Repr(op, `1`);
1370	}
1371
1372	static PyObject *
1373	bytes_str(PyObject *op)
1374	{
1375	if (_Py_GetConfig()->bytes_warning) {
1376	if (PyErr_WarnEx(PyExc_BytesWarning,
1377	"str() on a bytes instance", `1`)) {
1378	return NULL;
1379	}
1380	}
1381	return bytes_repr(op);
1382	}
1383
1384	static Py_ssize_t
1385	bytes_length(PyBytesObject *a)
1386	{
1387	return Py_SIZE(a);
1388	}
1389
1390	/ This is also used by PyBytes_Concat() /
1391	static PyObject *
1392	bytes_concat(PyObject a, PyObject b)
1393	{
1394	Py_buffer va, vb;
1395	PyObject *result = NULL;
1396
1397	va.len = -`1`;
1398	vb.len = -`1`;
1399	if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != `0` \|\|
1400	PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != `0`) {
1401	PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1402	Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1403	goto done;
1404	}
1405
1406	/ Optimize end cases /
1407	if (va.len == `0` && PyBytes_CheckExact(b)) {
1408	result = b;
1409	Py_INCREF(result);
1410	goto done;
1411	}
1412	if (vb.len == `0` && PyBytes_CheckExact(a)) {
1413	result = a;
1414	Py_INCREF(result);
1415	goto done;
1416	}
1417
1418	if (va.len > PY_SSIZE_T_MAX - vb.len) {
1419	PyErr_NoMemory();
1420	goto done;
1421	}
1422
1423	result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1424	if (result != NULL) {
1425	memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1426	memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1427	}
1428
1429	done:
1430	if (va.len != -`1`)
1431	PyBuffer_Release(&va);
1432	if (vb.len != -`1`)
1433	PyBuffer_Release(&vb);
1434	return result;
1435	}
1436
1437	static PyObject *
1438	bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1439	{
1440	Py_ssize_t i;
1441	Py_ssize_t j;
1442	Py_ssize_t size;
1443	PyBytesObject *op;
1444	size_t nbytes;
1445	if (n < `0`)
1446	n = `0`;
1447	/ watch out for overflows: the size can overflow int,*
1448	* and the # of bytes needed can overflow size_t
1449	*/
1450	if (n > `0` && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1451	PyErr_SetString(PyExc_OverflowError,
1452	"repeated bytes are too long");
1453	return NULL;
1454	}
1455	size = Py_SIZE(a) * n;
1456	if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1457	Py_INCREF(a);
1458	return (PyObject *)a;
1459	}
1460	nbytes = (size_t)size;
1461	if (nbytes + PyBytesObject_SIZE <= nbytes) {
1462	PyErr_SetString(PyExc_OverflowError,
1463	"repeated bytes are too long");
1464	return NULL;
1465	}
1466	op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1467	if (op == NULL) {
1468	return PyErr_NoMemory();
1469	}
1470	_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1471	op->ob_shash = -`1`;
1472	op->ob_sval[size] = `'\0'`;
1473	if (Py_SIZE(a) == `1` && n > `0`) {
1474	memset(op->ob_sval, a->ob_sval[`0`] , n);
1475	return (PyObject *) op;
1476	}
1477	i = `0`;
1478	if (i < size) {
1479	memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1480	i = Py_SIZE(a);
1481	}
1482	while (i < size) {
1483	j = (i <= size-i) ? i : size-i;
1484	memcpy(op->ob_sval+i, op->ob_sval, j);
1485	i += j;
1486	}
1487	return (PyObject *) op;
1488	}
1489
1490	static int
1491	bytes_contains(PyObject self, PyObject arg)
1492	{
1493	return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1494	}
1495
1496	static PyObject *
1497	bytes_item(PyBytesObject *a, Py_ssize_t i)
1498	{
1499	if (i < `0` \|\| i >= Py_SIZE(a)) {
1500	PyErr_SetString(PyExc_IndexError, "index out of range");
1501	return NULL;
1502	}
1503	return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1504	}
1505
1506	static int
1507	bytes_compare_eq(PyBytesObject a, PyBytesObject b)
1508	{
1509	int cmp;
1510	Py_ssize_t len;
1511
1512	len = Py_SIZE(a);
1513	if (Py_SIZE(b) != len)
1514	return `0`;
1515
1516	if (a->ob_sval[`0`] != b->ob_sval[`0`])
1517	return `0`;
1518
1519	cmp = memcmp(a->ob_sval, b->ob_sval, len);
1520	return (cmp == `0`);
1521	}
1522
1523	static PyObject*
1524	bytes_richcompare(PyBytesObject a, PyBytesObject b, int op)
1525	{
1526	int c;
1527	Py_ssize_t len_a, len_b;
1528	Py_ssize_t min_len;
1529
1530	/ Make sure both arguments are strings. /
1531	if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1532	if (_Py_GetConfig()->bytes_warning && (op == Py_EQ \|\| op == Py_NE)) {
1533	if (PyUnicode_Check(a) \|\| PyUnicode_Check(b)) {
1534	if (PyErr_WarnEx(PyExc_BytesWarning,
1535	"Comparison between bytes and string", `1`))
1536	return NULL;
1537	}
1538	if (PyLong_Check(a) \|\| PyLong_Check(b)) {
1539	if (PyErr_WarnEx(PyExc_BytesWarning,
1540	"Comparison between bytes and int", `1`))
1541	return NULL;
1542	}
1543	}
1544	Py_RETURN_NOTIMPLEMENTED;
1545	}
1546	else if (a == b) {
1547	switch (op) {
1548	case Py_EQ:
1549	case Py_LE:
1550	case Py_GE:
1551	/ a byte string is equal to itself /
1552	Py_RETURN_TRUE;
1553	case Py_NE:
1554	case Py_LT:
1555	case Py_GT:
1556	Py_RETURN_FALSE;
1557	default:
1558	PyErr_BadArgument();
1559	return NULL;
1560	}
1561	}
1562	else if (op == Py_EQ \|\| op == Py_NE) {
1563	int eq = bytes_compare_eq(a, b);
1564	eq ^= (op == Py_NE);
1565	return PyBool_FromLong(eq);
1566	}
1567	else {
1568	len_a = Py_SIZE(a);
1569	len_b = Py_SIZE(b);
1570	min_len = Py_MIN(len_a, len_b);
1571	if (min_len > `0`) {
1572	c = Py_CHARMASK(a->ob_sval) - Py_CHARMASK(b->ob_sval);
1573	if (c == `0`)
1574	c = memcmp(a->ob_sval, b->ob_sval, min_len);
1575	}
1576	else
1577	c = `0`;
1578	if (c != `0`)
1579	Py_RETURN_RICHCOMPARE(c, `0`, op);
1580	Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1581	}
1582	}
1583
1584	static Py_hash_t
1585	bytes_hash(PyBytesObject *a)
1586	{
1587	if (a->ob_shash == -`1`) {
1588	/ Can't fail /
1589	a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1590	}
1591	return a->ob_shash;
1592	}
1593
1594	static PyObject*
1595	bytes_subscript(PyBytesObject* self, PyObject* item)
1596	{
1597	if (_PyIndex_Check(item)) {
1598	Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1599	if (i == -`1` && PyErr_Occurred())
1600	return NULL;
1601	if (i < `0`)
1602	i += PyBytes_GET_SIZE(self);
1603	if (i < `0` \|\| i >= PyBytes_GET_SIZE(self)) {
1604	PyErr_SetString(PyExc_IndexError,
1605	"index out of range");
1606	return NULL;
1607	}
1608	return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1609	}
1610	else if (PySlice_Check(item)) {
1611	Py_ssize_t start, stop, step, slicelength, i;
1612	size_t cur;
1613	const char* source_buf;
1614	char* result_buf;
1615	PyObject* result;
1616
1617	if (PySlice_Unpack(item, &start, &stop, &step) < `0`) {
1618	return NULL;
1619	}
1620	slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1621	&stop, step);
1622
1623	if (slicelength <= `0`) {
1624	return PyBytes_FromStringAndSize("", `0`);
1625	}
1626	else if (start == `0` && step == `1` &&
1627	slicelength == PyBytes_GET_SIZE(self) &&
1628	PyBytes_CheckExact(self)) {
1629	Py_INCREF(self);
1630	return (PyObject *)self;
1631	}
1632	else if (step == `1`) {
1633	return PyBytes_FromStringAndSize(
1634	PyBytes_AS_STRING(self) + start,
1635	slicelength);
1636	}
1637	else {
1638	source_buf = PyBytes_AS_STRING(self);
1639	result = PyBytes_FromStringAndSize(NULL, slicelength);
1640	if (result == NULL)
1641	return NULL;
1642
1643	result_buf = PyBytes_AS_STRING(result);
1644	for (cur = start, i = `0`; i < slicelength;
1645	cur += step, i++) {
1646	result_buf[i] = source_buf[cur];
1647	}
1648
1649	return result;
1650	}
1651	}
1652	else {
1653	PyErr_Format(PyExc_TypeError,
1654	"byte indices must be integers or slices, not %.200s",
1655	Py_TYPE(item)->tp_name);
1656	return NULL;
1657	}
1658	}
1659
1660	static int
1661	bytes_buffer_getbuffer(PyBytesObject self, Py_buffer view, int flags)
1662	{
1663	return PyBuffer_FillInfo(view, (PyObject)self, (void* *)self->ob_sval, Py_SIZE(self),
1664	`1`, flags);
1665	}
1666
1667	static PySequenceMethods bytes_as_sequence = {
1668	(lenfunc)bytes_length, /sq_length/
1669	(binaryfunc)bytes_concat, /sq_concat/
1670	(ssizeargfunc)bytes_repeat, /sq_repeat/
1671	(ssizeargfunc)bytes_item, /sq_item/
1672	`0`, /sq_slice/
1673	`0`, /sq_ass_item/
1674	`0`, /sq_ass_slice/
1675	(objobjproc)bytes_contains /sq_contains/
1676	};
1677
1678	static PyMappingMethods bytes_as_mapping = {
1679	(lenfunc)bytes_length,
1680	(binaryfunc)bytes_subscript,
1681	`0`,
1682	};
1683
1684	static PyBufferProcs bytes_as_buffer = {
1685	(getbufferproc)bytes_buffer_getbuffer,
1686	NULL,
1687	};
1688
1689
/* Strip modes passed as the `striptype` argument of the strip helpers. */
#define LEFTSTRIP 0   /* strip the leading end only */
#define RIGHTSTRIP 1  /* strip the trailing end only */
#define BOTHSTRIP 2   /* strip both ends */
1693
1694	/[clinic input]*
1695	bytes.split
1696
1697	sep: object = None
1698	The delimiter according which to split the bytes.
1699	None (the default value) means split on ASCII whitespace characters
1700	(space, tab, return, newline, formfeed, vertical tab).
1701	maxsplit: Py_ssize_t = -1
1702	Maximum number of splits to do.
1703	-1 (the default value) means no limit.
1704
1705	Return a list of the sections in the bytes, using sep as the delimiter.
1706	[clinic start generated code]/*
1707
1708	static PyObject *
1709	bytes_split_impl(PyBytesObject self, PyObject sep, Py_ssize_t maxsplit)
1710	/[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]/
1711	{
1712	Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1713	const char s = PyBytes_AS_STRING(self), sub;
1714	Py_buffer vsub;
1715	PyObject *list;
1716
1717	if (maxsplit < `0`)
1718	maxsplit = PY_SSIZE_T_MAX;
1719	if (sep == Py_None)
1720	return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1721	if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != `0`)
1722	return NULL;
1723	sub = vsub.buf;
1724	n = vsub.len;
1725
1726	list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1727	PyBuffer_Release(&vsub);
1728	return list;
1729	}
1730
1731	/[clinic input]*
1732	bytes.partition
1733
1734	sep: Py_buffer
1735	/
1736
1737	Partition the bytes into three parts using the given separator.
1738
1739	This will search for the separator sep in the bytes. If the separator is found,
1740	returns a 3-tuple containing the part before the separator, the separator
1741	itself, and the part after it.
1742
1743	If the separator is not found, returns a 3-tuple containing the original bytes
1744	object and two empty bytes objects.
1745	[clinic start generated code]/*
1746
1747	static PyObject *
1748	bytes_partition_impl(PyBytesObject self, Py_buffer sep)
1749	/[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]/
1750	{
1751	return stringlib_partition(
1752	(PyObject*) self,
1753	PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1754	sep->obj, (const char *)sep->buf, sep->len
1755	);
1756	}
1757
1758	/[clinic input]*
1759	bytes.rpartition
1760
1761	sep: Py_buffer
1762	/
1763
1764	Partition the bytes into three parts using the given separator.
1765
1766	This will search for the separator sep in the bytes, starting at the end. If
1767	the separator is found, returns a 3-tuple containing the part before the
1768	separator, the separator itself, and the part after it.
1769
1770	If the separator is not found, returns a 3-tuple containing two empty bytes
1771	objects and the original bytes object.
1772	[clinic start generated code]/*
1773
1774	static PyObject *
1775	bytes_rpartition_impl(PyBytesObject self, Py_buffer sep)
1776	/[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]/
1777	{
1778	return stringlib_rpartition(
1779	(PyObject*) self,
1780	PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1781	sep->obj, (const char *)sep->buf, sep->len
1782	);
1783	}
1784
1785	/[clinic input]*
1786	bytes.rsplit = bytes.split
1787
1788	Return a list of the sections in the bytes, using sep as the delimiter.
1789
1790	Splitting is done starting at the end of the bytes and working to the front.
1791	[clinic start generated code]/*
1792
1793	static PyObject *
1794	bytes_rsplit_impl(PyBytesObject self, PyObject sep, Py_ssize_t maxsplit)
1795	/[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]/
1796	{
1797	Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1798	const char s = PyBytes_AS_STRING(self), sub;
1799	Py_buffer vsub;
1800	PyObject *list;
1801
1802	if (maxsplit < `0`)
1803	maxsplit = PY_SSIZE_T_MAX;
1804	if (sep == Py_None)
1805	return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1806	if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != `0`)
1807	return NULL;
1808	sub = vsub.buf;
1809	n = vsub.len;
1810
1811	list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1812	PyBuffer_Release(&vsub);
1813	return list;
1814	}
1815
1816
1817	/[clinic input]*
1818	bytes.join
1819
1820	iterable_of_bytes: object
1821	/
1822
1823	Concatenate any number of bytes objects.
1824
1825	The bytes whose method is called is inserted in between each pair.
1826
1827	The result is returned as a new bytes object.
1828
1829	Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1830	[clinic start generated code]/*
1831
1832	static PyObject *
1833	bytes_join(PyBytesObject self, PyObject iterable_of_bytes)
1834	/[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]/
1835	{
1836	return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1837	}
1838
1839	PyObject *
1840	_PyBytes_Join(PyObject sep, PyObject x)
1841	{
1842	assert(sep != NULL && PyBytes_Check(sep));
1843	assert(x != NULL);
1844	return bytes_join((PyBytesObject*)sep, x);
1845	}
1846
1847	static PyObject *
1848	bytes_find(PyBytesObject self, PyObject args)
1849	{
1850	return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1851	}
1852
1853	static PyObject *
1854	bytes_index(PyBytesObject self, PyObject args)
1855	{
1856	return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1857	}
1858
1859
1860	static PyObject *
1861	bytes_rfind(PyBytesObject self, PyObject args)
1862	{
1863	return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1864	}
1865
1866
1867	static PyObject *
1868	bytes_rindex(PyBytesObject self, PyObject args)
1869	{
1870	return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1871	}
1872
1873
1874	Py_LOCAL_INLINE(PyObject *)
1875	do_xstrip(PyBytesObject self, int* striptype, PyObject *sepobj)
1876	{
1877	Py_buffer vsep;
1878	const char *s = PyBytes_AS_STRING(self);
1879	Py_ssize_t len = PyBytes_GET_SIZE(self);
1880	char *sep;
1881	Py_ssize_t seplen;
1882	Py_ssize_t i, j;
1883
1884	if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != `0`)
1885	return NULL;
1886	sep = vsep.buf;
1887	seplen = vsep.len;
1888
1889	i = `0`;
1890	if (striptype != RIGHTSTRIP) {
1891	while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1892	i++;
1893	}
1894	}
1895
1896	j = len;
1897	if (striptype != LEFTSTRIP) {
1898	do {
1899	j--;
1900	} while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1901	j++;
1902	}
1903
1904	PyBuffer_Release(&vsep);
1905
1906	if (i == `0` && j == len && PyBytes_CheckExact(self)) {
1907	Py_INCREF(self);
1908	return (PyObject*)self;
1909	}
1910	else
1911	return PyBytes_FromStringAndSize(s+i, j-i);
1912	}
1913
1914
1915	Py_LOCAL_INLINE(PyObject *)
1916	do_strip(PyBytesObject self, int* striptype)
1917	{
1918	const char *s = PyBytes_AS_STRING(self);
1919	Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1920
1921	i = `0`;
1922	if (striptype != RIGHTSTRIP) {
1923	while (i < len && Py_ISSPACE(s[i])) {
1924	i++;
1925	}
1926	}
1927
1928	j = len;
1929	if (striptype != LEFTSTRIP) {
1930	do {
1931	j--;
1932	} while (j >= i && Py_ISSPACE(s[j]));
1933	j++;
1934	}
1935
1936	if (i == `0` && j == len && PyBytes_CheckExact(self)) {
1937	Py_INCREF(self);
1938	return (PyObject*)self;
1939	}
1940	else
1941	return PyBytes_FromStringAndSize(s+i, j-i);
1942	}
1943
1944
1945	Py_LOCAL_INLINE(PyObject *)
1946	do_argstrip(PyBytesObject self, int* striptype, PyObject *bytes)
1947	{
1948	if (bytes != Py_None) {
1949	return do_xstrip(self, striptype, bytes);
1950	}
1951	return do_strip(self, striptype);
1952	}
1953
1954	/[clinic input]*
1955	bytes.strip
1956
1957	bytes: object = None
1958	/
1959
1960	Strip leading and trailing bytes contained in the argument.
1961
1962	If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1963	[clinic start generated code]/*
1964
1965	static PyObject *
1966	bytes_strip_impl(PyBytesObject self, PyObject bytes)
1967	/[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]/
1968	{
1969	return do_argstrip(self, BOTHSTRIP, bytes);
1970	}
1971
1972	/[clinic input]*
1973	bytes.lstrip
1974
1975	bytes: object = None
1976	/
1977
1978	Strip leading bytes contained in the argument.
1979
1980	If the argument is omitted or None, strip leading ASCII whitespace.
1981	[clinic start generated code]/*
1982
1983	static PyObject *
1984	bytes_lstrip_impl(PyBytesObject self, PyObject bytes)
1985	/[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]/
1986	{
1987	return do_argstrip(self, LEFTSTRIP, bytes);
1988	}
1989
1990	/[clinic input]*
1991	bytes.rstrip
1992
1993	bytes: object = None
1994	/
1995
1996	Strip trailing bytes contained in the argument.
1997
1998	If the argument is omitted or None, strip trailing ASCII whitespace.
1999	[clinic start generated code]/*
2000
2001	static PyObject *
2002	bytes_rstrip_impl(PyBytesObject self, PyObject bytes)
2003	/[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]/
2004	{
2005	return do_argstrip(self, RIGHTSTRIP, bytes);
2006	}
2007
2008
2009	static PyObject *
2010	bytes_count(PyBytesObject self, PyObject args)
2011	{
2012	return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2013	}
2014
2015
2016	/[clinic input]*
2017	bytes.translate
2018
2019	table: object
2020	Translation table, which must be a bytes object of length 256.
2021	/
2022	delete as deletechars: object(c_default="NULL") = b''
2023
2024	Return a copy with each character mapped by the given translation table.
2025
2026	All characters occurring in the optional argument delete are removed.
2027	The remaining characters are mapped through the given translation table.
2028	[clinic start generated code]/*
2029
2030	static PyObject *
2031	bytes_translate_impl(PyBytesObject self, PyObject table,
2032	PyObject *deletechars)
2033	/[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]/
2034	{
2035	const char *input;
2036	char *output;
2037	Py_buffer table_view = {NULL, NULL};
2038	Py_buffer del_table_view = {NULL, NULL};
2039	const char *table_chars;
2040	Py_ssize_t i, c, changed = `0`;
2041	PyObject input_obj = (PyObject)self;
2042	const char output_start, del_table_chars=NULL;
2043	Py_ssize_t inlen, tablen, dellen = `0`;
2044	PyObject *result;
2045	int trans_table[`256`];
2046
2047	if (PyBytes_Check(table)) {
2048	table_chars = PyBytes_AS_STRING(table);
2049	tablen = PyBytes_GET_SIZE(table);
2050	}
2051	else if (table == Py_None) {
2052	table_chars = NULL;
2053	tablen = `256`;
2054	}
2055	else {
2056	if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != `0`)
2057	return NULL;
2058	table_chars = table_view.buf;
2059	tablen = table_view.len;
2060	}
2061
2062	if (tablen != `256`) {
2063	PyErr_SetString(PyExc_ValueError,
2064	"translation table must be 256 characters long");
2065	PyBuffer_Release(&table_view);
2066	return NULL;
2067	}
2068
2069	if (deletechars != NULL) {
2070	if (PyBytes_Check(deletechars)) {
2071	del_table_chars = PyBytes_AS_STRING(deletechars);
2072	dellen = PyBytes_GET_SIZE(deletechars);
2073	}
2074	else {
2075	if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != `0`) {
2076	PyBuffer_Release(&table_view);
2077	return NULL;
2078	}
2079	del_table_chars = del_table_view.buf;
2080	dellen = del_table_view.len;
2081	}
2082	}
2083	else {
2084	del_table_chars = NULL;
2085	dellen = `0`;
2086	}
2087
2088	inlen = PyBytes_GET_SIZE(input_obj);
2089	result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2090	if (result == NULL) {
2091	PyBuffer_Release(&del_table_view);
2092	PyBuffer_Release(&table_view);
2093	return NULL;
2094	}
2095	output_start = output = PyBytes_AS_STRING(result);
2096	input = PyBytes_AS_STRING(input_obj);
2097
2098	if (dellen == `0` && table_chars != NULL) {
2099	/ If no deletions are required, use faster code /
2100	for (i = inlen; --i >= `0`; ) {
2101	c = Py_CHARMASK(*input++);
2102	if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2103	changed = `1`;
2104	}
2105	if (!changed && PyBytes_CheckExact(input_obj)) {
2106	Py_INCREF(input_obj);
2107	Py_DECREF(result);
2108	result = input_obj;
2109	}
2110	PyBuffer_Release(&del_table_view);
2111	PyBuffer_Release(&table_view);
2112	return result;
2113	}
2114
2115	if (table_chars == NULL) {
2116	for (i = `0`; i < `256`; i++)
2117	trans_table[i] = Py_CHARMASK(i);
2118	} else {
2119	for (i = `0`; i < `256`; i++)
2120	trans_table[i] = Py_CHARMASK(table_chars[i]);
2121	}
2122	PyBuffer_Release(&table_view);
2123
2124	for (i = `0`; i < dellen; i++)
2125	trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -`1`;
2126	PyBuffer_Release(&del_table_view);
2127
2128	for (i = inlen; --i >= `0`; ) {
2129	c = Py_CHARMASK(*input++);
2130	if (trans_table[c] != -`1`)
2131	if (Py_CHARMASK(output++ = (char*)trans_table[c]) == c)
2132	continue;
2133	changed = `1`;
2134	}
2135	if (!changed && PyBytes_CheckExact(input_obj)) {
2136	Py_DECREF(result);
2137	Py_INCREF(input_obj);
2138	return input_obj;
2139	}
2140	/ Fix the size of the resulting byte string /
2141	if (inlen > `0`)
2142	_PyBytes_Resize(&result, output - output_start);
2143	return result;
2144	}
2145
2146
2147	/[clinic input]*
2148
2149	@staticmethod
2150	bytes.maketrans
2151
2152	frm: Py_buffer
2153	to: Py_buffer
2154	/
2155
2156	Return a translation table useable for the bytes or bytearray translate method.
2157
2158	The returned table will be one where each byte in frm is mapped to the byte at
2159	the same position in to.
2160
2161	The bytes objects frm and to must be of the same length.
2162	[clinic start generated code]/*
2163
2164	static PyObject *
2165	bytes_maketrans_impl(Py_buffer frm, Py_buffer to)
2166	/[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]/
2167	{
2168	return _Py_bytes_maketrans(frm, to);
2169	}
2170
2171
2172	/[clinic input]*
2173	bytes.replace
2174
2175	old: Py_buffer
2176	new: Py_buffer
2177	count: Py_ssize_t = -1
2178	Maximum number of occurrences to replace.
2179	-1 (the default value) means replace all occurrences.
2180	/
2181
2182	Return a copy with all occurrences of substring old replaced by new.
2183
2184	If the optional argument count is given, only the first count occurrences are
2185	replaced.
2186	[clinic start generated code]/*
2187
2188	static PyObject *
2189	bytes_replace_impl(PyBytesObject self, Py_buffer old, Py_buffer *new,
2190	Py_ssize_t count)
2191	/[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]/
2192	{
2193	return stringlib_replace((PyObject *)self,
2194	(const char *)old->buf, old->len,
2195	(const char *)new->buf, new->len, count);
2196	}
2197
2198	/* End DALKE */
2199
2200	/[clinic input]*
2201	bytes.removeprefix as bytes_removeprefix
2202
2203	prefix: Py_buffer
2204	/
2205
2206	Return a bytes object with the given prefix string removed if present.
2207
2208	If the bytes starts with the prefix string, return bytes[len(prefix):].
2209	Otherwise, return a copy of the original bytes.
2210	[clinic start generated code]/*
2211
2212	static PyObject *
2213	bytes_removeprefix_impl(PyBytesObject self, Py_buffer prefix)
2214	/[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]/
2215	{
2216	const char *self_start = PyBytes_AS_STRING(self);
2217	Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2218	const char *prefix_start = prefix->buf;
2219	Py_ssize_t prefix_len = prefix->len;
2220
2221	if (self_len >= prefix_len
2222	&& prefix_len > `0`
2223	&& memcmp(self_start, prefix_start, prefix_len) == `0`)
2224	{
2225	return PyBytes_FromStringAndSize(self_start + prefix_len,
2226	self_len - prefix_len);
2227	}
2228
2229	if (PyBytes_CheckExact(self)) {
2230	Py_INCREF(self);
2231	return (PyObject *)self;
2232	}
2233
2234	return PyBytes_FromStringAndSize(self_start, self_len);
2235	}
2236
2237	/[clinic input]*
2238	bytes.removesuffix as bytes_removesuffix
2239
2240	suffix: Py_buffer
2241	/
2242
2243	Return a bytes object with the given suffix string removed if present.
2244
2245	If the bytes ends with the suffix string and that suffix is not empty,
2246	return bytes[:-len(prefix)]. Otherwise, return a copy of the original
2247	bytes.
2248	[clinic start generated code]/*
2249
2250	static PyObject *
2251	bytes_removesuffix_impl(PyBytesObject self, Py_buffer suffix)
2252	/[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]/
2253	{
2254	const char *self_start = PyBytes_AS_STRING(self);
2255	Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2256	const char *suffix_start = suffix->buf;
2257	Py_ssize_t suffix_len = suffix->len;
2258
2259	if (self_len >= suffix_len
2260	&& suffix_len > `0`
2261	&& memcmp(self_start + self_len - suffix_len,
2262	suffix_start, suffix_len) == `0`)
2263	{
2264	return PyBytes_FromStringAndSize(self_start,
2265	self_len - suffix_len);
2266	}
2267
2268	if (PyBytes_CheckExact(self)) {
2269	Py_INCREF(self);
2270	return (PyObject *)self;
2271	}
2272
2273	return PyBytes_FromStringAndSize(self_start, self_len);
2274	}
2275
2276	static PyObject *
2277	bytes_startswith(PyBytesObject self, PyObject args)
2278	{
2279	return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2280	}
2281
2282	static PyObject *
2283	bytes_endswith(PyBytesObject self, PyObject args)
2284	{
2285	return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2286	}
2287
2288
2289	/[clinic input]*
2290	bytes.decode
2291
2292	encoding: str(c_default="NULL") = 'utf-8'
2293	The encoding with which to decode the bytes.
2294	errors: str(c_default="NULL") = 'strict'
2295	The error handling scheme to use for the handling of decoding errors.
2296	The default is 'strict' meaning that decoding errors raise a
2297	UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2298	as well as any other name registered with codecs.register_error that
2299	can handle UnicodeDecodeErrors.
2300
2301	Decode the bytes using the codec registered for encoding.
2302	[clinic start generated code]/*
2303
2304	static PyObject *
2305	bytes_decode_impl(PyBytesObject self, const* char *encoding,
2306	const char *errors)
2307	/[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]/
2308	{
2309	return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2310	}
2311
2312
2313	/[clinic input]*
2314	bytes.splitlines
2315
2316	keepends: bool(accept={int}) = False
2317
2318	Return a list of the lines in the bytes, breaking at line boundaries.
2319
2320	Line breaks are not included in the resulting list unless keepends is given and
2321	true.
2322	[clinic start generated code]/*
2323
2324	static PyObject *
2325	bytes_splitlines_impl(PyBytesObject self, int* keepends)
2326	/[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]/
2327	{
2328	return stringlib_splitlines(
2329	(PyObject*) self, PyBytes_AS_STRING(self),
2330	PyBytes_GET_SIZE(self), keepends
2331	);
2332	}
2333
2334	/[clinic input]*
2335	@classmethod
2336	bytes.fromhex
2337
2338	string: unicode
2339	/
2340
2341	Create a bytes object from a string of hexadecimal numbers.
2342
2343	Spaces between two numbers are accepted.
2344	Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2345	[clinic start generated code]/*
2346
2347	static PyObject *
2348	bytes_fromhex_impl(PyTypeObject type, PyObject string)
2349	/[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]/
2350	{
2351	PyObject *result = _PyBytes_FromHex(string, `0`);
2352	if (type != &PyBytes_Type && result != NULL) {
2353	Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2354	}
2355	return result;
2356	}
2357
2358	PyObject*
2359	_PyBytes_FromHex(PyObject string, int* use_bytearray)
2360	{
2361	char *buf;
2362	Py_ssize_t hexlen, invalid_char;
2363	unsigned int top, bot;
2364	const Py_UCS1 str, end;
2365	_PyBytesWriter writer;
2366
2367	_PyBytesWriter_Init(&writer);
2368	writer.use_bytearray = use_bytearray;
2369
2370	assert(PyUnicode_Check(string));
2371	if (PyUnicode_READY(string))
2372	return NULL;
2373	hexlen = PyUnicode_GET_LENGTH(string);
2374
2375	if (!PyUnicode_IS_ASCII(string)) {
2376	const void *data = PyUnicode_DATA(string);
2377	unsigned int kind = PyUnicode_KIND(string);
2378	Py_ssize_t i;
2379
2380	/ search for the first non-ASCII character /
2381	for (i = `0`; i < hexlen; i++) {
2382	if (PyUnicode_READ(kind, data, i) >= `128`)
2383	break;
2384	}
2385	invalid_char = i;
2386	goto error;
2387	}
2388
2389	assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2390	str = PyUnicode_1BYTE_DATA(string);
2391
2392	/ This overestimates if there are spaces /
2393	buf = _PyBytesWriter_Alloc(&writer, hexlen / `2`);
2394	if (buf == NULL)
2395	return NULL;
2396
2397	end = str + hexlen;
2398	while (str < end) {
2399	/ skip over spaces in the input /
2400	if (Py_ISSPACE(*str)) {
2401	do {
2402	str++;
2403	} while (Py_ISSPACE(*str));
2404	if (str >= end)
2405	break;
2406	}
2407
2408	top = _PyLong_DigitValue[*str];
2409	if (top >= `16`) {
2410	invalid_char = str - PyUnicode_1BYTE_DATA(string);
2411	goto error;
2412	}
2413	str++;
2414
2415	bot = _PyLong_DigitValue[*str];
2416	if (bot >= `16`) {
2417	invalid_char = str - PyUnicode_1BYTE_DATA(string);
2418	goto error;
2419	}
2420	str++;
2421
2422	buf++ = (unsigned* char)((top << `4`) + bot);
2423	}
2424
2425	return _PyBytesWriter_Finish(&writer, buf);
2426
2427	error:
2428	PyErr_Format(PyExc_ValueError,
2429	"non-hexadecimal number found in "
2430	"fromhex() arg at position %zd", invalid_char);
2431	_PyBytesWriter_Dealloc(&writer);
2432	return NULL;
2433	}
2434
2435	/[clinic input]*
2436	bytes.hex
2437
2438	sep: object = NULL
2439	An optional single character or byte to separate hex bytes.
2440	bytes_per_sep: int = 1
2441	How many bytes between separators. Positive values count from the
2442	right, negative values count from the left.
2443
2444	Create a string of hexadecimal numbers from a bytes object.
2445
2446	Example:
2447	>>> value = b'\xb9\x01\xef'
2448	>>> value.hex()
2449	'b901ef'
2450	>>> value.hex(':')
2451	'b9:01:ef'
2452	>>> value.hex(':', 2)
2453	'b9:01ef'
2454	>>> value.hex(':', -2)
2455	'b901:ef'
2456	[clinic start generated code]/*
2457
2458	static PyObject *
2459	bytes_hex_impl(PyBytesObject self, PyObject sep, int bytes_per_sep)
2460	/[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]/
2461	{
2462	const char *argbuf = PyBytes_AS_STRING(self);
2463	Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2464	return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2465	}
2466
2467	static PyObject *
2468	bytes_getnewargs(PyBytesObject v, PyObject Py_UNUSED(ignored))
2469	{
2470	return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2471	}
2472
2473
2474	static PyMethodDef
2475	bytes_methods[] = {
2476	{"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2477	{"capitalize", stringlib_capitalize, METH_NOARGS,
2478	_Py_capitalize__doc__},
2479	STRINGLIB_CENTER_METHODDEF
2480	{"count", (PyCFunction)bytes_count, METH_VARARGS,
2481	_Py_count__doc__},
2482	BYTES_DECODE_METHODDEF
2483	{"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2484	_Py_endswith__doc__},
2485	STRINGLIB_EXPANDTABS_METHODDEF
2486	{"find", (PyCFunction)bytes_find, METH_VARARGS,
2487	_Py_find__doc__},
2488	BYTES_FROMHEX_METHODDEF
2489	BYTES_HEX_METHODDEF
2490	{"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
2491	{"isalnum", stringlib_isalnum, METH_NOARGS,
2492	_Py_isalnum__doc__},
2493	{"isalpha", stringlib_isalpha, METH_NOARGS,
2494	_Py_isalpha__doc__},
2495	{"isascii", stringlib_isascii, METH_NOARGS,
2496	_Py_isascii__doc__},
2497	{"isdigit", stringlib_isdigit, METH_NOARGS,
2498	_Py_isdigit__doc__},
2499	{"islower", stringlib_islower, METH_NOARGS,
2500	_Py_islower__doc__},
2501	{"isspace", stringlib_isspace, METH_NOARGS,
2502	_Py_isspace__doc__},
2503	{"istitle", stringlib_istitle, METH_NOARGS,
2504	_Py_istitle__doc__},
2505	{"isupper", stringlib_isupper, METH_NOARGS,
2506	_Py_isupper__doc__},
2507	BYTES_JOIN_METHODDEF
2508	STRINGLIB_LJUST_METHODDEF
2509	{"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2510	BYTES_LSTRIP_METHODDEF
2511	BYTES_MAKETRANS_METHODDEF
2512	BYTES_PARTITION_METHODDEF
2513	BYTES_REPLACE_METHODDEF
2514	BYTES_REMOVEPREFIX_METHODDEF
2515	BYTES_REMOVESUFFIX_METHODDEF
2516	{"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2517	{"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2518	STRINGLIB_RJUST_METHODDEF
2519	BYTES_RPARTITION_METHODDEF
2520	BYTES_RSPLIT_METHODDEF
2521	BYTES_RSTRIP_METHODDEF
2522	BYTES_SPLIT_METHODDEF
2523	BYTES_SPLITLINES_METHODDEF
2524	{"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2525	_Py_startswith__doc__},
2526	BYTES_STRIP_METHODDEF
2527	{"swapcase", stringlib_swapcase, METH_NOARGS,
2528	_Py_swapcase__doc__},
2529	{"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2530	BYTES_TRANSLATE_METHODDEF
2531	{"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2532	STRINGLIB_ZFILL_METHODDEF
2533	{NULL, NULL} / sentinel /
2534	};
2535
2536	static PyObject *
2537	bytes_mod(PyObject self, PyObject arg)
2538	{
2539	if (!PyBytes_Check(self)) {
2540	Py_RETURN_NOTIMPLEMENTED;
2541	}
2542	return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2543	arg, `0`);
2544	}
2545
2546	static PyNumberMethods bytes_as_number = {
2547	`0`, /nb_add/
2548	`0`, /nb_subtract/
2549	`0`, /nb_multiply/
2550	bytes_mod, /nb_remainder/
2551	};
2552
2553	static PyObject *
2554	bytes_subtype_new(PyTypeObject , PyObject );
2555
2556	/[clinic input]*
2557	@classmethod
2558	bytes.__new__ as bytes_new
2559
2560	source as x: object = NULL
2561	encoding: str = NULL
2562	errors: str = NULL
2563
2564	[clinic start generated code]/*
2565
2566	static PyObject *
2567	bytes_new_impl(PyTypeObject type, PyObject x, const char *encoding,
2568	const char *errors)
2569	/[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]/
2570	{
2571	PyObject *bytes;
2572	PyObject *func;
2573	Py_ssize_t size;
2574
2575	if (x == NULL) {
2576	if (encoding != NULL \|\| errors != NULL) {
2577	PyErr_SetString(PyExc_TypeError,
2578	encoding != NULL ?
2579	"encoding without a string argument" :
2580	"errors without a string argument");
2581	return NULL;
2582	}
2583	bytes = PyBytes_FromStringAndSize(NULL, `0`);
2584	}
2585	else if (encoding != NULL) {
2586	/ Encode via the codec registry /
2587	if (!PyUnicode_Check(x)) {
2588	PyErr_SetString(PyExc_TypeError,
2589	"encoding without a string argument");
2590	return NULL;
2591	}
2592	bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2593	}
2594	else if (errors != NULL) {
2595	PyErr_SetString(PyExc_TypeError,
2596	PyUnicode_Check(x) ?
2597	"string argument without an encoding" :
2598	"errors without a string argument");
2599	return NULL;
2600	}
2601	/ We'd like to call PyObject_Bytes here, but we need to check for an*
2602	integer argument before deferring to PyBytes_FromObject, something
2603	PyObject_Bytes doesn't do. /*
2604	else if ((func = _PyObject_LookupSpecial(x, &PyId___bytes__)) != NULL) {
2605	bytes = _PyObject_CallNoArg(func);
2606	Py_DECREF(func);
2607	if (bytes == NULL)
2608	return NULL;
2609	if (!PyBytes_Check(bytes)) {
2610	PyErr_Format(PyExc_TypeError,
2611	"__bytes__ returned non-bytes (type %.200s)",
2612	Py_TYPE(bytes)->tp_name);
2613	Py_DECREF(bytes);
2614	return NULL;
2615	}
2616	}
2617	else if (PyErr_Occurred())
2618	return NULL;
2619	else if (PyUnicode_Check(x)) {
2620	PyErr_SetString(PyExc_TypeError,
2621	"string argument without an encoding");
2622	return NULL;
2623	}
2624	/ Is it an integer? /
2625	else if (_PyIndex_Check(x)) {
2626	size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2627	if (size == -`1` && PyErr_Occurred()) {
2628	if (!PyErr_ExceptionMatches(PyExc_TypeError))
2629	return NULL;
2630	PyErr_Clear(); / fall through /
2631	bytes = PyBytes_FromObject(x);
2632	}
2633	else {
2634	if (size < `0`) {
2635	PyErr_SetString(PyExc_ValueError, "negative count");
2636	return NULL;
2637	}
2638	bytes = _PyBytes_FromSize(size, `1`);
2639	}
2640	}
2641	else {
2642	bytes = PyBytes_FromObject(x);
2643	}
2644
2645	if (bytes != NULL && type != &PyBytes_Type) {
2646	Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2647	}
2648
2649	return bytes;
2650	}
2651
2652	static PyObject*
2653	_PyBytes_FromBuffer(PyObject *x)
2654	{
2655	PyObject *new;
2656	Py_buffer view;
2657
2658	if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < `0`)
2659	return NULL;
2660
2661	new = PyBytes_FromStringAndSize(NULL, view.len);
2662	if (!new)
2663	goto fail;
2664	if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2665	&view, view.len, `'C'`) < `0`)
2666	goto fail;
2667	PyBuffer_Release(&view);
2668	return new;
2669
2670	fail:
2671	Py_XDECREF(new);
2672	PyBuffer_Release(&view);
2673	return NULL;
2674	}
2675
2676	static PyObject*
2677	_PyBytes_FromList(PyObject *x)
2678	{
2679	Py_ssize_t i, size = PyList_GET_SIZE(x);
2680	Py_ssize_t value;
2681	char *str;
2682	PyObject *item;
2683	_PyBytesWriter writer;
2684
2685	_PyBytesWriter_Init(&writer);
2686	str = _PyBytesWriter_Alloc(&writer, size);
2687	if (str == NULL)
2688	return NULL;
2689	writer.overallocate = `1`;
2690	size = writer.allocated;
2691
2692	for (i = `0`; i < PyList_GET_SIZE(x); i++) {
2693	item = PyList_GET_ITEM(x, i);
2694	Py_INCREF(item);
2695	value = PyNumber_AsSsize_t(item, NULL);
2696	Py_DECREF(item);
2697	if (value == -`1` && PyErr_Occurred())
2698	goto error;
2699
2700	if (value < `0` \|\| value >= `256`) {
2701	PyErr_SetString(PyExc_ValueError,
2702	"bytes must be in range(0, 256)");
2703	goto error;
2704	}
2705
2706	if (i >= size) {
2707	str = _PyBytesWriter_Resize(&writer, str, size+`1`);
2708	if (str == NULL)
2709	return NULL;
2710	size = writer.allocated;
2711	}
2712	str++ = (char*) value;
2713	}
2714	return _PyBytesWriter_Finish(&writer, str);
2715
2716	error:
2717	_PyBytesWriter_Dealloc(&writer);
2718	return NULL;
2719	}
2720
2721	static PyObject*
2722	_PyBytes_FromTuple(PyObject *x)
2723	{
2724	PyObject *bytes;
2725	Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2726	Py_ssize_t value;
2727	char *str;
2728	PyObject *item;
2729
2730	bytes = PyBytes_FromStringAndSize(NULL, size);
2731	if (bytes == NULL)
2732	return NULL;
2733	str = ((PyBytesObject *)bytes)->ob_sval;
2734
2735	for (i = `0`; i < size; i++) {
2736	item = PyTuple_GET_ITEM(x, i);
2737	value = PyNumber_AsSsize_t(item, NULL);
2738	if (value == -`1` && PyErr_Occurred())
2739	goto error;
2740
2741	if (value < `0` \|\| value >= `256`) {
2742	PyErr_SetString(PyExc_ValueError,
2743	"bytes must be in range(0, 256)");
2744	goto error;
2745	}
2746	str++ = (char*) value;
2747	}
2748	return bytes;
2749
2750	error:
2751	Py_DECREF(bytes);
2752	return NULL;
2753	}
2754
2755	static PyObject *
2756	_PyBytes_FromIterator(PyObject it, PyObject x)
2757	{
2758	char *str;
2759	Py_ssize_t i, size;
2760	_PyBytesWriter writer;
2761
2762	/ For iterator version, create a bytes object and resize as needed /
2763	size = PyObject_LengthHint(x, `64`);
2764	if (size == -`1` && PyErr_Occurred())
2765	return NULL;
2766
2767	_PyBytesWriter_Init(&writer);
2768	str = _PyBytesWriter_Alloc(&writer, size);
2769	if (str == NULL)
2770	return NULL;
2771	writer.overallocate = `1`;
2772	size = writer.allocated;
2773
2774	/ Run the iterator to exhaustion /
2775	for (i = `0`; ; i++) {
2776	PyObject *item;
2777	Py_ssize_t value;
2778
2779	/ Get the next item /
2780	item = PyIter_Next(it);
2781	if (item == NULL) {
2782	if (PyErr_Occurred())
2783	goto error;
2784	break;
2785	}
2786
2787	/ Interpret it as an int (__index__) /
2788	value = PyNumber_AsSsize_t(item, NULL);
2789	Py_DECREF(item);
2790	if (value == -`1` && PyErr_Occurred())
2791	goto error;
2792
2793	/ Range check /
2794	if (value < `0` \|\| value >= `256`) {
2795	PyErr_SetString(PyExc_ValueError,
2796	"bytes must be in range(0, 256)");
2797	goto error;
2798	}
2799
2800	/ Append the byte /
2801	if (i >= size) {
2802	str = _PyBytesWriter_Resize(&writer, str, size+`1`);
2803	if (str == NULL)
2804	return NULL;
2805	size = writer.allocated;
2806	}
2807	str++ = (char*) value;
2808	}
2809
2810	return _PyBytesWriter_Finish(&writer, str);
2811
2812	error:
2813	_PyBytesWriter_Dealloc(&writer);
2814	return NULL;
2815	}
2816
2817	PyObject *
2818	PyBytes_FromObject(PyObject *x)
2819	{
2820	PyObject it, result;
2821
2822	if (x == NULL) {
2823	PyErr_BadInternalCall();
2824	return NULL;
2825	}
2826
2827	if (PyBytes_CheckExact(x)) {
2828	Py_INCREF(x);
2829	return x;
2830	}
2831
2832	/ Use the modern buffer interface /
2833	if (PyObject_CheckBuffer(x))
2834	return _PyBytes_FromBuffer(x);
2835
2836	if (PyList_CheckExact(x))
2837	return _PyBytes_FromList(x);
2838
2839	if (PyTuple_CheckExact(x))
2840	return _PyBytes_FromTuple(x);
2841
2842	if (!PyUnicode_Check(x)) {
2843	it = PyObject_GetIter(x);
2844	if (it != NULL) {
2845	result = _PyBytes_FromIterator(it, x);
2846	Py_DECREF(it);
2847	return result;
2848	}
2849	if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2850	return NULL;
2851	}
2852	}
2853
2854	PyErr_Format(PyExc_TypeError,
2855	"cannot convert '%.200s' object to bytes",
2856	Py_TYPE(x)->tp_name);
2857	return NULL;
2858	}
2859
2860	static PyObject *
2861	bytes_subtype_new(PyTypeObject type, PyObject tmp)
2862	{
2863	PyObject *pnew;
2864	Py_ssize_t n;
2865
2866	assert(PyType_IsSubtype(type, &PyBytes_Type));
2867	assert(PyBytes_Check(tmp));
2868	n = PyBytes_GET_SIZE(tmp);
2869	pnew = type->tp_alloc(type, n);
2870	if (pnew != NULL) {
2871	memcpy(PyBytes_AS_STRING(pnew),
2872	PyBytes_AS_STRING(tmp), n+`1`);
2873	((PyBytesObject *)pnew)->ob_shash =
2874	((PyBytesObject *)tmp)->ob_shash;
2875	}
2876	return pnew;
2877	}
2878
2879	PyDoc_STRVAR(bytes_doc,
2880	"bytes(iterable_of_ints) -> bytes\n\
2881	bytes(string, encoding[, errors]) -> bytes\n\
2882	bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2883	bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2884	bytes() -> empty bytes object\n\
2885	\n\
2886	Construct an immutable array of bytes from:\n\
2887	- an iterable yielding integers in range(256)\n\
2888	- a text string encoded using the specified encoding\n\
2889	- any object implementing the buffer API.\n\
2890	- an integer");
2891
2892	static PyObject bytes_iter(PyObject seq);
2893
2894	PyTypeObject PyBytes_Type = {
2895	PyVarObject_HEAD_INIT(&PyType_Type, `0`)
2896	"bytes",
2897	PyBytesObject_SIZE,
2898	sizeof(char),
2899	`0`, / tp_dealloc /
2900	`0`, / tp_vectorcall_offset /
2901	`0`, / tp_getattr /
2902	`0`, / tp_setattr /
2903	`0`, / tp_as_async /
2904	(reprfunc)bytes_repr, / tp_repr /
2905	&bytes_as_number, / tp_as_number /
2906	&bytes_as_sequence, / tp_as_sequence /
2907	&bytes_as_mapping, / tp_as_mapping /
2908	(hashfunc)bytes_hash, / tp_hash /
2909	`0`, / tp_call /
2910	bytes_str, / tp_str /
2911	PyObject_GenericGetAttr, / tp_getattro /
2912	`0`, / tp_setattro /
2913	&bytes_as_buffer, / tp_as_buffer /
2914	Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_BASETYPE \|
2915	Py_TPFLAGS_BYTES_SUBCLASS \|
2916	_Py_TPFLAGS_MATCH_SELF, / tp_flags /
2917	bytes_doc, / tp_doc /
2918	`0`, / tp_traverse /
2919	`0`, / tp_clear /
2920	(richcmpfunc)bytes_richcompare, / tp_richcompare /
2921	`0`, / tp_weaklistoffset /
2922	bytes_iter, / tp_iter /
2923	`0`, / tp_iternext /
2924	bytes_methods, / tp_methods /
2925	`0`, / tp_members /
2926	`0`, / tp_getset /
2927	&PyBaseObject_Type, / tp_base /
2928	`0`, / tp_dict /
2929	`0`, / tp_descr_get /
2930	`0`, / tp_descr_set /
2931	`0`, / tp_dictoffset /
2932	`0`, / tp_init /
2933	`0`, / tp_alloc /
2934	bytes_new, / tp_new /
2935	PyObject_Del, / tp_free /
2936	};
2937
2938	void
2939	PyBytes_Concat(PyObject *pv, PyObject w)
2940	{
2941	assert(pv != NULL);
2942	if (*pv == NULL)
2943	return;
2944	if (w == NULL) {
2945	Py_CLEAR(*pv);
2946	return;
2947	}
2948
2949	if (Py_REFCNT(pv) == `1` && PyBytes_CheckExact(pv)) {
2950	/ Only one reference, so we can resize in place /
2951	Py_ssize_t oldsize;
2952	Py_buffer wb;
2953
2954	if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != `0`) {
2955	PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2956	Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2957	Py_CLEAR(*pv);
2958	return;
2959	}
2960
2961	oldsize = PyBytes_GET_SIZE(*pv);
2962	if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2963	PyErr_NoMemory();
2964	goto error;
2965	}
2966	if (_PyBytes_Resize(pv, oldsize + wb.len) < `0`)
2967	goto error;
2968
2969	memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2970	PyBuffer_Release(&wb);
2971	return;
2972
2973	error:
2974	PyBuffer_Release(&wb);
2975	Py_CLEAR(*pv);
2976	return;
2977	}
2978
2979	else {
2980	/ Multiple references, need to create new object /
2981	PyObject *v;
2982	v = bytes_concat(*pv, w);
2983	Py_SETREF(*pv, v);
2984	}
2985	}
2986
2987	void
2988	PyBytes_ConcatAndDel(PyObject *pv, PyObject w)
2989	{
2990	PyBytes_Concat(pv, w);
2991	Py_XDECREF(w);
2992	}
2993
2994
2995	/ The following function breaks the notion that bytes are immutable:*
2996	it changes the size of a bytes object. We get away with this only if there
2997	is only one module referencing the object. You can also think of it
2998	as creating a new bytes object and destroying the old one, only
2999	more efficiently. In any case, don't use this if the bytes object may
3000	already be known to some other part of the code...
3001	Note that if there's not enough memory to resize the bytes object, the
3002	original bytes object at pv is deallocated, pv is set to NULL, an "out of
3003	memory" exception is set, and -1 is returned. Else (on success) 0 is
3004	returned, and the value in pv may or may not be the same as on input.*
3005	As always, an extra byte is allocated for a trailing \0 byte (newsize
3006	does not* include that), and a trailing \0 byte is stored.*
3007	*/
3008
3009	int
3010	_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3011	{
3012	PyObject *v;
3013	PyBytesObject *sv;
3014	v = *pv;
3015	if (!PyBytes_Check(v) \|\| newsize < `0`) {
3016	goto error;
3017	}
3018	if (Py_SIZE(v) == newsize) {
3019	/ return early if newsize equals to v->ob_size /
3020	return `0`;
3021	}
3022	if (Py_SIZE(v) == `0`) {
3023	if (newsize == `0`) {
3024	return `0`;
3025	}
3026	*pv = _PyBytes_FromSize(newsize, `0`);
3027	Py_DECREF(v);
3028	return (*pv == NULL) ? -`1` : `0`;
3029	}
3030	if (Py_REFCNT(v) != `1`) {
3031	goto error;
3032	}
3033	if (newsize == `0`) {
3034	*pv = bytes_new_empty();
3035	Py_DECREF(v);
3036	return `0`;
3037	}
3038	/ XXX UNREF/NEWREF interface should be more symmetrical /
3039	#ifdef Py_REF_DEBUG
3040	_Py_RefTotal--;
3041	#endif
3042	#ifdef Py_TRACE_REFS
3043	_Py_ForgetReference(v);
3044	#endif
3045	pv = (PyObject )
3046	PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3047	if (*pv == NULL) {
3048	PyObject_Free(v);
3049	PyErr_NoMemory();
3050	return -`1`;
3051	}
3052	_Py_NewReference(*pv);
3053	sv = (PyBytesObject ) pv;
3054	Py_SET_SIZE(sv, newsize);
3055	sv->ob_sval[newsize] = `'\0'`;
3056	sv->ob_shash = -`1`; / invalidate cached hash value /
3057	return `0`;
3058	error:
3059	*pv = `0`;
3060	Py_DECREF(v);
3061	PyErr_BadInternalCall();
3062	return -`1`;
3063	}
3064
3065
3066	PyStatus
3067	_PyBytes_Init(PyInterpreterState *interp)
3068	{
3069	struct _Py_bytes_state *state = &interp->bytes;
3070	if (bytes_create_empty_string_singleton(state) < `0`) {
3071	return _PyStatus_NO_MEMORY();
3072	}
3073	return _PyStatus_OK();
3074	}
3075
3076
3077	void
3078	_PyBytes_Fini(PyInterpreterState *interp)
3079	{
3080	struct _Py_bytes_state* state = &interp->bytes;
3081	for (int i = `0`; i < UCHAR_MAX + `1`; i++) {
3082	Py_CLEAR(state->characters[i]);
3083	}
3084	Py_CLEAR(state->empty_string);
3085	}
3086
3087	/******************** Bytes Iterator *************************/
3088
3089	typedef struct {
3090	PyObject_HEAD
3091	Py_ssize_t it_index;
3092	PyBytesObject it_seq; /* Set to NULL when iterator is exhausted /
3093	} striterobject;
3094
3095	static void
3096	striter_dealloc(striterobject *it)
3097	{
3098	_PyObject_GC_UNTRACK(it);
3099	Py_XDECREF(it->it_seq);
3100	PyObject_GC_Del(it);
3101	}
3102
3103	static int
3104	striter_traverse(striterobject it, visitproc visit, void* *arg)
3105	{
3106	Py_VISIT(it->it_seq);
3107	return `0`;
3108	}
3109
3110	static PyObject *
3111	striter_next(striterobject *it)
3112	{
3113	PyBytesObject *seq;
3114
3115	assert(it != NULL);
3116	seq = it->it_seq;
3117	if (seq == NULL)
3118	return NULL;
3119	assert(PyBytes_Check(seq));
3120
3121	if (it->it_index < PyBytes_GET_SIZE(seq)) {
3122	return PyLong_FromLong(
3123	(unsigned char)seq->ob_sval[it->it_index++]);
3124	}
3125
3126	it->it_seq = NULL;
3127	Py_DECREF(seq);
3128	return NULL;
3129	}
3130
3131	static PyObject *
3132	striter_len(striterobject it, PyObject Py_UNUSED(ignored))
3133	{
3134	Py_ssize_t len = `0`;
3135	if (it->it_seq)
3136	len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3137	return PyLong_FromSsize_t(len);
3138	}
3139
3140	PyDoc_STRVAR(length_hint_doc,
3141	"Private method returning an estimate of len(list(it)).");
3142
3143	static PyObject *
3144	striter_reduce(striterobject it, PyObject Py_UNUSED(ignored))
3145	{
3146	_Py_IDENTIFIER(iter);
3147	if (it->it_seq != NULL) {
3148	return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
3149	it->it_seq, it->it_index);
3150	} else {
3151	return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
3152	}
3153	}
3154
3155	PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3156
3157	static PyObject *
3158	striter_setstate(striterobject it, PyObject state)
3159	{
3160	Py_ssize_t index = PyLong_AsSsize_t(state);
3161	if (index == -`1` && PyErr_Occurred())
3162	return NULL;
3163	if (it->it_seq != NULL) {
3164	if (index < `0`)
3165	index = `0`;
3166	else if (index > PyBytes_GET_SIZE(it->it_seq))
3167	index = PyBytes_GET_SIZE(it->it_seq); / iterator exhausted /
3168	it->it_index = index;
3169	}
3170	Py_RETURN_NONE;
3171	}
3172
3173	PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3174
3175	static PyMethodDef striter_methods[] = {
3176	{"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3177	length_hint_doc},
3178	{"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3179	reduce_doc},
3180	{"__setstate__", (PyCFunction)striter_setstate, METH_O,
3181	setstate_doc},
3182	{NULL, NULL} / sentinel /
3183	};
3184
3185	PyTypeObject PyBytesIter_Type = {
3186	PyVarObject_HEAD_INIT(&PyType_Type, `0`)
3187	"bytes_iterator", / tp_name /
3188	sizeof(striterobject), / tp_basicsize /
3189	`0`, / tp_itemsize /
3190	/ methods /
3191	(destructor)striter_dealloc, / tp_dealloc /
3192	`0`, / tp_vectorcall_offset /
3193	`0`, / tp_getattr /
3194	`0`, / tp_setattr /
3195	`0`, / tp_as_async /
3196	`0`, / tp_repr /
3197	`0`, / tp_as_number /
3198	`0`, / tp_as_sequence /
3199	`0`, / tp_as_mapping /
3200	`0`, / tp_hash /
3201	`0`, / tp_call /
3202	`0`, / tp_str /
3203	PyObject_GenericGetAttr, / tp_getattro /
3204	`0`, / tp_setattro /
3205	`0`, / tp_as_buffer /
3206	Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_HAVE_GC,/ tp_flags /
3207	`0`, / tp_doc /
3208	(traverseproc)striter_traverse, / tp_traverse /
3209	`0`, / tp_clear /
3210	`0`, / tp_richcompare /
3211	`0`, / tp_weaklistoffset /
3212	PyObject_SelfIter, / tp_iter /
3213	(iternextfunc)striter_next, / tp_iternext /
3214	striter_methods, / tp_methods /
3215	`0`,
3216	};
3217
3218	static PyObject *
3219	bytes_iter(PyObject *seq)
3220	{
3221	striterobject *it;
3222
3223	if (!PyBytes_Check(seq)) {
3224	PyErr_BadInternalCall();
3225	return NULL;
3226	}
3227	it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3228	if (it == NULL)
3229	return NULL;
3230	it->it_index = `0`;
3231	Py_INCREF(seq);
3232	it->it_seq = (PyBytesObject *)seq;
3233	_PyObject_GC_TRACK(it);
3234	return (PyObject *)it;
3235	}
3236
3237
/* _PyBytesWriter API */

/* The writer grows its buffer by size / OVERALLOCATE_FACTOR extra bytes
   when overallocation is enabled (see _PyBytesWriter_Resize()). */
#ifdef MS_WINDOWS
   /* On Windows, overallocate by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocate by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3247
3248	void
3249	_PyBytesWriter_Init(_PyBytesWriter *writer)
3250	{
3251	/ Set all attributes before small_buffer to 0 /
3252	memset(writer, `0`, offsetof(_PyBytesWriter, small_buffer));
3253	#ifndef NDEBUG
3254	memset(writer->small_buffer, PYMEM_CLEANBYTE,
3255	sizeof(writer->small_buffer));
3256	#endif
3257	}
3258
3259	void
3260	_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3261	{
3262	Py_CLEAR(writer->buffer);
3263	}
3264
3265	Py_LOCAL_INLINE(char*)
3266	_PyBytesWriter_AsString(_PyBytesWriter *writer)
3267	{
3268	if (writer->use_small_buffer) {
3269	assert(writer->buffer == NULL);
3270	return writer->small_buffer;
3271	}
3272	else if (writer->use_bytearray) {
3273	assert(writer->buffer != NULL);
3274	return PyByteArray_AS_STRING(writer->buffer);
3275	}
3276	else {
3277	assert(writer->buffer != NULL);
3278	return PyBytes_AS_STRING(writer->buffer);
3279	}
3280	}
3281
3282	Py_LOCAL_INLINE(Py_ssize_t)
3283	_PyBytesWriter_GetSize(_PyBytesWriter writer, char* *str)
3284	{
3285	const char *start = _PyBytesWriter_AsString(writer);
3286	assert(str != NULL);
3287	assert(str >= start);
3288	assert(str - start <= writer->allocated);
3289	return str - start;
3290	}
3291
3292	#ifndef NDEBUG
3293	Py_LOCAL_INLINE(int)
3294	_PyBytesWriter_CheckConsistency(_PyBytesWriter writer, char* *str)
3295	{
3296	const char start, end;
3297
3298	if (writer->use_small_buffer) {
3299	assert(writer->buffer == NULL);
3300	}
3301	else {
3302	assert(writer->buffer != NULL);
3303	if (writer->use_bytearray)
3304	assert(PyByteArray_CheckExact(writer->buffer));
3305	else
3306	assert(PyBytes_CheckExact(writer->buffer));
3307	assert(Py_REFCNT(writer->buffer) == `1`);
3308	}
3309
3310	if (writer->use_bytearray) {
3311	/ bytearray has its own overallocation algorithm,*
3312	writer overallocation must be disabled /*
3313	assert(!writer->overallocate);
3314	}
3315
3316	assert(`0` <= writer->allocated);
3317	assert(`0` <= writer->min_size && writer->min_size <= writer->allocated);
3318	/ the last byte must always be null /
3319	start = _PyBytesWriter_AsString(writer);
3320	assert(start[writer->allocated] == `0`);
3321
3322	end = start + writer->allocated;
3323	assert(str != NULL);
3324	assert(start <= str && str <= end);
3325	return `1`;
3326	}
3327	#endif
3328
3329	void*
3330	_PyBytesWriter_Resize(_PyBytesWriter writer, void* *str, Py_ssize_t size)
3331	{
3332	Py_ssize_t allocated, pos;
3333
3334	assert(_PyBytesWriter_CheckConsistency(writer, str));
3335	assert(writer->allocated < size);
3336
3337	allocated = size;
3338	if (writer->overallocate
3339	&& allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3340	/ overallocate to limit the number of realloc() /
3341	allocated += allocated / OVERALLOCATE_FACTOR;
3342	}
3343
3344	pos = _PyBytesWriter_GetSize(writer, str);
3345	if (!writer->use_small_buffer) {
3346	if (writer->use_bytearray) {
3347	if (PyByteArray_Resize(writer->buffer, allocated))
3348	goto error;
3349	/ writer->allocated can be smaller than writer->buffer->ob_alloc,*
3350	but we cannot use ob_alloc because bytes may need to be moved
3351	to use the whole buffer. bytearray uses an internal optimization
3352	to avoid moving or copying bytes when bytes are removed at the
3353	beginning (ex: del bytearray[:1]). /*
3354	}
3355	else {
3356	if (_PyBytes_Resize(&writer->buffer, allocated))
3357	goto error;
3358	}
3359	}
3360	else {
3361	/ convert from stack buffer to bytes object buffer /
3362	assert(writer->buffer == NULL);
3363
3364	if (writer->use_bytearray)
3365	writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3366	else
3367	writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3368	if (writer->buffer == NULL)
3369	goto error;
3370
3371	if (pos != `0`) {
3372	char *dest;
3373	if (writer->use_bytearray)
3374	dest = PyByteArray_AS_STRING(writer->buffer);
3375	else
3376	dest = PyBytes_AS_STRING(writer->buffer);
3377	memcpy(dest,
3378	writer->small_buffer,
3379	pos);
3380	}
3381
3382	writer->use_small_buffer = `0`;
3383	#ifndef NDEBUG
3384	memset(writer->small_buffer, PYMEM_CLEANBYTE,
3385	sizeof(writer->small_buffer));
3386	#endif
3387	}
3388	writer->allocated = allocated;
3389
3390	str = _PyBytesWriter_AsString(writer) + pos;
3391	assert(_PyBytesWriter_CheckConsistency(writer, str));
3392	return str;
3393
3394	error:
3395	_PyBytesWriter_Dealloc(writer);
3396	return NULL;
3397	}
3398
3399	void*
3400	_PyBytesWriter_Prepare(_PyBytesWriter writer, void* *str, Py_ssize_t size)
3401	{
3402	Py_ssize_t new_min_size;
3403
3404	assert(_PyBytesWriter_CheckConsistency(writer, str));
3405	assert(size >= `0`);
3406
3407	if (size == `0`) {
3408	/ nothing to do /
3409	return str;
3410	}
3411
3412	if (writer->min_size > PY_SSIZE_T_MAX - size) {
3413	PyErr_NoMemory();
3414	_PyBytesWriter_Dealloc(writer);
3415	return NULL;
3416	}
3417	new_min_size = writer->min_size + size;
3418
3419	if (new_min_size > writer->allocated)
3420	str = _PyBytesWriter_Resize(writer, str, new_min_size);
3421
3422	writer->min_size = new_min_size;
3423	return str;
3424	}
3425
3426	/ Allocate the buffer to write size bytes.*
3427	Return the pointer to the beginning of buffer data.
3428	Raise an exception and return NULL on error. /*
3429	void*
3430	_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3431	{
3432	/ ensure that _PyBytesWriter_Alloc() is only called once /
3433	assert(writer->min_size == `0` && writer->buffer == NULL);
3434	assert(size >= `0`);
3435
3436	writer->use_small_buffer = `1`;
3437	#ifndef NDEBUG
3438	writer->allocated = sizeof(writer->small_buffer) - `1`;
3439	/ In debug mode, don't use the full small buffer because it is less*
3440	efficient than bytes and bytearray objects to detect buffer underflow
3441	and buffer overflow. Use 10 bytes of the small buffer to test also
3442	code using the smaller buffer in debug mode.
3443
3444	Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3445	in debug mode to also be able to detect stack overflow when running
3446	tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3447	if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3448	stack overflow. /*
3449	writer->allocated = Py_MIN(writer->allocated, `10`);
3450	/ _PyBytesWriter_CheckConsistency() requires the last byte to be 0,*
3451	to detect buffer overflow /*
3452	writer->small_buffer[writer->allocated] = `0`;
3453	#else
3454	writer->allocated = sizeof(writer->small_buffer);
3455	#endif
3456	return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3457	}
3458
3459	PyObject *
3460	_PyBytesWriter_Finish(_PyBytesWriter writer, void* *str)
3461	{
3462	Py_ssize_t size;
3463	PyObject *result;
3464
3465	assert(_PyBytesWriter_CheckConsistency(writer, str));
3466
3467	size = _PyBytesWriter_GetSize(writer, str);
3468	if (size == `0` && !writer->use_bytearray) {
3469	Py_CLEAR(writer->buffer);
3470	/ Get the empty byte string singleton /
3471	result = PyBytes_FromStringAndSize(NULL, `0`);
3472	}
3473	else if (writer->use_small_buffer) {
3474	if (writer->use_bytearray) {
3475	result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3476	}
3477	else {
3478	result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3479	}
3480	}
3481	else {
3482	result = writer->buffer;
3483	writer->buffer = NULL;
3484
3485	if (size != writer->allocated) {
3486	if (writer->use_bytearray) {
3487	if (PyByteArray_Resize(result, size)) {
3488	Py_DECREF(result);
3489	return NULL;
3490	}
3491	}
3492	else {
3493	if (_PyBytes_Resize(&result, size)) {
3494	assert(result == NULL);
3495	return NULL;
3496	}
3497	}
3498	}
3499	}
3500	return result;
3501	}
3502
3503	void*
3504	_PyBytesWriter_WriteBytes(_PyBytesWriter writer, void* *ptr,
3505	const void *bytes, Py_ssize_t size)
3506	{
3507	char str = (char* *)ptr;
3508
3509	str = _PyBytesWriter_Prepare(writer, str, size);
3510	if (str == NULL)
3511	return NULL;
3512
3513	memcpy(str, bytes, size);
3514	str += size;
3515
3516	return str;
3517	}
3518

/* Browse the source code of python/Objects/bytesobject.c */