codecs.c source code [python/Python/codecs.c]

/* ------------------------------------------------------------------------

   Python Codec Registry and support functions

Written by Marc-Andre Lemburg ([email protected]).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
10
11	#include "Python.h"
12	#include "pycore_interp.h" // PyInterpreterState.codec_search_path
13	#include "pycore_pystate.h" // _PyInterpreterState_GET()
14	#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
15	#include <ctype.h>
16
17	const char *Py_hexdigits = "0123456789abcdef";
18
/* --- Codec Registry ----------------------------------------------------- */

/* Import the standard encodings package, which will register the first
   codec search function.

   This is done lazily so that the Unicode implementation does not slow
   down the startup of scripts that do not need it.

   ImportErrors are silently ignored by this function. Only one attempt is
   made.

*/
31
32	static int _PyCodecRegistry_Init(void); / Forward /
33
34	int PyCodec_Register(PyObject *search_function)
35	{
36	PyInterpreterState *interp = _PyInterpreterState_GET();
37	if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
38	goto onError;
39	if (search_function == NULL) {
40	PyErr_BadArgument();
41	goto onError;
42	}
43	if (!PyCallable_Check(search_function)) {
44	PyErr_SetString(PyExc_TypeError, "argument must be callable");
45	goto onError;
46	}
47	return PyList_Append(interp->codec_search_path, search_function);
48
49	onError:
50	return -`1`;
51	}
52
53	int
54	PyCodec_Unregister(PyObject *search_function)
55	{
56	PyInterpreterState *interp = PyInterpreterState_Get();
57	PyObject *codec_search_path = interp->codec_search_path;
58	/ Do nothing if codec_search_path is not created yet or was cleared. /
59	if (codec_search_path == NULL) {
60	return `0`;
61	}
62
63	assert(PyList_CheckExact(codec_search_path));
64	Py_ssize_t n = PyList_GET_SIZE(codec_search_path);
65	for (Py_ssize_t i = `0`; i < n; i++) {
66	PyObject *item = PyList_GET_ITEM(codec_search_path, i);
67	if (item == search_function) {
68	if (interp->codec_search_cache != NULL) {
69	assert(PyDict_CheckExact(interp->codec_search_cache));
70	PyDict_Clear(interp->codec_search_cache);
71	}
72	return PyList_SetSlice(codec_search_path, i, i+`1`, NULL);
73	}
74	}
75	return `0`;
76	}
77
78	extern int _Py_normalize_encoding(const char , char* *, size_t);
79
80	/ Convert a string to a normalized Python string(decoded from UTF-8): all characters are*
81	converted to lower case, spaces and hyphens are replaced with underscores. /*
82
83	static
84	PyObject normalizestring(const* char *string)
85	{
86	size_t len = strlen(string);
87	char *encoding;
88	PyObject *v;
89
90	if (len > PY_SSIZE_T_MAX) {
91	PyErr_SetString(PyExc_OverflowError, "string is too large");
92	return NULL;
93	}
94
95	encoding = PyMem_Malloc(len + `1`);
96	if (encoding == NULL)
97	return PyErr_NoMemory();
98
99	if (!_Py_normalize_encoding(string, encoding, len + `1`))
100	{
101	PyErr_SetString(PyExc_RuntimeError, "_Py_normalize_encoding() failed");
102	PyMem_Free(encoding);
103	return NULL;
104	}
105
106	v = PyUnicode_FromString(encoding);
107	PyMem_Free(encoding);
108	return v;
109	}
110
111	/ Lookup the given encoding and return a tuple providing the codec*
112	facilities.
113
114	The encoding string is looked up converted to all lower-case
115	characters. This makes encodings looked up through this mechanism
116	effectively case-insensitive.
117
118	If no codec is found, a LookupError is set and NULL returned.
119
120	As side effect, this tries to load the encodings package, if not
121	yet done. This is part of the lazy load strategy for the encodings
122	package.
123
124	*/
125
126	PyObject _PyCodec_Lookup(const* char *encoding)
127	{
128	if (encoding == NULL) {
129	PyErr_BadArgument();
130	return NULL;
131	}
132
133	PyInterpreterState *interp = _PyInterpreterState_GET();
134	if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) {
135	return NULL;
136	}
137
138	/ Convert the encoding to a normalized Python string: all*
139	characters are converted to lower case, spaces and hyphens are
140	replaced with underscores. /*
141	PyObject *v = normalizestring(encoding);
142	if (v == NULL) {
143	return NULL;
144	}
145	PyUnicode_InternInPlace(&v);
146
147	/ First, try to lookup the name in the registry dictionary /
148	PyObject *result = PyDict_GetItemWithError(interp->codec_search_cache, v);
149	if (result != NULL) {
150	Py_INCREF(result);
151	Py_DECREF(v);
152	return result;
153	}
154	else if (PyErr_Occurred()) {
155	goto onError;
156	}
157
158	/ Next, scan the search functions in order of registration /
159	const Py_ssize_t len = PyList_Size(interp->codec_search_path);
160	if (len < `0`)
161	goto onError;
162	if (len == `0`) {
163	PyErr_SetString(PyExc_LookupError,
164	"no codec search functions registered: "
165	"can't find encoding");
166	goto onError;
167	}
168
169	Py_ssize_t i;
170	for (i = `0`; i < len; i++) {
171	PyObject *func;
172
173	func = PyList_GetItem(interp->codec_search_path, i);
174	if (func == NULL)
175	goto onError;
176	result = PyObject_CallOneArg(func, v);
177	if (result == NULL)
178	goto onError;
179	if (result == Py_None) {
180	Py_DECREF(result);
181	continue;
182	}
183	if (!PyTuple_Check(result) \|\| PyTuple_GET_SIZE(result) != `4`) {
184	PyErr_SetString(PyExc_TypeError,
185	"codec search functions must return 4-tuples");
186	Py_DECREF(result);
187	goto onError;
188	}
189	break;
190	}
191	if (i == len) {
192	/ XXX Perhaps we should cache misses too ? /
193	PyErr_Format(PyExc_LookupError,
194	"unknown encoding: %s", encoding);
195	goto onError;
196	}
197
198	/ Cache and return the result /
199	if (PyDict_SetItem(interp->codec_search_cache, v, result) < `0`) {
200	Py_DECREF(result);
201	goto onError;
202	}
203	Py_DECREF(v);
204	return result;
205
206	onError:
207	Py_DECREF(v);
208	return NULL;
209	}
210
211	/ Codec registry encoding check API. /
212
213	int PyCodec_KnownEncoding(const char *encoding)
214	{
215	PyObject *codecs;
216
217	codecs = _PyCodec_Lookup(encoding);
218	if (!codecs) {
219	PyErr_Clear();
220	return `0`;
221	}
222	else {
223	Py_DECREF(codecs);
224	return `1`;
225	}
226	}
227
228	static
229	PyObject args_tuple(PyObject object,
230	const char *errors)
231	{
232	PyObject *args;
233
234	args = PyTuple_New(`1` + (errors != NULL));
235	if (args == NULL)
236	return NULL;
237	Py_INCREF(object);
238	PyTuple_SET_ITEM(args,`0`,object);
239	if (errors) {
240	PyObject *v;
241
242	v = PyUnicode_FromString(errors);
243	if (v == NULL) {
244	Py_DECREF(args);
245	return NULL;
246	}
247	PyTuple_SET_ITEM(args, `1`, v);
248	}
249	return args;
250	}
251
252	/ Helper function to get a codec item /
253
254	static
255	PyObject codec_getitem(const* char encoding, int* index)
256	{
257	PyObject *codecs;
258	PyObject *v;
259
260	codecs = _PyCodec_Lookup(encoding);
261	if (codecs == NULL)
262	return NULL;
263	v = PyTuple_GET_ITEM(codecs, index);
264	Py_DECREF(codecs);
265	Py_INCREF(v);
266	return v;
267	}
268
269	/ Helper functions to create an incremental codec. /
270	static
271	PyObject codec_makeincrementalcodec(PyObject codec_info,
272	const char *errors,
273	const char *attrname)
274	{
275	PyObject ret, inccodec;
276
277	inccodec = PyObject_GetAttrString(codec_info, attrname);
278	if (inccodec == NULL)
279	return NULL;
280	if (errors)
281	ret = PyObject_CallFunction(inccodec, "s", errors);
282	else
283	ret = _PyObject_CallNoArg(inccodec);
284	Py_DECREF(inccodec);
285	return ret;
286	}
287
288	static
289	PyObject codec_getincrementalcodec(const* char *encoding,
290	const char *errors,
291	const char *attrname)
292	{
293	PyObject codec_info, ret;
294
295	codec_info = _PyCodec_Lookup(encoding);
296	if (codec_info == NULL)
297	return NULL;
298	ret = codec_makeincrementalcodec(codec_info, errors, attrname);
299	Py_DECREF(codec_info);
300	return ret;
301	}
302
303	/ Helper function to create a stream codec. /
304
305	static
306	PyObject codec_getstreamcodec(const* char *encoding,
307	PyObject *stream,
308	const char *errors,
309	const int index)
310	{
311	PyObject codecs, streamcodec, *codeccls;
312
313	codecs = _PyCodec_Lookup(encoding);
314	if (codecs == NULL)
315	return NULL;
316
317	codeccls = PyTuple_GET_ITEM(codecs, index);
318	if (errors != NULL)
319	streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
320	else
321	streamcodec = PyObject_CallOneArg(codeccls, stream);
322	Py_DECREF(codecs);
323	return streamcodec;
324	}
325
326	/ Helpers to work with the result of _PyCodec_Lookup*
327
328	*/
329	PyObject _PyCodecInfo_GetIncrementalDecoder(PyObject codec_info,
330	const char *errors)
331	{
332	return codec_makeincrementalcodec(codec_info, errors,
333	"incrementaldecoder");
334	}
335
336	PyObject _PyCodecInfo_GetIncrementalEncoder(PyObject codec_info,
337	const char *errors)
338	{
339	return codec_makeincrementalcodec(codec_info, errors,
340	"incrementalencoder");
341	}
342
343
344	/ Convenience APIs to query the Codec registry.*
345
346	All APIs return a codec object with incremented refcount.
347
348	*/
349
350	PyObject PyCodec_Encoder(const* char *encoding)
351	{
352	return codec_getitem(encoding, `0`);
353	}
354
355	PyObject PyCodec_Decoder(const* char *encoding)
356	{
357	return codec_getitem(encoding, `1`);
358	}
359
360	PyObject PyCodec_IncrementalEncoder(const* char *encoding,
361	const char *errors)
362	{
363	return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
364	}
365
366	PyObject PyCodec_IncrementalDecoder(const* char *encoding,
367	const char *errors)
368	{
369	return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
370	}
371
372	PyObject PyCodec_StreamReader(const* char *encoding,
373	PyObject *stream,
374	const char *errors)
375	{
376	return codec_getstreamcodec(encoding, stream, errors, `2`);
377	}
378
379	PyObject PyCodec_StreamWriter(const* char *encoding,
380	PyObject *stream,
381	const char *errors)
382	{
383	return codec_getstreamcodec(encoding, stream, errors, `3`);
384	}
385
/* Helper that tries to ensure the reported exception chain indicates the
 * codec that was invoked to trigger the failure without changing the type
 * of the exception raised.
 */
static void
wrap_codec_error(const char *operation, const char *encoding)
{
    /* TrySetFromCause will replace the active exception with a suitably
     * updated clone if it can, otherwise it will leave the original
     * exception alone.
     */
    static const char message_format[] = "%s with '%s' codec failed";
    _PyErr_TrySetFromCause(message_format, operation, encoding);
}
401
402	/ Encode an object (e.g. a Unicode object) using the given encoding*
403	and return the resulting encoded object (usually a Python string).
404
405	errors is passed to the encoder factory as argument if non-NULL. /*
406
407	static PyObject *
408	_PyCodec_EncodeInternal(PyObject *object,
409	PyObject *encoder,
410	const char *encoding,
411	const char *errors)
412	{
413	PyObject args = NULL, result = NULL;
414	PyObject *v = NULL;
415
416	args = args_tuple(object, errors);
417	if (args == NULL)
418	goto onError;
419
420	result = PyObject_Call(encoder, args, NULL);
421	if (result == NULL) {
422	wrap_codec_error("encoding", encoding);
423	goto onError;
424	}
425
426	if (!PyTuple_Check(result) \|\|
427	PyTuple_GET_SIZE(result) != `2`) {
428	PyErr_SetString(PyExc_TypeError,
429	"encoder must return a tuple (object, integer)");
430	goto onError;
431	}
432	v = PyTuple_GET_ITEM(result,`0`);
433	Py_INCREF(v);
434	/ We don't check or use the second (integer) entry. /
435
436	Py_DECREF(args);
437	Py_DECREF(encoder);
438	Py_DECREF(result);
439	return v;
440
441	onError:
442	Py_XDECREF(result);
443	Py_XDECREF(args);
444	Py_XDECREF(encoder);
445	return NULL;
446	}
447
448	/ Decode an object (usually a Python string) using the given encoding*
449	and return an equivalent object (e.g. a Unicode object).
450
451	errors is passed to the decoder factory as argument if non-NULL. /*
452
453	static PyObject *
454	_PyCodec_DecodeInternal(PyObject *object,
455	PyObject *decoder,
456	const char *encoding,
457	const char *errors)
458	{
459	PyObject args = NULL, result = NULL;
460	PyObject *v;
461
462	args = args_tuple(object, errors);
463	if (args == NULL)
464	goto onError;
465
466	result = PyObject_Call(decoder, args, NULL);
467	if (result == NULL) {
468	wrap_codec_error("decoding", encoding);
469	goto onError;
470	}
471	if (!PyTuple_Check(result) \|\|
472	PyTuple_GET_SIZE(result) != `2`) {
473	PyErr_SetString(PyExc_TypeError,
474	"decoder must return a tuple (object,integer)");
475	goto onError;
476	}
477	v = PyTuple_GET_ITEM(result,`0`);
478	Py_INCREF(v);
479	/ We don't check or use the second (integer) entry. /
480
481	Py_DECREF(args);
482	Py_DECREF(decoder);
483	Py_DECREF(result);
484	return v;
485
486	onError:
487	Py_XDECREF(args);
488	Py_XDECREF(decoder);
489	Py_XDECREF(result);
490	return NULL;
491	}
492
493	/ Generic encoding/decoding API /
494	PyObject PyCodec_Encode(PyObject object,
495	const char *encoding,
496	const char *errors)
497	{
498	PyObject *encoder;
499
500	encoder = PyCodec_Encoder(encoding);
501	if (encoder == NULL)
502	return NULL;
503
504	return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
505	}
506
507	PyObject PyCodec_Decode(PyObject object,
508	const char *encoding,
509	const char *errors)
510	{
511	PyObject *decoder;
512
513	decoder = PyCodec_Decoder(encoding);
514	if (decoder == NULL)
515	return NULL;
516
517	return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
518	}
519
520	/ Text encoding/decoding API /
521	PyObject * _PyCodec_LookupTextEncoding(const char *encoding,
522	const char *alternate_command)
523	{
524	_Py_IDENTIFIER(_is_text_encoding);
525	PyObject *codec;
526	PyObject *attr;
527	int is_text_codec;
528
529	codec = _PyCodec_Lookup(encoding);
530	if (codec == NULL)
531	return NULL;
532
533	/ Backwards compatibility: assume any raw tuple describes a text*
534	* encoding, and the same for anything lacking the private
535	* attribute.
536	*/
537	if (!PyTuple_CheckExact(codec)) {
538	if (_PyObject_LookupAttrId(codec, &PyId__is_text_encoding, &attr) < `0`) {
539	Py_DECREF(codec);
540	return NULL;
541	}
542	if (attr != NULL) {
543	is_text_codec = PyObject_IsTrue(attr);
544	Py_DECREF(attr);
545	if (is_text_codec <= `0`) {
546	Py_DECREF(codec);
547	if (!is_text_codec)
548	PyErr_Format(PyExc_LookupError,
549	"'%.400s' is not a text encoding; "
550	"use %s to handle arbitrary codecs",
551	encoding, alternate_command);
552	return NULL;
553	}
554	}
555	}
556
557	/ This appears to be a valid text encoding /
558	return codec;
559	}
560
561
562	static
563	PyObject codec_getitem_checked(const* char *encoding,
564	const char *alternate_command,
565	int index)
566	{
567	PyObject *codec;
568	PyObject *v;
569
570	codec = _PyCodec_LookupTextEncoding(encoding, alternate_command);
571	if (codec == NULL)
572	return NULL;
573
574	v = PyTuple_GET_ITEM(codec, index);
575	Py_INCREF(v);
576	Py_DECREF(codec);
577	return v;
578	}
579
580	static PyObject * _PyCodec_TextEncoder(const char *encoding)
581	{
582	return codec_getitem_checked(encoding, "codecs.encode()", `0`);
583	}
584
585	static PyObject * _PyCodec_TextDecoder(const char *encoding)
586	{
587	return codec_getitem_checked(encoding, "codecs.decode()", `1`);
588	}
589
590	PyObject _PyCodec_EncodeText(PyObject object,
591	const char *encoding,
592	const char *errors)
593	{
594	PyObject *encoder;
595
596	encoder = _PyCodec_TextEncoder(encoding);
597	if (encoder == NULL)
598	return NULL;
599
600	return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
601	}
602
603	PyObject _PyCodec_DecodeText(PyObject object,
604	const char *encoding,
605	const char *errors)
606	{
607	PyObject *decoder;
608
609	decoder = _PyCodec_TextDecoder(encoding);
610	if (decoder == NULL)
611	return NULL;
612
613	return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
614	}
615
616	/ Register the error handling callback function error under the name*
617	name. This function will be called by the codec when it encounters
618	an unencodable characters/undecodable bytes and doesn't know the
619	callback name, when name is specified as the error parameter
620	in the call to the encode/decode function.
621	Return 0 on success, -1 on error /*
622	int PyCodec_RegisterError(const char name, PyObject error)
623	{
624	PyInterpreterState *interp = _PyInterpreterState_GET();
625	if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
626	return -`1`;
627	if (!PyCallable_Check(error)) {
628	PyErr_SetString(PyExc_TypeError, "handler must be callable");
629	return -`1`;
630	}
631	return PyDict_SetItemString(interp->codec_error_registry,
632	name, error);
633	}
634
635	/ Lookup the error handling callback function registered under the*
636	name error. As a special case NULL can be passed, in which case
637	the error handling callback for strict encoding will be returned. /*
638	PyObject PyCodec_LookupError(const* char *name)
639	{
640	PyObject *handler = NULL;
641
642	PyInterpreterState *interp = _PyInterpreterState_GET();
643	if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
644	return NULL;
645
646	if (name==NULL)
647	name = "strict";
648	handler = _PyDict_GetItemStringWithError(interp->codec_error_registry, name);
649	if (handler) {
650	Py_INCREF(handler);
651	}
652	else if (!PyErr_Occurred()) {
653	PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
654	}
655	return handler;
656	}
657
658	static void wrong_exception_type(PyObject *exc)
659	{
660	PyErr_Format(PyExc_TypeError,
661	"don't know how to handle %.200s in error callback",
662	Py_TYPE(exc)->tp_name);
663	}
664
665	PyObject PyCodec_StrictErrors(PyObject exc)
666	{
667	if (PyExceptionInstance_Check(exc))
668	PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
669	else
670	PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
671	return NULL;
672	}
673
674
675	PyObject PyCodec_IgnoreErrors(PyObject exc)
676	{
677	Py_ssize_t end;
678
679	if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
680	if (PyUnicodeEncodeError_GetEnd(exc, &end))
681	return NULL;
682	}
683	else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
684	if (PyUnicodeDecodeError_GetEnd(exc, &end))
685	return NULL;
686	}
687	else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
688	if (PyUnicodeTranslateError_GetEnd(exc, &end))
689	return NULL;
690	}
691	else {
692	wrong_exception_type(exc);
693	return NULL;
694	}
695	return Py_BuildValue("(Nn)", PyUnicode_New(`0`, `0`), end);
696	}
697
698
699	PyObject PyCodec_ReplaceErrors(PyObject exc)
700	{
701	Py_ssize_t start, end, i, len;
702
703	if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
704	PyObject *res;
705	Py_UCS1 *outp;
706	if (PyUnicodeEncodeError_GetStart(exc, &start))
707	return NULL;
708	if (PyUnicodeEncodeError_GetEnd(exc, &end))
709	return NULL;
710	len = end - start;
711	res = PyUnicode_New(len, `'?'`);
712	if (res == NULL)
713	return NULL;
714	assert(PyUnicode_KIND(res) == PyUnicode_1BYTE_KIND);
715	outp = PyUnicode_1BYTE_DATA(res);
716	for (i = `0`; i < len; ++i)
717	outp[i] = `'?'`;
718	assert(_PyUnicode_CheckConsistency(res, `1`));
719	return Py_BuildValue("(Nn)", res, end);
720	}
721	else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
722	if (PyUnicodeDecodeError_GetEnd(exc, &end))
723	return NULL;
724	return Py_BuildValue("(Cn)",
725	(int)Py_UNICODE_REPLACEMENT_CHARACTER,
726	end);
727	}
728	else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
729	PyObject *res;
730	Py_UCS2 *outp;
731	if (PyUnicodeTranslateError_GetStart(exc, &start))
732	return NULL;
733	if (PyUnicodeTranslateError_GetEnd(exc, &end))
734	return NULL;
735	len = end - start;
736	res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
737	if (res == NULL)
738	return NULL;
739	assert(PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
740	outp = PyUnicode_2BYTE_DATA(res);
741	for (i = `0`; i < len; i++)
742	outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER;
743	assert(_PyUnicode_CheckConsistency(res, `1`));
744	return Py_BuildValue("(Nn)", res, end);
745	}
746	else {
747	wrong_exception_type(exc);
748	return NULL;
749	}
750	}
751
752	PyObject PyCodec_XMLCharRefReplaceErrors(PyObject exc)
753	{
754	if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
755	PyObject *restuple;
756	PyObject *object;
757	Py_ssize_t i;
758	Py_ssize_t start;
759	Py_ssize_t end;
760	PyObject *res;
761	Py_UCS1 *outp;
762	Py_ssize_t ressize;
763	Py_UCS4 ch;
764	if (PyUnicodeEncodeError_GetStart(exc, &start))
765	return NULL;
766	if (PyUnicodeEncodeError_GetEnd(exc, &end))
767	return NULL;
768	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
769	return NULL;
770	if (end - start > PY_SSIZE_T_MAX / (`2`+`7`+`1`))
771	end = start + PY_SSIZE_T_MAX / (`2`+`7`+`1`);
772	for (i = start, ressize = `0`; i < end; ++i) {
773	/ object is guaranteed to be "ready" /
774	ch = PyUnicode_READ_CHAR(object, i);
775	if (ch<`10`)
776	ressize += `2`+`1`+`1`;
777	else if (ch<`100`)
778	ressize += `2`+`2`+`1`;
779	else if (ch<`1000`)
780	ressize += `2`+`3`+`1`;
781	else if (ch<`10000`)
782	ressize += `2`+`4`+`1`;
783	else if (ch<`100000`)
784	ressize += `2`+`5`+`1`;
785	else if (ch<`1000000`)
786	ressize += `2`+`6`+`1`;
787	else
788	ressize += `2`+`7`+`1`;
789	}
790	/ allocate replacement /
791	res = PyUnicode_New(ressize, `127`);
792	if (res == NULL) {
793	Py_DECREF(object);
794	return NULL;
795	}
796	outp = PyUnicode_1BYTE_DATA(res);
797	/ generate replacement /
798	for (i = start; i < end; ++i) {
799	int digits;
800	int base;
801	ch = PyUnicode_READ_CHAR(object, i);
802	*outp++ = `'&'`;
803	*outp++ = `'#'`;
804	if (ch<`10`) {
805	digits = `1`;
806	base = `1`;
807	}
808	else if (ch<`100`) {
809	digits = `2`;
810	base = `10`;
811	}
812	else if (ch<`1000`) {
813	digits = `3`;
814	base = `100`;
815	}
816	else if (ch<`10000`) {
817	digits = `4`;
818	base = `1000`;
819	}
820	else if (ch<`100000`) {
821	digits = `5`;
822	base = `10000`;
823	}
824	else if (ch<`1000000`) {
825	digits = `6`;
826	base = `100000`;
827	}
828	else {
829	digits = `7`;
830	base = `1000000`;
831	}
832	while (digits-->`0`) {
833	*outp++ = `'0'` + ch/base;
834	ch %= base;
835	base /= `10`;
836	}
837	*outp++ = `';'`;
838	}
839	assert(_PyUnicode_CheckConsistency(res, `1`));
840	restuple = Py_BuildValue("(Nn)", res, end);
841	Py_DECREF(object);
842	return restuple;
843	}
844	else {
845	wrong_exception_type(exc);
846	return NULL;
847	}
848	}
849
850	PyObject PyCodec_BackslashReplaceErrors(PyObject exc)
851	{
852	PyObject *object;
853	Py_ssize_t i;
854	Py_ssize_t start;
855	Py_ssize_t end;
856	PyObject *res;
857	Py_UCS1 *outp;
858	int ressize;
859	Py_UCS4 c;
860
861	if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
862	const unsigned char *p;
863	if (PyUnicodeDecodeError_GetStart(exc, &start))
864	return NULL;
865	if (PyUnicodeDecodeError_GetEnd(exc, &end))
866	return NULL;
867	if (!(object = PyUnicodeDecodeError_GetObject(exc)))
868	return NULL;
869	p = (const unsigned char*)PyBytes_AS_STRING(object);
870	res = PyUnicode_New(`4` * (end - start), `127`);
871	if (res == NULL) {
872	Py_DECREF(object);
873	return NULL;
874	}
875	outp = PyUnicode_1BYTE_DATA(res);
876	for (i = start; i < end; i++, outp += `4`) {
877	unsigned char c = p[i];
878	outp[`0`] = `'\\'`;
879	outp[`1`] = `'x'`;
880	outp[`2`] = Py_hexdigits[(c>>`4`)&`0xf`];
881	outp[`3`] = Py_hexdigits[c&`0xf`];
882	}
883
884	assert(_PyUnicode_CheckConsistency(res, `1`));
885	Py_DECREF(object);
886	return Py_BuildValue("(Nn)", res, end);
887	}
888	if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
889	if (PyUnicodeEncodeError_GetStart(exc, &start))
890	return NULL;
891	if (PyUnicodeEncodeError_GetEnd(exc, &end))
892	return NULL;
893	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
894	return NULL;
895	}
896	else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
897	if (PyUnicodeTranslateError_GetStart(exc, &start))
898	return NULL;
899	if (PyUnicodeTranslateError_GetEnd(exc, &end))
900	return NULL;
901	if (!(object = PyUnicodeTranslateError_GetObject(exc)))
902	return NULL;
903	}
904	else {
905	wrong_exception_type(exc);
906	return NULL;
907	}
908
909	if (end - start > PY_SSIZE_T_MAX / (`1`+`1`+`8`))
910	end = start + PY_SSIZE_T_MAX / (`1`+`1`+`8`);
911	for (i = start, ressize = `0`; i < end; ++i) {
912	/ object is guaranteed to be "ready" /
913	c = PyUnicode_READ_CHAR(object, i);
914	if (c >= `0x10000`) {
915	ressize += `1`+`1`+`8`;
916	}
917	else if (c >= `0x100`) {
918	ressize += `1`+`1`+`4`;
919	}
920	else
921	ressize += `1`+`1`+`2`;
922	}
923	res = PyUnicode_New(ressize, `127`);
924	if (res == NULL) {
925	Py_DECREF(object);
926	return NULL;
927	}
928	outp = PyUnicode_1BYTE_DATA(res);
929	for (i = start; i < end; ++i) {
930	c = PyUnicode_READ_CHAR(object, i);
931	*outp++ = `'\\'`;
932	if (c >= `0x00010000`) {
933	*outp++ = `'U'`;
934	*outp++ = Py_hexdigits[(c>>`28`)&`0xf`];
935	*outp++ = Py_hexdigits[(c>>`24`)&`0xf`];
936	*outp++ = Py_hexdigits[(c>>`20`)&`0xf`];
937	*outp++ = Py_hexdigits[(c>>`16`)&`0xf`];
938	*outp++ = Py_hexdigits[(c>>`12`)&`0xf`];
939	*outp++ = Py_hexdigits[(c>>`8`)&`0xf`];
940	}
941	else if (c >= `0x100`) {
942	*outp++ = `'u'`;
943	*outp++ = Py_hexdigits[(c>>`12`)&`0xf`];
944	*outp++ = Py_hexdigits[(c>>`8`)&`0xf`];
945	}
946	else
947	*outp++ = `'x'`;
948	*outp++ = Py_hexdigits[(c>>`4`)&`0xf`];
949	*outp++ = Py_hexdigits[c&`0xf`];
950	}
951
952	assert(_PyUnicode_CheckConsistency(res, `1`));
953	Py_DECREF(object);
954	return Py_BuildValue("(Nn)", res, end);
955	}
956
957	static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
958
959	PyObject PyCodec_NameReplaceErrors(PyObject exc)
960	{
961	if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
962	PyObject *restuple;
963	PyObject *object;
964	Py_ssize_t i;
965	Py_ssize_t start;
966	Py_ssize_t end;
967	PyObject *res;
968	Py_UCS1 *outp;
969	Py_ssize_t ressize;
970	int replsize;
971	Py_UCS4 c;
972	char buffer[`256`]; / NAME_MAXLEN /
973	if (PyUnicodeEncodeError_GetStart(exc, &start))
974	return NULL;
975	if (PyUnicodeEncodeError_GetEnd(exc, &end))
976	return NULL;
977	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
978	return NULL;
979	if (!ucnhash_capi) {
980	/ load the unicode data module /
981	ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
982	PyUnicodeData_CAPSULE_NAME, `1`);
983	if (!ucnhash_capi) {
984	return NULL;
985	}
986	}
987	for (i = start, ressize = `0`; i < end; ++i) {
988	/ object is guaranteed to be "ready" /
989	c = PyUnicode_READ_CHAR(object, i);
990	if (ucnhash_capi->getname(c, buffer, sizeof(buffer), `1`)) {
991	replsize = `1`+`1`+`1`+(int)strlen(buffer)+`1`;
992	}
993	else if (c >= `0x10000`) {
994	replsize = `1`+`1`+`8`;
995	}
996	else if (c >= `0x100`) {
997	replsize = `1`+`1`+`4`;
998	}
999	else
1000	replsize = `1`+`1`+`2`;
1001	if (ressize > PY_SSIZE_T_MAX - replsize)
1002	break;
1003	ressize += replsize;
1004	}
1005	end = i;
1006	res = PyUnicode_New(ressize, `127`);
1007	if (res==NULL)
1008	return NULL;
1009	for (i = start, outp = PyUnicode_1BYTE_DATA(res);
1010	i < end; ++i) {
1011	c = PyUnicode_READ_CHAR(object, i);
1012	*outp++ = `'\\'`;
1013	if (ucnhash_capi->getname(c, buffer, sizeof(buffer), `1`)) {
1014	*outp++ = `'N'`;
1015	*outp++ = `'{'`;
1016	strcpy((char *)outp, buffer);
1017	outp += strlen(buffer);
1018	*outp++ = `'}'`;
1019	continue;
1020	}
1021	if (c >= `0x00010000`) {
1022	*outp++ = `'U'`;
1023	*outp++ = Py_hexdigits[(c>>`28`)&`0xf`];
1024	*outp++ = Py_hexdigits[(c>>`24`)&`0xf`];
1025	*outp++ = Py_hexdigits[(c>>`20`)&`0xf`];
1026	*outp++ = Py_hexdigits[(c>>`16`)&`0xf`];
1027	*outp++ = Py_hexdigits[(c>>`12`)&`0xf`];
1028	*outp++ = Py_hexdigits[(c>>`8`)&`0xf`];
1029	}
1030	else if (c >= `0x100`) {
1031	*outp++ = `'u'`;
1032	*outp++ = Py_hexdigits[(c>>`12`)&`0xf`];
1033	*outp++ = Py_hexdigits[(c>>`8`)&`0xf`];
1034	}
1035	else
1036	*outp++ = `'x'`;
1037	*outp++ = Py_hexdigits[(c>>`4`)&`0xf`];
1038	*outp++ = Py_hexdigits[c&`0xf`];
1039	}
1040
1041	assert(outp == PyUnicode_1BYTE_DATA(res) + ressize);
1042	assert(_PyUnicode_CheckConsistency(res, `1`));
1043	restuple = Py_BuildValue("(Nn)", res, end);
1044	Py_DECREF(object);
1045	return restuple;
1046	}
1047	else {
1048	wrong_exception_type(exc);
1049	return NULL;
1050	}
1051	}
1052
#define ENC_UNKNOWN     -1
#define ENC_UTF8        0
#define ENC_UTF16BE     1
#define ENC_UTF16LE     2
#define ENC_UTF32BE     3
#define ENC_UTF32LE     4

/* Classify an encoding name as one of the ENC_* constants.

   Recognizes "utf" (any case), optionally followed by '-' or '_', then
   "8", "16" or "32"; the 16/32 forms may be followed by an optional '-' or
   '_' and "be"/"le" (any case), and without a suffix map to the native
   byte order. "CP_UTF8" (exact case) is accepted as UTF-8.

   *bytelength is set to 3, 2 or 4 for the UTF-8/16/32 families; it may
   also be written when ENC_UNKNOWN is returned, so callers must not rely
   on it in that case. */
static int
get_standard_encoding(const char *encoding, int *bytelength)
{
    if (Py_TOLOWER(encoding[0]) == 'u' &&
        Py_TOLOWER(encoding[1]) == 't' &&
        Py_TOLOWER(encoding[2]) == 'f') {
        encoding += 3;
        if (*encoding == '-' || *encoding == '_' )
            encoding++;
        if (encoding[0] == '8' && encoding[1] == '\0') {
            *bytelength = 3;
            return ENC_UTF8;
        }
        else if (encoding[0] == '1' && encoding[1] == '6') {
            encoding += 2;
            *bytelength = 2;
            if (*encoding == '\0') {
#ifdef WORDS_BIGENDIAN
                return ENC_UTF16BE;
#else
                return ENC_UTF16LE;
#endif
            }
            if (*encoding == '-' || *encoding == '_' )
                encoding++;
            /* Test encoding[0] first: after skipping a separator the string
               may be empty (e.g. "utf-16-"), and reading encoding[1] would
               then go past the NUL terminator. */
            if (encoding[0] != '\0' &&
                Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
                if (Py_TOLOWER(encoding[0]) == 'b')
                    return ENC_UTF16BE;
                if (Py_TOLOWER(encoding[0]) == 'l')
                    return ENC_UTF16LE;
            }
        }
        else if (encoding[0] == '3' && encoding[1] == '2') {
            encoding += 2;
            *bytelength = 4;
            if (*encoding == '\0') {
#ifdef WORDS_BIGENDIAN
                return ENC_UTF32BE;
#else
                return ENC_UTF32LE;
#endif
            }
            if (*encoding == '-' || *encoding == '_' )
                encoding++;
            /* Same guard as for UTF-16 (e.g. "utf-32_"). */
            if (encoding[0] != '\0' &&
                Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
                if (Py_TOLOWER(encoding[0]) == 'b')
                    return ENC_UTF32BE;
                if (Py_TOLOWER(encoding[0]) == 'l')
                    return ENC_UTF32LE;
            }
        }
    }
    else if (strcmp(encoding, "CP_UTF8") == 0) {
        *bytelength = 3;
        return ENC_UTF8;
    }
    return ENC_UNKNOWN;
}
1118
1119	/ This handler is declared static until someone demonstrates*
1120	a need to call it directly. /*
1121	static PyObject *
1122	PyCodec_SurrogatePassErrors(PyObject *exc)
1123	{
1124	PyObject *restuple;
1125	PyObject *object;
1126	PyObject *encode;
1127	const char *encoding;
1128	int code;
1129	int bytelength;
1130	Py_ssize_t i;
1131	Py_ssize_t start;
1132	Py_ssize_t end;
1133	PyObject *res;
1134
1135	if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
1136	unsigned char *outp;
1137	if (PyUnicodeEncodeError_GetStart(exc, &start))
1138	return NULL;
1139	if (PyUnicodeEncodeError_GetEnd(exc, &end))
1140	return NULL;
1141	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
1142	return NULL;
1143	if (!(encode = PyUnicodeEncodeError_GetEncoding(exc))) {
1144	Py_DECREF(object);
1145	return NULL;
1146	}
1147	if (!(encoding = PyUnicode_AsUTF8(encode))) {
1148	Py_DECREF(object);
1149	Py_DECREF(encode);
1150	return NULL;
1151	}
1152	code = get_standard_encoding(encoding, &bytelength);
1153	Py_DECREF(encode);
1154	if (code == ENC_UNKNOWN) {
1155	/ Not supported, fail with original exception /
1156	PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1157	Py_DECREF(object);
1158	return NULL;
1159	}
1160
1161	if (end - start > PY_SSIZE_T_MAX / bytelength)
1162	end = start + PY_SSIZE_T_MAX / bytelength;
1163	res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start));
1164	if (!res) {
1165	Py_DECREF(object);
1166	return NULL;
1167	}
1168	outp = (unsigned char*)PyBytes_AsString(res);
1169	for (i = start; i < end; i++) {
1170	/ object is guaranteed to be "ready" /
1171	Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
1172	if (!Py_UNICODE_IS_SURROGATE(ch)) {
1173	/ Not a surrogate, fail with original exception /
1174	PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1175	Py_DECREF(res);
1176	Py_DECREF(object);
1177	return NULL;
1178	}
1179	switch (code) {
1180	case ENC_UTF8:
1181	outp++ = (unsigned* char)(`0xe0` \| (ch >> `12`));
1182	outp++ = (unsigned* char)(`0x80` \| ((ch >> `6`) & `0x3f`));
1183	outp++ = (unsigned* char)(`0x80` \| (ch & `0x3f`));
1184	break;
1185	case ENC_UTF16LE:
1186	outp++ = (unsigned* char) ch;
1187	outp++ = (unsigned* char)(ch >> `8`);
1188	break;
1189	case ENC_UTF16BE:
1190	outp++ = (unsigned* char)(ch >> `8`);
1191	outp++ = (unsigned* char) ch;
1192	break;
1193	case ENC_UTF32LE:
1194	outp++ = (unsigned* char) ch;
1195	outp++ = (unsigned* char)(ch >> `8`);
1196	outp++ = (unsigned* char)(ch >> `16`);
1197	outp++ = (unsigned* char)(ch >> `24`);
1198	break;
1199	case ENC_UTF32BE:
1200	outp++ = (unsigned* char)(ch >> `24`);
1201	outp++ = (unsigned* char)(ch >> `16`);
1202	outp++ = (unsigned* char)(ch >> `8`);
1203	outp++ = (unsigned* char) ch;
1204	break;
1205	}
1206	}
1207	restuple = Py_BuildValue("(On)", res, end);
1208	Py_DECREF(res);
1209	Py_DECREF(object);
1210	return restuple;
1211	}
1212	else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
1213	const unsigned char *p;
1214	Py_UCS4 ch = `0`;
1215	if (PyUnicodeDecodeError_GetStart(exc, &start))
1216	return NULL;
1217	if (PyUnicodeDecodeError_GetEnd(exc, &end))
1218	return NULL;
1219	if (!(object = PyUnicodeDecodeError_GetObject(exc)))
1220	return NULL;
1221	p = (const unsigned char*)PyBytes_AS_STRING(object);
1222	if (!(encode = PyUnicodeDecodeError_GetEncoding(exc))) {
1223	Py_DECREF(object);
1224	return NULL;
1225	}
1226	if (!(encoding = PyUnicode_AsUTF8(encode))) {
1227	Py_DECREF(object);
1228	Py_DECREF(encode);
1229	return NULL;
1230	}
1231	code = get_standard_encoding(encoding, &bytelength);
1232	Py_DECREF(encode);
1233	if (code == ENC_UNKNOWN) {
1234	/ Not supported, fail with original exception /
1235	PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1236	Py_DECREF(object);
1237	return NULL;
1238	}
1239
1240	/ Try decoding a single surrogate character. If*
1241	there are more, let the codec call us again. /*
1242	p += start;
1243	if (PyBytes_GET_SIZE(object) - start >= bytelength) {
1244	switch (code) {
1245	case ENC_UTF8:
1246	if ((p[`0`] & `0xf0`) == `0xe0` &&
1247	(p[`1`] & `0xc0`) == `0x80` &&
1248	(p[`2`] & `0xc0`) == `0x80`) {
1249	/ it's a three-byte code /
1250	ch = ((p[`0`] & `0x0f`) << `12`) + ((p[`1`] & `0x3f`) << `6`) + (p[`2`] & `0x3f`);
1251	}
1252	break;
1253	case ENC_UTF16LE:
1254	ch = p[`1`] << `8` \| p[`0`];
1255	break;
1256	case ENC_UTF16BE:
1257	ch = p[`0`] << `8` \| p[`1`];
1258	break;
1259	case ENC_UTF32LE:
1260	ch = (p[`3`] << `24`) \| (p[`2`] << `16`) \| (p[`1`] << `8`) \| p[`0`];
1261	break;
1262	case ENC_UTF32BE:
1263	ch = (p[`0`] << `24`) \| (p[`1`] << `16`) \| (p[`2`] << `8`) \| p[`3`];
1264	break;
1265	}
1266	}
1267
1268	Py_DECREF(object);
1269	if (!Py_UNICODE_IS_SURROGATE(ch)) {
1270	/ it's not a surrogate - fail /
1271	PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1272	return NULL;
1273	}
1274	res = PyUnicode_FromOrdinal(ch);
1275	if (res == NULL)
1276	return NULL;
1277	return Py_BuildValue("(Nn)", res, start + bytelength);
1278	}
1279	else {
1280	wrong_exception_type(exc);
1281	return NULL;
1282	}
1283	}
1284
1285	static PyObject *
1286	PyCodec_SurrogateEscapeErrors(PyObject *exc)
1287	{
1288	PyObject *restuple;
1289	PyObject *object;
1290	Py_ssize_t i;
1291	Py_ssize_t start;
1292	Py_ssize_t end;
1293	PyObject *res;
1294
1295	if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
1296	char *outp;
1297	if (PyUnicodeEncodeError_GetStart(exc, &start))
1298	return NULL;
1299	if (PyUnicodeEncodeError_GetEnd(exc, &end))
1300	return NULL;
1301	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
1302	return NULL;
1303	res = PyBytes_FromStringAndSize(NULL, end-start);
1304	if (!res) {
1305	Py_DECREF(object);
1306	return NULL;
1307	}
1308	outp = PyBytes_AsString(res);
1309	for (i = start; i < end; i++) {
1310	/ object is guaranteed to be "ready" /
1311	Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
1312	if (ch < `0xdc80` \|\| ch > `0xdcff`) {
1313	/ Not a UTF-8b surrogate, fail with original exception /
1314	PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1315	Py_DECREF(res);
1316	Py_DECREF(object);
1317	return NULL;
1318	}
1319	*outp++ = ch - `0xdc00`;
1320	}
1321	restuple = Py_BuildValue("(On)", res, end);
1322	Py_DECREF(res);
1323	Py_DECREF(object);
1324	return restuple;
1325	}
1326	else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
1327	PyObject *str;
1328	const unsigned char *p;
1329	Py_UCS2 ch[`4`]; / decode up to 4 bad bytes. /
1330	int consumed = `0`;
1331	if (PyUnicodeDecodeError_GetStart(exc, &start))
1332	return NULL;
1333	if (PyUnicodeDecodeError_GetEnd(exc, &end))
1334	return NULL;
1335	if (!(object = PyUnicodeDecodeError_GetObject(exc)))
1336	return NULL;
1337	p = (const unsigned char*)PyBytes_AS_STRING(object);
1338	while (consumed < `4` && consumed < end-start) {
1339	/ Refuse to escape ASCII bytes. /
1340	if (p[start+consumed] < `128`)
1341	break;
1342	ch[consumed] = `0xdc00` + p[start+consumed];
1343	consumed++;
1344	}
1345	Py_DECREF(object);
1346	if (!consumed) {
1347	/ codec complained about ASCII byte. /
1348	PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1349	return NULL;
1350	}
1351	str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed);
1352	if (str == NULL)
1353	return NULL;
1354	return Py_BuildValue("(Nn)", str, start+consumed);
1355	}
1356	else {
1357	wrong_exception_type(exc);
1358	return NULL;
1359	}
1360	}
1361
1362
1363	static PyObject strict_errors(PyObject self, PyObject *exc)
1364	{
1365	return PyCodec_StrictErrors(exc);
1366	}
1367
1368
1369	static PyObject ignore_errors(PyObject self, PyObject *exc)
1370	{
1371	return PyCodec_IgnoreErrors(exc);
1372	}
1373
1374
1375	static PyObject replace_errors(PyObject self, PyObject *exc)
1376	{
1377	return PyCodec_ReplaceErrors(exc);
1378	}
1379
1380
1381	static PyObject xmlcharrefreplace_errors(PyObject self, PyObject *exc)
1382	{
1383	return PyCodec_XMLCharRefReplaceErrors(exc);
1384	}
1385
1386
1387	static PyObject backslashreplace_errors(PyObject self, PyObject *exc)
1388	{
1389	return PyCodec_BackslashReplaceErrors(exc);
1390	}
1391
1392	static PyObject namereplace_errors(PyObject self, PyObject *exc)
1393	{
1394	return PyCodec_NameReplaceErrors(exc);
1395	}
1396
1397	static PyObject surrogatepass_errors(PyObject self, PyObject *exc)
1398	{
1399	return PyCodec_SurrogatePassErrors(exc);
1400	}
1401
1402	static PyObject surrogateescape_errors(PyObject self, PyObject *exc)
1403	{
1404	return PyCodec_SurrogateEscapeErrors(exc);
1405	}
1406
1407	static int _PyCodecRegistry_Init(void)
1408	{
1409	static struct {
1410	const char *name;
1411	PyMethodDef def;
1412	} methods[] =
1413	{
1414	{
1415	"strict",
1416	{
1417	"strict_errors",
1418	strict_errors,
1419	METH_O,
1420	PyDoc_STR("Implements the 'strict' error handling, which "
1421	"raises a UnicodeError on coding errors.")
1422	}
1423	},
1424	{
1425	"ignore",
1426	{
1427	"ignore_errors",
1428	ignore_errors,
1429	METH_O,
1430	PyDoc_STR("Implements the 'ignore' error handling, which "
1431	"ignores malformed data and continues.")
1432	}
1433	},
1434	{
1435	"replace",
1436	{
1437	"replace_errors",
1438	replace_errors,
1439	METH_O,
1440	PyDoc_STR("Implements the 'replace' error handling, which "
1441	"replaces malformed data with a replacement marker.")
1442	}
1443	},
1444	{
1445	"xmlcharrefreplace",
1446	{
1447	"xmlcharrefreplace_errors",
1448	xmlcharrefreplace_errors,
1449	METH_O,
1450	PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, "
1451	"which replaces an unencodable character with the "
1452	"appropriate XML character reference.")
1453	}
1454	},
1455	{
1456	"backslashreplace",
1457	{
1458	"backslashreplace_errors",
1459	backslashreplace_errors,
1460	METH_O,
1461	PyDoc_STR("Implements the 'backslashreplace' error handling, "
1462	"which replaces malformed data with a backslashed "
1463	"escape sequence.")
1464	}
1465	},
1466	{
1467	"namereplace",
1468	{
1469	"namereplace_errors",
1470	namereplace_errors,
1471	METH_O,
1472	PyDoc_STR("Implements the 'namereplace' error handling, "
1473	"which replaces an unencodable character with a "
1474	"\\N{...} escape sequence.")
1475	}
1476	},
1477	{
1478	"surrogatepass",
1479	{
1480	"surrogatepass",
1481	surrogatepass_errors,
1482	METH_O
1483	}
1484	},
1485	{
1486	"surrogateescape",
1487	{
1488	"surrogateescape",
1489	surrogateescape_errors,
1490	METH_O
1491	}
1492	}
1493	};
1494
1495	PyInterpreterState *interp = _PyInterpreterState_GET();
1496	PyObject *mod;
1497
1498	if (interp->codec_search_path != NULL)
1499	return `0`;
1500
1501	interp->codec_search_path = PyList_New(`0`);
1502	if (interp->codec_search_path == NULL) {
1503	return -`1`;
1504	}
1505
1506	interp->codec_search_cache = PyDict_New();
1507	if (interp->codec_search_cache == NULL) {
1508	return -`1`;
1509	}
1510
1511	interp->codec_error_registry = PyDict_New();
1512	if (interp->codec_error_registry == NULL) {
1513	return -`1`;
1514	}
1515
1516	for (size_t i = `0`; i < Py_ARRAY_LENGTH(methods); ++i) {
1517	PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL);
1518	if (!func) {
1519	return -`1`;
1520	}
1521
1522	int res = PyCodec_RegisterError(methods[i].name, func);
1523	Py_DECREF(func);
1524	if (res) {
1525	return -`1`;
1526	}
1527	}
1528
1529	mod = PyImport_ImportModuleNoBlock("encodings");
1530	if (mod == NULL) {
1531	return -`1`;
1532	}
1533	Py_DECREF(mod);
1534	interp->codecs_initialized = `1`;
1535	return `0`;
1536	}
1537

Browse the source code of python/Python/codecs.c