1/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
13 lookup(encoding) -> CodecInfo object
14
15 The builtin Unicode codecs use the following interface:
16
     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (bytes object, characters consumed)
19
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
22
23 These <encoding>s are available: utf_8, unicode_escape,
24 raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
25
26
Written by Marc-Andre Lemburg (mal@lemburg.com).
28
29Copyright (c) Corporation for National Research Initiatives.
30
31 ------------------------------------------------------------------------ */
32
33#define PY_SSIZE_T_CLEAN
34#include "Python.h"
35
36#ifdef MS_WINDOWS
37#include <windows.h>
38#endif
39
40/*[clinic input]
41module _codecs
42[clinic start generated code]*/
43/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
44
45#include "clinic/_codecsmodule.c.h"
46
47/* --- Registry ----------------------------------------------------------- */
48
49/*[clinic input]
50_codecs.register
51 search_function: object
52 /
53
54Register a codec search function.
55
56Search functions are expected to take one argument, the encoding name in
57all lower case letters, and either return None, or a tuple of functions
58(encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
59[clinic start generated code]*/
60
61static PyObject *
62_codecs_register(PyObject *module, PyObject *search_function)
63/*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
64{
65 if (PyCodec_Register(search_function))
66 return NULL;
67
68 Py_RETURN_NONE;
69}
70
71/*[clinic input]
72_codecs.unregister
73 search_function: object
74 /
75
76Unregister a codec search function and clear the registry's cache.
77
78If the search function is not registered, do nothing.
79[clinic start generated code]*/
80
81static PyObject *
82_codecs_unregister(PyObject *module, PyObject *search_function)
83/*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
84{
85 if (PyCodec_Unregister(search_function) < 0) {
86 return NULL;
87 }
88
89 Py_RETURN_NONE;
90}
91
92/*[clinic input]
93_codecs.lookup
94 encoding: str
95 /
96
97Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
98[clinic start generated code]*/
99
100static PyObject *
101_codecs_lookup_impl(PyObject *module, const char *encoding)
102/*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
103{
104 return _PyCodec_Lookup(encoding);
105}
106
107/*[clinic input]
108_codecs.encode
109 obj: object
110 encoding: str(c_default="NULL") = "utf-8"
111 errors: str(c_default="NULL") = "strict"
112
113Encodes obj using the codec registered for encoding.
114
115The default encoding is 'utf-8'. errors may be given to set a
116different error handling scheme. Default is 'strict' meaning that encoding
117errors raise a ValueError. Other possible values are 'ignore', 'replace'
118and 'backslashreplace' as well as any other name registered with
119codecs.register_error that can handle ValueErrors.
120[clinic start generated code]*/
121
122static PyObject *
123_codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
124 const char *errors)
125/*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
126{
127 if (encoding == NULL)
128 encoding = PyUnicode_GetDefaultEncoding();
129
130 /* Encode via the codec registry */
131 return PyCodec_Encode(obj, encoding, errors);
132}
133
134/*[clinic input]
135_codecs.decode
136 obj: object
137 encoding: str(c_default="NULL") = "utf-8"
138 errors: str(c_default="NULL") = "strict"
139
140Decodes obj using the codec registered for encoding.
141
142Default encoding is 'utf-8'. errors may be given to set a
143different error handling scheme. Default is 'strict' meaning that encoding
144errors raise a ValueError. Other possible values are 'ignore', 'replace'
145and 'backslashreplace' as well as any other name registered with
146codecs.register_error that can handle ValueErrors.
147[clinic start generated code]*/
148
149static PyObject *
150_codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
151 const char *errors)
152/*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
153{
154 if (encoding == NULL)
155 encoding = PyUnicode_GetDefaultEncoding();
156
157 /* Decode via the codec registry */
158 return PyCodec_Decode(obj, encoding, errors);
159}
160
161/* --- Helpers ------------------------------------------------------------ */
162
163static
164PyObject *codec_tuple(PyObject *decoded,
165 Py_ssize_t len)
166{
167 if (decoded == NULL)
168 return NULL;
169 return Py_BuildValue("Nn", decoded, len);
170}
171
172/* --- String codecs ------------------------------------------------------ */
173/*[clinic input]
174_codecs.escape_decode
175 data: Py_buffer(accept={str, buffer})
176 errors: str(accept={str, NoneType}) = None
177 /
178[clinic start generated code]*/
179
180static PyObject *
181_codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
182 const char *errors)
183/*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
184{
185 PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
186 errors, 0, NULL);
187 return codec_tuple(decoded, data->len);
188}
189
190/*[clinic input]
191_codecs.escape_encode
192 data: object(subclass_of='&PyBytes_Type')
193 errors: str(accept={str, NoneType}) = None
194 /
195[clinic start generated code]*/
196
197static PyObject *
198_codecs_escape_encode_impl(PyObject *module, PyObject *data,
199 const char *errors)
200/*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
201{
202 Py_ssize_t size;
203 Py_ssize_t newsize;
204 PyObject *v;
205
206 size = PyBytes_GET_SIZE(data);
207 if (size > PY_SSIZE_T_MAX / 4) {
208 PyErr_SetString(PyExc_OverflowError,
209 "string is too large to encode");
210 return NULL;
211 }
212 newsize = 4*size;
213 v = PyBytes_FromStringAndSize(NULL, newsize);
214
215 if (v == NULL) {
216 return NULL;
217 }
218 else {
219 Py_ssize_t i;
220 char c;
221 char *p = PyBytes_AS_STRING(v);
222
223 for (i = 0; i < size; i++) {
224 /* There's at least enough room for a hex escape */
225 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
226 c = PyBytes_AS_STRING(data)[i];
227 if (c == '\'' || c == '\\')
228 *p++ = '\\', *p++ = c;
229 else if (c == '\t')
230 *p++ = '\\', *p++ = 't';
231 else if (c == '\n')
232 *p++ = '\\', *p++ = 'n';
233 else if (c == '\r')
234 *p++ = '\\', *p++ = 'r';
235 else if (c < ' ' || c >= 0x7f) {
236 *p++ = '\\';
237 *p++ = 'x';
238 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
239 *p++ = Py_hexdigits[c & 0xf];
240 }
241 else
242 *p++ = c;
243 }
244 *p = '\0';
245 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
246 return NULL;
247 }
248 }
249
250 return codec_tuple(v, size);
251}
252
253/* --- Decoder ------------------------------------------------------------ */
254/*[clinic input]
255_codecs.utf_7_decode
256 data: Py_buffer
257 errors: str(accept={str, NoneType}) = None
258 final: bool(accept={int}) = False
259 /
260[clinic start generated code]*/
261
262static PyObject *
263_codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
264 const char *errors, int final)
265/*[clinic end generated code: output=0cd3a944a32a4089 input=22c395d357815d26]*/
266{
267 Py_ssize_t consumed = data->len;
268 PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
269 errors,
270 final ? NULL : &consumed);
271 return codec_tuple(decoded, consumed);
272}
273
274/*[clinic input]
275_codecs.utf_8_decode
276 data: Py_buffer
277 errors: str(accept={str, NoneType}) = None
278 final: bool(accept={int}) = False
279 /
280[clinic start generated code]*/
281
282static PyObject *
283_codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
284 const char *errors, int final)
285/*[clinic end generated code: output=10f74dec8d9bb8bf input=f611b3867352ba59]*/
286{
287 Py_ssize_t consumed = data->len;
288 PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
289 errors,
290 final ? NULL : &consumed);
291 return codec_tuple(decoded, consumed);
292}
293
294/*[clinic input]
295_codecs.utf_16_decode
296 data: Py_buffer
297 errors: str(accept={str, NoneType}) = None
298 final: bool(accept={int}) = False
299 /
300[clinic start generated code]*/
301
302static PyObject *
303_codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
304 const char *errors, int final)
305/*[clinic end generated code: output=783b442abcbcc2d0 input=191d360bd7309180]*/
306{
307 int byteorder = 0;
308 /* This is overwritten unless final is true. */
309 Py_ssize_t consumed = data->len;
310 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
311 errors, &byteorder,
312 final ? NULL : &consumed);
313 return codec_tuple(decoded, consumed);
314}
315
316/*[clinic input]
317_codecs.utf_16_le_decode
318 data: Py_buffer
319 errors: str(accept={str, NoneType}) = None
320 final: bool(accept={int}) = False
321 /
322[clinic start generated code]*/
323
324static PyObject *
325_codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
326 const char *errors, int final)
327/*[clinic end generated code: output=899b9e6364379dcd input=c6904fdc27fb4724]*/
328{
329 int byteorder = -1;
330 /* This is overwritten unless final is true. */
331 Py_ssize_t consumed = data->len;
332 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
333 errors, &byteorder,
334 final ? NULL : &consumed);
335 return codec_tuple(decoded, consumed);
336}
337
338/*[clinic input]
339_codecs.utf_16_be_decode
340 data: Py_buffer
341 errors: str(accept={str, NoneType}) = None
342 final: bool(accept={int}) = False
343 /
344[clinic start generated code]*/
345
346static PyObject *
347_codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
348 const char *errors, int final)
349/*[clinic end generated code: output=49f6465ea07669c8 input=e49012400974649b]*/
350{
351 int byteorder = 1;
352 /* This is overwritten unless final is true. */
353 Py_ssize_t consumed = data->len;
354 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
355 errors, &byteorder,
356 final ? NULL : &consumed);
357 return codec_tuple(decoded, consumed);
358}
359
360/* This non-standard version also provides access to the byteorder
361 parameter of the builtin UTF-16 codec.
362
363 It returns a tuple (unicode, bytesread, byteorder) with byteorder
364 being the value in effect at the end of data.
365
366*/
367/*[clinic input]
368_codecs.utf_16_ex_decode
369 data: Py_buffer
370 errors: str(accept={str, NoneType}) = None
371 byteorder: int = 0
372 final: bool(accept={int}) = False
373 /
374[clinic start generated code]*/
375
376static PyObject *
377_codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
378 const char *errors, int byteorder, int final)
379/*[clinic end generated code: output=0f385f251ecc1988 input=5a9c19f2e6b6cf0e]*/
380{
381 /* This is overwritten unless final is true. */
382 Py_ssize_t consumed = data->len;
383
384 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
385 errors, &byteorder,
386 final ? NULL : &consumed);
387 if (decoded == NULL)
388 return NULL;
389 return Py_BuildValue("Nni", decoded, consumed, byteorder);
390}
391
392/*[clinic input]
393_codecs.utf_32_decode
394 data: Py_buffer
395 errors: str(accept={str, NoneType}) = None
396 final: bool(accept={int}) = False
397 /
398[clinic start generated code]*/
399
400static PyObject *
401_codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
402 const char *errors, int final)
403/*[clinic end generated code: output=2fc961807f7b145f input=fd7193965627eb58]*/
404{
405 int byteorder = 0;
406 /* This is overwritten unless final is true. */
407 Py_ssize_t consumed = data->len;
408 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
409 errors, &byteorder,
410 final ? NULL : &consumed);
411 return codec_tuple(decoded, consumed);
412}
413
414/*[clinic input]
415_codecs.utf_32_le_decode
416 data: Py_buffer
417 errors: str(accept={str, NoneType}) = None
418 final: bool(accept={int}) = False
419 /
420[clinic start generated code]*/
421
422static PyObject *
423_codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
424 const char *errors, int final)
425/*[clinic end generated code: output=ec8f46b67a94f3e6 input=9078ec70acfe7613]*/
426{
427 int byteorder = -1;
428 /* This is overwritten unless final is true. */
429 Py_ssize_t consumed = data->len;
430 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
431 errors, &byteorder,
432 final ? NULL : &consumed);
433 return codec_tuple(decoded, consumed);
434}
435
436/*[clinic input]
437_codecs.utf_32_be_decode
438 data: Py_buffer
439 errors: str(accept={str, NoneType}) = None
440 final: bool(accept={int}) = False
441 /
442[clinic start generated code]*/
443
444static PyObject *
445_codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
446 const char *errors, int final)
447/*[clinic end generated code: output=ff82bae862c92c4e input=f1ae1bbbb86648ff]*/
448{
449 int byteorder = 1;
450 /* This is overwritten unless final is true. */
451 Py_ssize_t consumed = data->len;
452 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
453 errors, &byteorder,
454 final ? NULL : &consumed);
455 return codec_tuple(decoded, consumed);
456}
457
458/* This non-standard version also provides access to the byteorder
459 parameter of the builtin UTF-32 codec.
460
461 It returns a tuple (unicode, bytesread, byteorder) with byteorder
462 being the value in effect at the end of data.
463
464*/
465/*[clinic input]
466_codecs.utf_32_ex_decode
467 data: Py_buffer
468 errors: str(accept={str, NoneType}) = None
469 byteorder: int = 0
470 final: bool(accept={int}) = False
471 /
472[clinic start generated code]*/
473
474static PyObject *
475_codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
476 const char *errors, int byteorder, int final)
477/*[clinic end generated code: output=6bfb177dceaf4848 input=e46a73bc859d0bd0]*/
478{
479 Py_ssize_t consumed = data->len;
480 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
481 errors, &byteorder,
482 final ? NULL : &consumed);
483 if (decoded == NULL)
484 return NULL;
485 return Py_BuildValue("Nni", decoded, consumed, byteorder);
486}
487
488/*[clinic input]
489_codecs.unicode_escape_decode
490 data: Py_buffer(accept={str, buffer})
491 errors: str(accept={str, NoneType}) = None
492 final: bool(accept={int}) = True
493 /
494[clinic start generated code]*/
495
496static PyObject *
497_codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
498 const char *errors, int final)
499/*[clinic end generated code: output=b284f97b12c635ee input=6154f039a9f7c639]*/
500{
501 Py_ssize_t consumed = data->len;
502 PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,
503 errors,
504 final ? NULL : &consumed);
505 return codec_tuple(decoded, consumed);
506}
507
508/*[clinic input]
509_codecs.raw_unicode_escape_decode
510 data: Py_buffer(accept={str, buffer})
511 errors: str(accept={str, NoneType}) = None
512 final: bool(accept={int}) = True
513 /
514[clinic start generated code]*/
515
516static PyObject *
517_codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
518 const char *errors, int final)
519/*[clinic end generated code: output=11dbd96301e2879e input=2d166191beb3235a]*/
520{
521 Py_ssize_t consumed = data->len;
522 PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
523 errors,
524 final ? NULL : &consumed);
525 return codec_tuple(decoded, consumed);
526}
527
528/*[clinic input]
529_codecs.latin_1_decode
530 data: Py_buffer
531 errors: str(accept={str, NoneType}) = None
532 /
533[clinic start generated code]*/
534
535static PyObject *
536_codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
537 const char *errors)
538/*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
539{
540 PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
541 return codec_tuple(decoded, data->len);
542}
543
544/*[clinic input]
545_codecs.ascii_decode
546 data: Py_buffer
547 errors: str(accept={str, NoneType}) = None
548 /
549[clinic start generated code]*/
550
551static PyObject *
552_codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
553 const char *errors)
554/*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
555{
556 PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
557 return codec_tuple(decoded, data->len);
558}
559
560/*[clinic input]
561_codecs.charmap_decode
562 data: Py_buffer
563 errors: str(accept={str, NoneType}) = None
564 mapping: object = None
565 /
566[clinic start generated code]*/
567
568static PyObject *
569_codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
570 const char *errors, PyObject *mapping)
571/*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
572{
573 PyObject *decoded;
574
575 if (mapping == Py_None)
576 mapping = NULL;
577
578 decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
579 return codec_tuple(decoded, data->len);
580}
581
582#ifdef MS_WINDOWS
583
584/*[clinic input]
585_codecs.mbcs_decode
586 data: Py_buffer
587 errors: str(accept={str, NoneType}) = None
588 final: bool(accept={int}) = False
589 /
590[clinic start generated code]*/
591
592static PyObject *
593_codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
594 const char *errors, int final)
595/*[clinic end generated code: output=39b65b8598938c4b input=1c1d50f08fa53789]*/
596{
597 Py_ssize_t consumed = data->len;
598 PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
599 errors, final ? NULL : &consumed);
600 return codec_tuple(decoded, consumed);
601}
602
603/*[clinic input]
604_codecs.oem_decode
605 data: Py_buffer
606 errors: str(accept={str, NoneType}) = None
607 final: bool(accept={int}) = False
608 /
609[clinic start generated code]*/
610
611static PyObject *
612_codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
613 const char *errors, int final)
614/*[clinic end generated code: output=da1617612f3fcad8 input=81b67cba811022e5]*/
615{
616 Py_ssize_t consumed = data->len;
617 PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
618 data->buf, data->len, errors, final ? NULL : &consumed);
619 return codec_tuple(decoded, consumed);
620}
621
622/*[clinic input]
623_codecs.code_page_decode
624 codepage: int
625 data: Py_buffer
626 errors: str(accept={str, NoneType}) = None
627 final: bool(accept={int}) = False
628 /
629[clinic start generated code]*/
630
631static PyObject *
632_codecs_code_page_decode_impl(PyObject *module, int codepage,
633 Py_buffer *data, const char *errors, int final)
634/*[clinic end generated code: output=53008ea967da3fff input=c5f58d036cb63575]*/
635{
636 Py_ssize_t consumed = data->len;
637 PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
638 data->buf, data->len,
639 errors,
640 final ? NULL : &consumed);
641 return codec_tuple(decoded, consumed);
642}
643
644#endif /* MS_WINDOWS */
645
646/* --- Encoder ------------------------------------------------------------ */
647
648/*[clinic input]
649_codecs.readbuffer_encode
650 data: Py_buffer(accept={str, buffer})
651 errors: str(accept={str, NoneType}) = None
652 /
653[clinic start generated code]*/
654
655static PyObject *
656_codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
657 const char *errors)
658/*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
659{
660 PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
661 return codec_tuple(result, data->len);
662}
663
664/*[clinic input]
665_codecs.utf_7_encode
666 str: unicode
667 errors: str(accept={str, NoneType}) = None
668 /
669[clinic start generated code]*/
670
671static PyObject *
672_codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
673 const char *errors)
674/*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
675{
676 return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
677 PyUnicode_GET_LENGTH(str));
678}
679
680/*[clinic input]
681_codecs.utf_8_encode
682 str: unicode
683 errors: str(accept={str, NoneType}) = None
684 /
685[clinic start generated code]*/
686
687static PyObject *
688_codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
689 const char *errors)
690/*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
691{
692 return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
693 PyUnicode_GET_LENGTH(str));
694}
695
696/* This version provides access to the byteorder parameter of the
697 builtin UTF-16 codecs as optional third argument. It defaults to 0
698 which means: use the native byte order and prepend the data with a
699 BOM mark.
700
701*/
702
703/*[clinic input]
704_codecs.utf_16_encode
705 str: unicode
706 errors: str(accept={str, NoneType}) = None
707 byteorder: int = 0
708 /
709[clinic start generated code]*/
710
711static PyObject *
712_codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
713 const char *errors, int byteorder)
714/*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
715{
716 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
717 PyUnicode_GET_LENGTH(str));
718}
719
720/*[clinic input]
721_codecs.utf_16_le_encode
722 str: unicode
723 errors: str(accept={str, NoneType}) = None
724 /
725[clinic start generated code]*/
726
727static PyObject *
728_codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
729 const char *errors)
730/*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
731{
732 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
733 PyUnicode_GET_LENGTH(str));
734}
735
736/*[clinic input]
737_codecs.utf_16_be_encode
738 str: unicode
739 errors: str(accept={str, NoneType}) = None
740 /
741[clinic start generated code]*/
742
743static PyObject *
744_codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
745 const char *errors)
746/*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
747{
748 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
749 PyUnicode_GET_LENGTH(str));
750}
751
752/* This version provides access to the byteorder parameter of the
753 builtin UTF-32 codecs as optional third argument. It defaults to 0
754 which means: use the native byte order and prepend the data with a
755 BOM mark.
756
757*/
758
759/*[clinic input]
760_codecs.utf_32_encode
761 str: unicode
762 errors: str(accept={str, NoneType}) = None
763 byteorder: int = 0
764 /
765[clinic start generated code]*/
766
767static PyObject *
768_codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
769 const char *errors, int byteorder)
770/*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
771{
772 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
773 PyUnicode_GET_LENGTH(str));
774}
775
776/*[clinic input]
777_codecs.utf_32_le_encode
778 str: unicode
779 errors: str(accept={str, NoneType}) = None
780 /
781[clinic start generated code]*/
782
783static PyObject *
784_codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
785 const char *errors)
786/*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
787{
788 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
789 PyUnicode_GET_LENGTH(str));
790}
791
792/*[clinic input]
793_codecs.utf_32_be_encode
794 str: unicode
795 errors: str(accept={str, NoneType}) = None
796 /
797[clinic start generated code]*/
798
799static PyObject *
800_codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
801 const char *errors)
802/*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
803{
804 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
805 PyUnicode_GET_LENGTH(str));
806}
807
808/*[clinic input]
809_codecs.unicode_escape_encode
810 str: unicode
811 errors: str(accept={str, NoneType}) = None
812 /
813[clinic start generated code]*/
814
815static PyObject *
816_codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
817 const char *errors)
818/*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
819{
820 return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
821 PyUnicode_GET_LENGTH(str));
822}
823
824/*[clinic input]
825_codecs.raw_unicode_escape_encode
826 str: unicode
827 errors: str(accept={str, NoneType}) = None
828 /
829[clinic start generated code]*/
830
831static PyObject *
832_codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
833 const char *errors)
834/*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
835{
836 return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
837 PyUnicode_GET_LENGTH(str));
838}
839
840/*[clinic input]
841_codecs.latin_1_encode
842 str: unicode
843 errors: str(accept={str, NoneType}) = None
844 /
845[clinic start generated code]*/
846
847static PyObject *
848_codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
849 const char *errors)
850/*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
851{
852 return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
853 PyUnicode_GET_LENGTH(str));
854}
855
856/*[clinic input]
857_codecs.ascii_encode
858 str: unicode
859 errors: str(accept={str, NoneType}) = None
860 /
861[clinic start generated code]*/
862
863static PyObject *
864_codecs_ascii_encode_impl(PyObject *module, PyObject *str,
865 const char *errors)
866/*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
867{
868 return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
869 PyUnicode_GET_LENGTH(str));
870}
871
872/*[clinic input]
873_codecs.charmap_encode
874 str: unicode
875 errors: str(accept={str, NoneType}) = None
876 mapping: object = None
877 /
878[clinic start generated code]*/
879
880static PyObject *
881_codecs_charmap_encode_impl(PyObject *module, PyObject *str,
882 const char *errors, PyObject *mapping)
883/*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
884{
885 if (mapping == Py_None)
886 mapping = NULL;
887
888 return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
889 PyUnicode_GET_LENGTH(str));
890}
891
892/*[clinic input]
893_codecs.charmap_build
894 map: unicode
895 /
896[clinic start generated code]*/
897
898static PyObject *
899_codecs_charmap_build_impl(PyObject *module, PyObject *map)
900/*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
901{
902 return PyUnicode_BuildEncodingMap(map);
903}
904
905#ifdef MS_WINDOWS
906
907/*[clinic input]
908_codecs.mbcs_encode
909 str: unicode
910 errors: str(accept={str, NoneType}) = None
911 /
912[clinic start generated code]*/
913
914static PyObject *
915_codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
916/*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
917{
918 return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
919 PyUnicode_GET_LENGTH(str));
920}
921
922/*[clinic input]
923_codecs.oem_encode
924 str: unicode
925 errors: str(accept={str, NoneType}) = None
926 /
927[clinic start generated code]*/
928
929static PyObject *
930_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
931/*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
932{
933 return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
934 PyUnicode_GET_LENGTH(str));
935}
936
937/*[clinic input]
938_codecs.code_page_encode
939 code_page: int
940 str: unicode
941 errors: str(accept={str, NoneType}) = None
942 /
943[clinic start generated code]*/
944
945static PyObject *
946_codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
947 const char *errors)
948/*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
949{
950 return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
951 PyUnicode_GET_LENGTH(str));
952}
953
954#endif /* MS_WINDOWS */
955
956/* --- Error handler registry --------------------------------------------- */
957
958/*[clinic input]
959_codecs.register_error
960 errors: str
961 handler: object
962 /
963
964Register the specified error handler under the name errors.
965
966handler must be a callable object, that will be called with an exception
967instance containing information about the location of the encoding/decoding
968error and must return a (replacement, new position) tuple.
969[clinic start generated code]*/
970
971static PyObject *
972_codecs_register_error_impl(PyObject *module, const char *errors,
973 PyObject *handler)
974/*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
975{
976 if (PyCodec_RegisterError(errors, handler))
977 return NULL;
978 Py_RETURN_NONE;
979}
980
981/*[clinic input]
982_codecs.lookup_error
983 name: str
984 /
985
986lookup_error(errors) -> handler
987
988Return the error handler for the specified error handling name or raise a
989LookupError, if no handler exists under this name.
990[clinic start generated code]*/
991
992static PyObject *
993_codecs_lookup_error_impl(PyObject *module, const char *name)
994/*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
995{
996 return PyCodec_LookupError(name);
997}
998
999/* --- Module API --------------------------------------------------------- */
1000
/* Method table for the _codecs module.
   The *_METHODDEF macros are not defined in this file; presumably they come
   from the generated "clinic/_codecsmodule.c.h" header included near the top
   -- TODO confirm.  Entries are not comma-separated here, so each macro is
   expected to supply its own trailing comma. */
static PyMethodDef _codecs_functions[] = {
 /* Codec registry and generic encode/decode entry points. */
 _CODECS_REGISTER_METHODDEF
 _CODECS_UNREGISTER_METHODDEF
 _CODECS_LOOKUP_METHODDEF
 _CODECS_ENCODE_METHODDEF
 _CODECS_DECODE_METHODDEF
 /* Bytes escape codec. */
 _CODECS_ESCAPE_ENCODE_METHODDEF
 _CODECS_ESCAPE_DECODE_METHODDEF
 /* UTF-8 / UTF-7. */
 _CODECS_UTF_8_ENCODE_METHODDEF
 _CODECS_UTF_8_DECODE_METHODDEF
 _CODECS_UTF_7_ENCODE_METHODDEF
 _CODECS_UTF_7_DECODE_METHODDEF
 /* UTF-16 family. */
 _CODECS_UTF_16_ENCODE_METHODDEF
 _CODECS_UTF_16_LE_ENCODE_METHODDEF
 _CODECS_UTF_16_BE_ENCODE_METHODDEF
 _CODECS_UTF_16_DECODE_METHODDEF
 _CODECS_UTF_16_LE_DECODE_METHODDEF
 _CODECS_UTF_16_BE_DECODE_METHODDEF
 _CODECS_UTF_16_EX_DECODE_METHODDEF
 /* UTF-32 family. */
 _CODECS_UTF_32_ENCODE_METHODDEF
 _CODECS_UTF_32_LE_ENCODE_METHODDEF
 _CODECS_UTF_32_BE_ENCODE_METHODDEF
 _CODECS_UTF_32_DECODE_METHODDEF
 _CODECS_UTF_32_LE_DECODE_METHODDEF
 _CODECS_UTF_32_BE_DECODE_METHODDEF
 _CODECS_UTF_32_EX_DECODE_METHODDEF
 /* Unicode escape codecs. */
 _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
 _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
 _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
 _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
 /* Latin-1, ASCII and charmap codecs. */
 _CODECS_LATIN_1_ENCODE_METHODDEF
 _CODECS_LATIN_1_DECODE_METHODDEF
 _CODECS_ASCII_ENCODE_METHODDEF
 _CODECS_ASCII_DECODE_METHODDEF
 _CODECS_CHARMAP_ENCODE_METHODDEF
 _CODECS_CHARMAP_DECODE_METHODDEF
 _CODECS_CHARMAP_BUILD_METHODDEF
 _CODECS_READBUFFER_ENCODE_METHODDEF
 /* The mbcs/oem/code_page functions are only compiled under MS_WINDOWS
    in this file, yet their macros are listed unconditionally.
    NOTE(review): presumably the clinic header defines these macros as
    empty on other platforms -- verify. */
 _CODECS_MBCS_ENCODE_METHODDEF
 _CODECS_MBCS_DECODE_METHODDEF
 _CODECS_OEM_ENCODE_METHODDEF
 _CODECS_OEM_DECODE_METHODDEF
 _CODECS_CODE_PAGE_ENCODE_METHODDEF
 _CODECS_CODE_PAGE_DECODE_METHODDEF
 /* Error handler registry. */
 _CODECS_REGISTER_ERROR_METHODDEF
 _CODECS_LOOKUP_ERROR_METHODDEF
 {NULL, NULL} /* sentinel */
};
1049
/* Slot table referenced by the module definition below.  It holds only the
   {0, NULL} terminator entry, i.e. no slots are declared. */
static PyModuleDef_Slot _codecs_slots[] = {
 {0, NULL}
};
1053
/* Module definition for "_codecs", passed to PyModuleDef_Init() by
   PyInit__codecs().  The initializer is positional; the field labels below
   follow the PyModuleDef layout from the Python C API documentation. */
static struct PyModuleDef codecsmodule = {
 PyModuleDef_HEAD_INIT,
 "_codecs", /* m_name */
 NULL, /* m_doc: no module docstring */
 0, /* m_size */
 _codecs_functions, /* m_methods */
 _codecs_slots, /* m_slots */
 NULL, /* m_traverse */
 NULL, /* m_clear */
 NULL /* m_free */
};
1065
1066PyMODINIT_FUNC
1067PyInit__codecs(void)
1068{
1069 return PyModuleDef_Init(&codecsmodule);
1070}
1071