1 | /* ------------------------------------------------------------------------ |
2 | |
3 | _codecs -- Provides access to the codec registry and the builtin |
4 | codecs. |
5 | |
6 | This module should never be imported directly. The standard library |
7 | module "codecs" wraps this builtin module for use within Python. |
8 | |
9 | The codec registry is accessible via: |
10 | |
11 | register(search_function) -> None |
12 | |
13 | lookup(encoding) -> CodecInfo object |
14 | |
15 | The builtin Unicode codecs use the following interface: |
16 | |
     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (bytes object, characters consumed)
19 | |
20 | <encoding>_decode(char_buffer_obj[,errors='strict']) -> |
21 | (Unicode object, bytes consumed) |
22 | |
   These <encoding>s are available: utf_7, utf_8, utf_16, utf_16_le,
   utf_16_be, utf_32, utf_32_le, utf_32_be, unicode_escape,
   raw_unicode_escape, latin_1, ascii (7-bit), charmap, and (on win32
   only) mbcs, oem, code_page.
25 | |
26 | |
Written by Marc-Andre Lemburg (mal@lemburg.com).
28 | |
29 | Copyright (c) Corporation for National Research Initiatives. |
30 | |
31 | ------------------------------------------------------------------------ */ |
32 | |
33 | #define PY_SSIZE_T_CLEAN |
34 | #include "Python.h" |
35 | |
36 | #ifdef MS_WINDOWS |
37 | #include <windows.h> |
38 | #endif |
39 | |
40 | /*[clinic input] |
41 | module _codecs |
42 | [clinic start generated code]*/ |
43 | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/ |
44 | |
45 | #include "clinic/_codecsmodule.c.h" |
46 | |
47 | /* --- Registry ----------------------------------------------------------- */ |
48 | |
49 | /*[clinic input] |
50 | _codecs.register |
51 | search_function: object |
52 | / |
53 | |
54 | Register a codec search function. |
55 | |
56 | Search functions are expected to take one argument, the encoding name in |
57 | all lower case letters, and either return None, or a tuple of functions |
58 | (encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object). |
59 | [clinic start generated code]*/ |
60 | |
61 | static PyObject * |
62 | _codecs_register(PyObject *module, PyObject *search_function) |
63 | /*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/ |
64 | { |
65 | if (PyCodec_Register(search_function)) |
66 | return NULL; |
67 | |
68 | Py_RETURN_NONE; |
69 | } |
70 | |
71 | /*[clinic input] |
72 | _codecs.unregister |
73 | search_function: object |
74 | / |
75 | |
76 | Unregister a codec search function and clear the registry's cache. |
77 | |
78 | If the search function is not registered, do nothing. |
79 | [clinic start generated code]*/ |
80 | |
81 | static PyObject * |
82 | _codecs_unregister(PyObject *module, PyObject *search_function) |
83 | /*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/ |
84 | { |
85 | if (PyCodec_Unregister(search_function) < 0) { |
86 | return NULL; |
87 | } |
88 | |
89 | Py_RETURN_NONE; |
90 | } |
91 | |
92 | /*[clinic input] |
93 | _codecs.lookup |
94 | encoding: str |
95 | / |
96 | |
97 | Looks up a codec tuple in the Python codec registry and returns a CodecInfo object. |
98 | [clinic start generated code]*/ |
99 | |
100 | static PyObject * |
101 | _codecs_lookup_impl(PyObject *module, const char *encoding) |
102 | /*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/ |
103 | { |
104 | return _PyCodec_Lookup(encoding); |
105 | } |
106 | |
107 | /*[clinic input] |
108 | _codecs.encode |
109 | obj: object |
110 | encoding: str(c_default="NULL") = "utf-8" |
111 | errors: str(c_default="NULL") = "strict" |
112 | |
113 | Encodes obj using the codec registered for encoding. |
114 | |
115 | The default encoding is 'utf-8'. errors may be given to set a |
116 | different error handling scheme. Default is 'strict' meaning that encoding |
117 | errors raise a ValueError. Other possible values are 'ignore', 'replace' |
118 | and 'backslashreplace' as well as any other name registered with |
119 | codecs.register_error that can handle ValueErrors. |
120 | [clinic start generated code]*/ |
121 | |
122 | static PyObject * |
123 | _codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding, |
124 | const char *errors) |
125 | /*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/ |
126 | { |
127 | if (encoding == NULL) |
128 | encoding = PyUnicode_GetDefaultEncoding(); |
129 | |
130 | /* Encode via the codec registry */ |
131 | return PyCodec_Encode(obj, encoding, errors); |
132 | } |
133 | |
134 | /*[clinic input] |
135 | _codecs.decode |
136 | obj: object |
137 | encoding: str(c_default="NULL") = "utf-8" |
138 | errors: str(c_default="NULL") = "strict" |
139 | |
140 | Decodes obj using the codec registered for encoding. |
141 | |
142 | Default encoding is 'utf-8'. errors may be given to set a |
143 | different error handling scheme. Default is 'strict' meaning that encoding |
144 | errors raise a ValueError. Other possible values are 'ignore', 'replace' |
145 | and 'backslashreplace' as well as any other name registered with |
146 | codecs.register_error that can handle ValueErrors. |
147 | [clinic start generated code]*/ |
148 | |
149 | static PyObject * |
150 | _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding, |
151 | const char *errors) |
152 | /*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/ |
153 | { |
154 | if (encoding == NULL) |
155 | encoding = PyUnicode_GetDefaultEncoding(); |
156 | |
157 | /* Decode via the codec registry */ |
158 | return PyCodec_Decode(obj, encoding, errors); |
159 | } |
160 | |
161 | /* --- Helpers ------------------------------------------------------------ */ |
162 | |
163 | static |
164 | PyObject *codec_tuple(PyObject *decoded, |
165 | Py_ssize_t len) |
166 | { |
167 | if (decoded == NULL) |
168 | return NULL; |
169 | return Py_BuildValue("Nn" , decoded, len); |
170 | } |
171 | |
172 | /* --- String codecs ------------------------------------------------------ */ |
173 | /*[clinic input] |
174 | _codecs.escape_decode |
175 | data: Py_buffer(accept={str, buffer}) |
176 | errors: str(accept={str, NoneType}) = None |
177 | / |
178 | [clinic start generated code]*/ |
179 | |
180 | static PyObject * |
181 | _codecs_escape_decode_impl(PyObject *module, Py_buffer *data, |
182 | const char *errors) |
183 | /*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/ |
184 | { |
185 | PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len, |
186 | errors, 0, NULL); |
187 | return codec_tuple(decoded, data->len); |
188 | } |
189 | |
190 | /*[clinic input] |
191 | _codecs.escape_encode |
192 | data: object(subclass_of='&PyBytes_Type') |
193 | errors: str(accept={str, NoneType}) = None |
194 | / |
195 | [clinic start generated code]*/ |
196 | |
197 | static PyObject * |
198 | _codecs_escape_encode_impl(PyObject *module, PyObject *data, |
199 | const char *errors) |
200 | /*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/ |
201 | { |
202 | Py_ssize_t size; |
203 | Py_ssize_t newsize; |
204 | PyObject *v; |
205 | |
206 | size = PyBytes_GET_SIZE(data); |
207 | if (size > PY_SSIZE_T_MAX / 4) { |
208 | PyErr_SetString(PyExc_OverflowError, |
209 | "string is too large to encode" ); |
210 | return NULL; |
211 | } |
212 | newsize = 4*size; |
213 | v = PyBytes_FromStringAndSize(NULL, newsize); |
214 | |
215 | if (v == NULL) { |
216 | return NULL; |
217 | } |
218 | else { |
219 | Py_ssize_t i; |
220 | char c; |
221 | char *p = PyBytes_AS_STRING(v); |
222 | |
223 | for (i = 0; i < size; i++) { |
224 | /* There's at least enough room for a hex escape */ |
225 | assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4); |
226 | c = PyBytes_AS_STRING(data)[i]; |
227 | if (c == '\'' || c == '\\') |
228 | *p++ = '\\', *p++ = c; |
229 | else if (c == '\t') |
230 | *p++ = '\\', *p++ = 't'; |
231 | else if (c == '\n') |
232 | *p++ = '\\', *p++ = 'n'; |
233 | else if (c == '\r') |
234 | *p++ = '\\', *p++ = 'r'; |
235 | else if (c < ' ' || c >= 0x7f) { |
236 | *p++ = '\\'; |
237 | *p++ = 'x'; |
238 | *p++ = Py_hexdigits[(c & 0xf0) >> 4]; |
239 | *p++ = Py_hexdigits[c & 0xf]; |
240 | } |
241 | else |
242 | *p++ = c; |
243 | } |
244 | *p = '\0'; |
245 | if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) { |
246 | return NULL; |
247 | } |
248 | } |
249 | |
250 | return codec_tuple(v, size); |
251 | } |
252 | |
253 | /* --- Decoder ------------------------------------------------------------ */ |
254 | /*[clinic input] |
255 | _codecs.utf_7_decode |
256 | data: Py_buffer |
257 | errors: str(accept={str, NoneType}) = None |
258 | final: bool(accept={int}) = False |
259 | / |
260 | [clinic start generated code]*/ |
261 | |
262 | static PyObject * |
263 | _codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data, |
264 | const char *errors, int final) |
265 | /*[clinic end generated code: output=0cd3a944a32a4089 input=22c395d357815d26]*/ |
266 | { |
267 | Py_ssize_t consumed = data->len; |
268 | PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len, |
269 | errors, |
270 | final ? NULL : &consumed); |
271 | return codec_tuple(decoded, consumed); |
272 | } |
273 | |
274 | /*[clinic input] |
275 | _codecs.utf_8_decode |
276 | data: Py_buffer |
277 | errors: str(accept={str, NoneType}) = None |
278 | final: bool(accept={int}) = False |
279 | / |
280 | [clinic start generated code]*/ |
281 | |
282 | static PyObject * |
283 | _codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data, |
284 | const char *errors, int final) |
285 | /*[clinic end generated code: output=10f74dec8d9bb8bf input=f611b3867352ba59]*/ |
286 | { |
287 | Py_ssize_t consumed = data->len; |
288 | PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len, |
289 | errors, |
290 | final ? NULL : &consumed); |
291 | return codec_tuple(decoded, consumed); |
292 | } |
293 | |
294 | /*[clinic input] |
295 | _codecs.utf_16_decode |
296 | data: Py_buffer |
297 | errors: str(accept={str, NoneType}) = None |
298 | final: bool(accept={int}) = False |
299 | / |
300 | [clinic start generated code]*/ |
301 | |
302 | static PyObject * |
303 | _codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data, |
304 | const char *errors, int final) |
305 | /*[clinic end generated code: output=783b442abcbcc2d0 input=191d360bd7309180]*/ |
306 | { |
307 | int byteorder = 0; |
308 | /* This is overwritten unless final is true. */ |
309 | Py_ssize_t consumed = data->len; |
310 | PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, |
311 | errors, &byteorder, |
312 | final ? NULL : &consumed); |
313 | return codec_tuple(decoded, consumed); |
314 | } |
315 | |
316 | /*[clinic input] |
317 | _codecs.utf_16_le_decode |
318 | data: Py_buffer |
319 | errors: str(accept={str, NoneType}) = None |
320 | final: bool(accept={int}) = False |
321 | / |
322 | [clinic start generated code]*/ |
323 | |
324 | static PyObject * |
325 | _codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data, |
326 | const char *errors, int final) |
327 | /*[clinic end generated code: output=899b9e6364379dcd input=c6904fdc27fb4724]*/ |
328 | { |
329 | int byteorder = -1; |
330 | /* This is overwritten unless final is true. */ |
331 | Py_ssize_t consumed = data->len; |
332 | PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, |
333 | errors, &byteorder, |
334 | final ? NULL : &consumed); |
335 | return codec_tuple(decoded, consumed); |
336 | } |
337 | |
338 | /*[clinic input] |
339 | _codecs.utf_16_be_decode |
340 | data: Py_buffer |
341 | errors: str(accept={str, NoneType}) = None |
342 | final: bool(accept={int}) = False |
343 | / |
344 | [clinic start generated code]*/ |
345 | |
346 | static PyObject * |
347 | _codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data, |
348 | const char *errors, int final) |
349 | /*[clinic end generated code: output=49f6465ea07669c8 input=e49012400974649b]*/ |
350 | { |
351 | int byteorder = 1; |
352 | /* This is overwritten unless final is true. */ |
353 | Py_ssize_t consumed = data->len; |
354 | PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, |
355 | errors, &byteorder, |
356 | final ? NULL : &consumed); |
357 | return codec_tuple(decoded, consumed); |
358 | } |
359 | |
360 | /* This non-standard version also provides access to the byteorder |
361 | parameter of the builtin UTF-16 codec. |
362 | |
363 | It returns a tuple (unicode, bytesread, byteorder) with byteorder |
364 | being the value in effect at the end of data. |
365 | |
366 | */ |
367 | /*[clinic input] |
368 | _codecs.utf_16_ex_decode |
369 | data: Py_buffer |
370 | errors: str(accept={str, NoneType}) = None |
371 | byteorder: int = 0 |
372 | final: bool(accept={int}) = False |
373 | / |
374 | [clinic start generated code]*/ |
375 | |
376 | static PyObject * |
377 | _codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data, |
378 | const char *errors, int byteorder, int final) |
379 | /*[clinic end generated code: output=0f385f251ecc1988 input=5a9c19f2e6b6cf0e]*/ |
380 | { |
381 | /* This is overwritten unless final is true. */ |
382 | Py_ssize_t consumed = data->len; |
383 | |
384 | PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, |
385 | errors, &byteorder, |
386 | final ? NULL : &consumed); |
387 | if (decoded == NULL) |
388 | return NULL; |
389 | return Py_BuildValue("Nni" , decoded, consumed, byteorder); |
390 | } |
391 | |
392 | /*[clinic input] |
393 | _codecs.utf_32_decode |
394 | data: Py_buffer |
395 | errors: str(accept={str, NoneType}) = None |
396 | final: bool(accept={int}) = False |
397 | / |
398 | [clinic start generated code]*/ |
399 | |
400 | static PyObject * |
401 | _codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data, |
402 | const char *errors, int final) |
403 | /*[clinic end generated code: output=2fc961807f7b145f input=fd7193965627eb58]*/ |
404 | { |
405 | int byteorder = 0; |
406 | /* This is overwritten unless final is true. */ |
407 | Py_ssize_t consumed = data->len; |
408 | PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, |
409 | errors, &byteorder, |
410 | final ? NULL : &consumed); |
411 | return codec_tuple(decoded, consumed); |
412 | } |
413 | |
414 | /*[clinic input] |
415 | _codecs.utf_32_le_decode |
416 | data: Py_buffer |
417 | errors: str(accept={str, NoneType}) = None |
418 | final: bool(accept={int}) = False |
419 | / |
420 | [clinic start generated code]*/ |
421 | |
422 | static PyObject * |
423 | _codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data, |
424 | const char *errors, int final) |
425 | /*[clinic end generated code: output=ec8f46b67a94f3e6 input=9078ec70acfe7613]*/ |
426 | { |
427 | int byteorder = -1; |
428 | /* This is overwritten unless final is true. */ |
429 | Py_ssize_t consumed = data->len; |
430 | PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, |
431 | errors, &byteorder, |
432 | final ? NULL : &consumed); |
433 | return codec_tuple(decoded, consumed); |
434 | } |
435 | |
436 | /*[clinic input] |
437 | _codecs.utf_32_be_decode |
438 | data: Py_buffer |
439 | errors: str(accept={str, NoneType}) = None |
440 | final: bool(accept={int}) = False |
441 | / |
442 | [clinic start generated code]*/ |
443 | |
444 | static PyObject * |
445 | _codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data, |
446 | const char *errors, int final) |
447 | /*[clinic end generated code: output=ff82bae862c92c4e input=f1ae1bbbb86648ff]*/ |
448 | { |
449 | int byteorder = 1; |
450 | /* This is overwritten unless final is true. */ |
451 | Py_ssize_t consumed = data->len; |
452 | PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, |
453 | errors, &byteorder, |
454 | final ? NULL : &consumed); |
455 | return codec_tuple(decoded, consumed); |
456 | } |
457 | |
458 | /* This non-standard version also provides access to the byteorder |
459 | parameter of the builtin UTF-32 codec. |
460 | |
461 | It returns a tuple (unicode, bytesread, byteorder) with byteorder |
462 | being the value in effect at the end of data. |
463 | |
464 | */ |
465 | /*[clinic input] |
466 | _codecs.utf_32_ex_decode |
467 | data: Py_buffer |
468 | errors: str(accept={str, NoneType}) = None |
469 | byteorder: int = 0 |
470 | final: bool(accept={int}) = False |
471 | / |
472 | [clinic start generated code]*/ |
473 | |
474 | static PyObject * |
475 | _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data, |
476 | const char *errors, int byteorder, int final) |
477 | /*[clinic end generated code: output=6bfb177dceaf4848 input=e46a73bc859d0bd0]*/ |
478 | { |
479 | Py_ssize_t consumed = data->len; |
480 | PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, |
481 | errors, &byteorder, |
482 | final ? NULL : &consumed); |
483 | if (decoded == NULL) |
484 | return NULL; |
485 | return Py_BuildValue("Nni" , decoded, consumed, byteorder); |
486 | } |
487 | |
488 | /*[clinic input] |
489 | _codecs.unicode_escape_decode |
490 | data: Py_buffer(accept={str, buffer}) |
491 | errors: str(accept={str, NoneType}) = None |
492 | final: bool(accept={int}) = True |
493 | / |
494 | [clinic start generated code]*/ |
495 | |
496 | static PyObject * |
497 | _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data, |
498 | const char *errors, int final) |
499 | /*[clinic end generated code: output=b284f97b12c635ee input=6154f039a9f7c639]*/ |
500 | { |
501 | Py_ssize_t consumed = data->len; |
502 | PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len, |
503 | errors, |
504 | final ? NULL : &consumed); |
505 | return codec_tuple(decoded, consumed); |
506 | } |
507 | |
508 | /*[clinic input] |
509 | _codecs.raw_unicode_escape_decode |
510 | data: Py_buffer(accept={str, buffer}) |
511 | errors: str(accept={str, NoneType}) = None |
512 | final: bool(accept={int}) = True |
513 | / |
514 | [clinic start generated code]*/ |
515 | |
516 | static PyObject * |
517 | _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data, |
518 | const char *errors, int final) |
519 | /*[clinic end generated code: output=11dbd96301e2879e input=2d166191beb3235a]*/ |
520 | { |
521 | Py_ssize_t consumed = data->len; |
522 | PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len, |
523 | errors, |
524 | final ? NULL : &consumed); |
525 | return codec_tuple(decoded, consumed); |
526 | } |
527 | |
528 | /*[clinic input] |
529 | _codecs.latin_1_decode |
530 | data: Py_buffer |
531 | errors: str(accept={str, NoneType}) = None |
532 | / |
533 | [clinic start generated code]*/ |
534 | |
535 | static PyObject * |
536 | _codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data, |
537 | const char *errors) |
538 | /*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/ |
539 | { |
540 | PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors); |
541 | return codec_tuple(decoded, data->len); |
542 | } |
543 | |
544 | /*[clinic input] |
545 | _codecs.ascii_decode |
546 | data: Py_buffer |
547 | errors: str(accept={str, NoneType}) = None |
548 | / |
549 | [clinic start generated code]*/ |
550 | |
551 | static PyObject * |
552 | _codecs_ascii_decode_impl(PyObject *module, Py_buffer *data, |
553 | const char *errors) |
554 | /*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/ |
555 | { |
556 | PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors); |
557 | return codec_tuple(decoded, data->len); |
558 | } |
559 | |
560 | /*[clinic input] |
561 | _codecs.charmap_decode |
562 | data: Py_buffer |
563 | errors: str(accept={str, NoneType}) = None |
564 | mapping: object = None |
565 | / |
566 | [clinic start generated code]*/ |
567 | |
568 | static PyObject * |
569 | _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data, |
570 | const char *errors, PyObject *mapping) |
571 | /*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/ |
572 | { |
573 | PyObject *decoded; |
574 | |
575 | if (mapping == Py_None) |
576 | mapping = NULL; |
577 | |
578 | decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors); |
579 | return codec_tuple(decoded, data->len); |
580 | } |
581 | |
582 | #ifdef MS_WINDOWS |
583 | |
584 | /*[clinic input] |
585 | _codecs.mbcs_decode |
586 | data: Py_buffer |
587 | errors: str(accept={str, NoneType}) = None |
588 | final: bool(accept={int}) = False |
589 | / |
590 | [clinic start generated code]*/ |
591 | |
592 | static PyObject * |
593 | _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data, |
594 | const char *errors, int final) |
595 | /*[clinic end generated code: output=39b65b8598938c4b input=1c1d50f08fa53789]*/ |
596 | { |
597 | Py_ssize_t consumed = data->len; |
598 | PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len, |
599 | errors, final ? NULL : &consumed); |
600 | return codec_tuple(decoded, consumed); |
601 | } |
602 | |
603 | /*[clinic input] |
604 | _codecs.oem_decode |
605 | data: Py_buffer |
606 | errors: str(accept={str, NoneType}) = None |
607 | final: bool(accept={int}) = False |
608 | / |
609 | [clinic start generated code]*/ |
610 | |
611 | static PyObject * |
612 | _codecs_oem_decode_impl(PyObject *module, Py_buffer *data, |
613 | const char *errors, int final) |
614 | /*[clinic end generated code: output=da1617612f3fcad8 input=81b67cba811022e5]*/ |
615 | { |
616 | Py_ssize_t consumed = data->len; |
617 | PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP, |
618 | data->buf, data->len, errors, final ? NULL : &consumed); |
619 | return codec_tuple(decoded, consumed); |
620 | } |
621 | |
622 | /*[clinic input] |
623 | _codecs.code_page_decode |
624 | codepage: int |
625 | data: Py_buffer |
626 | errors: str(accept={str, NoneType}) = None |
627 | final: bool(accept={int}) = False |
628 | / |
629 | [clinic start generated code]*/ |
630 | |
631 | static PyObject * |
632 | _codecs_code_page_decode_impl(PyObject *module, int codepage, |
633 | Py_buffer *data, const char *errors, int final) |
634 | /*[clinic end generated code: output=53008ea967da3fff input=c5f58d036cb63575]*/ |
635 | { |
636 | Py_ssize_t consumed = data->len; |
637 | PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage, |
638 | data->buf, data->len, |
639 | errors, |
640 | final ? NULL : &consumed); |
641 | return codec_tuple(decoded, consumed); |
642 | } |
643 | |
644 | #endif /* MS_WINDOWS */ |
645 | |
646 | /* --- Encoder ------------------------------------------------------------ */ |
647 | |
648 | /*[clinic input] |
649 | _codecs.readbuffer_encode |
650 | data: Py_buffer(accept={str, buffer}) |
651 | errors: str(accept={str, NoneType}) = None |
652 | / |
653 | [clinic start generated code]*/ |
654 | |
655 | static PyObject * |
656 | _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data, |
657 | const char *errors) |
658 | /*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/ |
659 | { |
660 | PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len); |
661 | return codec_tuple(result, data->len); |
662 | } |
663 | |
664 | /*[clinic input] |
665 | _codecs.utf_7_encode |
666 | str: unicode |
667 | errors: str(accept={str, NoneType}) = None |
668 | / |
669 | [clinic start generated code]*/ |
670 | |
671 | static PyObject * |
672 | _codecs_utf_7_encode_impl(PyObject *module, PyObject *str, |
673 | const char *errors) |
674 | /*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/ |
675 | { |
676 | return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors), |
677 | PyUnicode_GET_LENGTH(str)); |
678 | } |
679 | |
680 | /*[clinic input] |
681 | _codecs.utf_8_encode |
682 | str: unicode |
683 | errors: str(accept={str, NoneType}) = None |
684 | / |
685 | [clinic start generated code]*/ |
686 | |
687 | static PyObject * |
688 | _codecs_utf_8_encode_impl(PyObject *module, PyObject *str, |
689 | const char *errors) |
690 | /*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/ |
691 | { |
692 | return codec_tuple(_PyUnicode_AsUTF8String(str, errors), |
693 | PyUnicode_GET_LENGTH(str)); |
694 | } |
695 | |
696 | /* This version provides access to the byteorder parameter of the |
697 | builtin UTF-16 codecs as optional third argument. It defaults to 0 |
698 | which means: use the native byte order and prepend the data with a |
699 | BOM mark. |
700 | |
701 | */ |
702 | |
703 | /*[clinic input] |
704 | _codecs.utf_16_encode |
705 | str: unicode |
706 | errors: str(accept={str, NoneType}) = None |
707 | byteorder: int = 0 |
708 | / |
709 | [clinic start generated code]*/ |
710 | |
711 | static PyObject * |
712 | _codecs_utf_16_encode_impl(PyObject *module, PyObject *str, |
713 | const char *errors, int byteorder) |
714 | /*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/ |
715 | { |
716 | return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder), |
717 | PyUnicode_GET_LENGTH(str)); |
718 | } |
719 | |
720 | /*[clinic input] |
721 | _codecs.utf_16_le_encode |
722 | str: unicode |
723 | errors: str(accept={str, NoneType}) = None |
724 | / |
725 | [clinic start generated code]*/ |
726 | |
727 | static PyObject * |
728 | _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str, |
729 | const char *errors) |
730 | /*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/ |
731 | { |
732 | return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1), |
733 | PyUnicode_GET_LENGTH(str)); |
734 | } |
735 | |
736 | /*[clinic input] |
737 | _codecs.utf_16_be_encode |
738 | str: unicode |
739 | errors: str(accept={str, NoneType}) = None |
740 | / |
741 | [clinic start generated code]*/ |
742 | |
743 | static PyObject * |
744 | _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str, |
745 | const char *errors) |
746 | /*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/ |
747 | { |
748 | return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1), |
749 | PyUnicode_GET_LENGTH(str)); |
750 | } |
751 | |
752 | /* This version provides access to the byteorder parameter of the |
753 | builtin UTF-32 codecs as optional third argument. It defaults to 0 |
754 | which means: use the native byte order and prepend the data with a |
755 | BOM mark. |
756 | |
757 | */ |
758 | |
759 | /*[clinic input] |
760 | _codecs.utf_32_encode |
761 | str: unicode |
762 | errors: str(accept={str, NoneType}) = None |
763 | byteorder: int = 0 |
764 | / |
765 | [clinic start generated code]*/ |
766 | |
767 | static PyObject * |
768 | _codecs_utf_32_encode_impl(PyObject *module, PyObject *str, |
769 | const char *errors, int byteorder) |
770 | /*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/ |
771 | { |
772 | return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder), |
773 | PyUnicode_GET_LENGTH(str)); |
774 | } |
775 | |
776 | /*[clinic input] |
777 | _codecs.utf_32_le_encode |
778 | str: unicode |
779 | errors: str(accept={str, NoneType}) = None |
780 | / |
781 | [clinic start generated code]*/ |
782 | |
783 | static PyObject * |
784 | _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str, |
785 | const char *errors) |
786 | /*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/ |
787 | { |
788 | return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1), |
789 | PyUnicode_GET_LENGTH(str)); |
790 | } |
791 | |
792 | /*[clinic input] |
793 | _codecs.utf_32_be_encode |
794 | str: unicode |
795 | errors: str(accept={str, NoneType}) = None |
796 | / |
797 | [clinic start generated code]*/ |
798 | |
799 | static PyObject * |
800 | _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str, |
801 | const char *errors) |
802 | /*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/ |
803 | { |
804 | return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1), |
805 | PyUnicode_GET_LENGTH(str)); |
806 | } |
807 | |
808 | /*[clinic input] |
809 | _codecs.unicode_escape_encode |
810 | str: unicode |
811 | errors: str(accept={str, NoneType}) = None |
812 | / |
813 | [clinic start generated code]*/ |
814 | |
815 | static PyObject * |
816 | _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str, |
817 | const char *errors) |
818 | /*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/ |
819 | { |
820 | return codec_tuple(PyUnicode_AsUnicodeEscapeString(str), |
821 | PyUnicode_GET_LENGTH(str)); |
822 | } |
823 | |
824 | /*[clinic input] |
825 | _codecs.raw_unicode_escape_encode |
826 | str: unicode |
827 | errors: str(accept={str, NoneType}) = None |
828 | / |
829 | [clinic start generated code]*/ |
830 | |
831 | static PyObject * |
832 | _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str, |
833 | const char *errors) |
834 | /*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/ |
835 | { |
836 | return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str), |
837 | PyUnicode_GET_LENGTH(str)); |
838 | } |
839 | |
840 | /*[clinic input] |
841 | _codecs.latin_1_encode |
842 | str: unicode |
843 | errors: str(accept={str, NoneType}) = None |
844 | / |
845 | [clinic start generated code]*/ |
846 | |
847 | static PyObject * |
848 | _codecs_latin_1_encode_impl(PyObject *module, PyObject *str, |
849 | const char *errors) |
850 | /*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/ |
851 | { |
852 | return codec_tuple(_PyUnicode_AsLatin1String(str, errors), |
853 | PyUnicode_GET_LENGTH(str)); |
854 | } |
855 | |
856 | /*[clinic input] |
857 | _codecs.ascii_encode |
858 | str: unicode |
859 | errors: str(accept={str, NoneType}) = None |
860 | / |
861 | [clinic start generated code]*/ |
862 | |
863 | static PyObject * |
864 | _codecs_ascii_encode_impl(PyObject *module, PyObject *str, |
865 | const char *errors) |
866 | /*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/ |
867 | { |
868 | return codec_tuple(_PyUnicode_AsASCIIString(str, errors), |
869 | PyUnicode_GET_LENGTH(str)); |
870 | } |
871 | |
872 | /*[clinic input] |
873 | _codecs.charmap_encode |
874 | str: unicode |
875 | errors: str(accept={str, NoneType}) = None |
876 | mapping: object = None |
877 | / |
878 | [clinic start generated code]*/ |
879 | |
880 | static PyObject * |
881 | _codecs_charmap_encode_impl(PyObject *module, PyObject *str, |
882 | const char *errors, PyObject *mapping) |
883 | /*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/ |
884 | { |
885 | if (mapping == Py_None) |
886 | mapping = NULL; |
887 | |
888 | return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors), |
889 | PyUnicode_GET_LENGTH(str)); |
890 | } |
891 | |
892 | /*[clinic input] |
893 | _codecs.charmap_build |
894 | map: unicode |
895 | / |
896 | [clinic start generated code]*/ |
897 | |
898 | static PyObject * |
899 | _codecs_charmap_build_impl(PyObject *module, PyObject *map) |
900 | /*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/ |
901 | { |
902 | return PyUnicode_BuildEncodingMap(map); |
903 | } |
904 | |
905 | #ifdef MS_WINDOWS |
906 | |
907 | /*[clinic input] |
908 | _codecs.mbcs_encode |
909 | str: unicode |
910 | errors: str(accept={str, NoneType}) = None |
911 | / |
912 | [clinic start generated code]*/ |
913 | |
914 | static PyObject * |
915 | _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors) |
916 | /*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/ |
917 | { |
918 | return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors), |
919 | PyUnicode_GET_LENGTH(str)); |
920 | } |
921 | |
922 | /*[clinic input] |
923 | _codecs.oem_encode |
924 | str: unicode |
925 | errors: str(accept={str, NoneType}) = None |
926 | / |
927 | [clinic start generated code]*/ |
928 | |
929 | static PyObject * |
930 | _codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors) |
931 | /*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/ |
932 | { |
933 | return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors), |
934 | PyUnicode_GET_LENGTH(str)); |
935 | } |
936 | |
937 | /*[clinic input] |
938 | _codecs.code_page_encode |
939 | code_page: int |
940 | str: unicode |
941 | errors: str(accept={str, NoneType}) = None |
942 | / |
943 | [clinic start generated code]*/ |
944 | |
945 | static PyObject * |
946 | _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str, |
947 | const char *errors) |
948 | /*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/ |
949 | { |
950 | return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors), |
951 | PyUnicode_GET_LENGTH(str)); |
952 | } |
953 | |
954 | #endif /* MS_WINDOWS */ |
955 | |
956 | /* --- Error handler registry --------------------------------------------- */ |
957 | |
958 | /*[clinic input] |
959 | _codecs.register_error |
960 | errors: str |
961 | handler: object |
962 | / |
963 | |
964 | Register the specified error handler under the name errors. |
965 | |
966 | handler must be a callable object, that will be called with an exception |
967 | instance containing information about the location of the encoding/decoding |
968 | error and must return a (replacement, new position) tuple. |
969 | [clinic start generated code]*/ |
970 | |
971 | static PyObject * |
972 | _codecs_register_error_impl(PyObject *module, const char *errors, |
973 | PyObject *handler) |
974 | /*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/ |
975 | { |
976 | if (PyCodec_RegisterError(errors, handler)) |
977 | return NULL; |
978 | Py_RETURN_NONE; |
979 | } |
980 | |
981 | /*[clinic input] |
982 | _codecs.lookup_error |
983 | name: str |
984 | / |
985 | |
986 | lookup_error(errors) -> handler |
987 | |
988 | Return the error handler for the specified error handling name or raise a |
989 | LookupError, if no handler exists under this name. |
990 | [clinic start generated code]*/ |
991 | |
992 | static PyObject * |
993 | _codecs_lookup_error_impl(PyObject *module, const char *name) |
994 | /*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/ |
995 | { |
996 | return PyCodec_LookupError(name); |
997 | } |
998 | |
999 | /* --- Module API --------------------------------------------------------- */ |
1000 | |
1001 | static PyMethodDef _codecs_functions[] = { |
1002 | _CODECS_REGISTER_METHODDEF |
1003 | _CODECS_UNREGISTER_METHODDEF |
1004 | _CODECS_LOOKUP_METHODDEF |
1005 | _CODECS_ENCODE_METHODDEF |
1006 | _CODECS_DECODE_METHODDEF |
1007 | _CODECS_ESCAPE_ENCODE_METHODDEF |
1008 | _CODECS_ESCAPE_DECODE_METHODDEF |
1009 | _CODECS_UTF_8_ENCODE_METHODDEF |
1010 | _CODECS_UTF_8_DECODE_METHODDEF |
1011 | _CODECS_UTF_7_ENCODE_METHODDEF |
1012 | _CODECS_UTF_7_DECODE_METHODDEF |
1013 | _CODECS_UTF_16_ENCODE_METHODDEF |
1014 | _CODECS_UTF_16_LE_ENCODE_METHODDEF |
1015 | _CODECS_UTF_16_BE_ENCODE_METHODDEF |
1016 | _CODECS_UTF_16_DECODE_METHODDEF |
1017 | _CODECS_UTF_16_LE_DECODE_METHODDEF |
1018 | _CODECS_UTF_16_BE_DECODE_METHODDEF |
1019 | _CODECS_UTF_16_EX_DECODE_METHODDEF |
1020 | _CODECS_UTF_32_ENCODE_METHODDEF |
1021 | _CODECS_UTF_32_LE_ENCODE_METHODDEF |
1022 | _CODECS_UTF_32_BE_ENCODE_METHODDEF |
1023 | _CODECS_UTF_32_DECODE_METHODDEF |
1024 | _CODECS_UTF_32_LE_DECODE_METHODDEF |
1025 | _CODECS_UTF_32_BE_DECODE_METHODDEF |
1026 | _CODECS_UTF_32_EX_DECODE_METHODDEF |
1027 | _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF |
1028 | _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF |
1029 | _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF |
1030 | _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF |
1031 | _CODECS_LATIN_1_ENCODE_METHODDEF |
1032 | _CODECS_LATIN_1_DECODE_METHODDEF |
1033 | _CODECS_ASCII_ENCODE_METHODDEF |
1034 | _CODECS_ASCII_DECODE_METHODDEF |
1035 | _CODECS_CHARMAP_ENCODE_METHODDEF |
1036 | _CODECS_CHARMAP_DECODE_METHODDEF |
1037 | _CODECS_CHARMAP_BUILD_METHODDEF |
1038 | _CODECS_READBUFFER_ENCODE_METHODDEF |
1039 | _CODECS_MBCS_ENCODE_METHODDEF |
1040 | _CODECS_MBCS_DECODE_METHODDEF |
1041 | _CODECS_OEM_ENCODE_METHODDEF |
1042 | _CODECS_OEM_DECODE_METHODDEF |
1043 | _CODECS_CODE_PAGE_ENCODE_METHODDEF |
1044 | _CODECS_CODE_PAGE_DECODE_METHODDEF |
1045 | _CODECS_REGISTER_ERROR_METHODDEF |
1046 | _CODECS_LOOKUP_ERROR_METHODDEF |
1047 | {NULL, NULL} /* sentinel */ |
1048 | }; |
1049 | |
1050 | static PyModuleDef_Slot _codecs_slots[] = { |
1051 | {0, NULL} |
1052 | }; |
1053 | |
1054 | static struct PyModuleDef codecsmodule = { |
1055 | PyModuleDef_HEAD_INIT, |
1056 | "_codecs" , |
1057 | NULL, |
1058 | 0, |
1059 | _codecs_functions, |
1060 | _codecs_slots, |
1061 | NULL, |
1062 | NULL, |
1063 | NULL |
1064 | }; |
1065 | |
1066 | PyMODINIT_FUNC |
1067 | PyInit__codecs(void) |
1068 | { |
1069 | return PyModuleDef_Init(&codecsmodule); |
1070 | } |
1071 | |