1/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
6** each line encodes 45 bytes (except possibly the last)
7** First char encodes (binary) length, rest data
8** each char encodes 6 bits, as follows:
9** binary: 01234567 abcdefgh ijklmnop
10** ascii: 012345 67abcd efghij klmnop
11** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12** short binary data is zero-extended (so the bits are always in the
13** right place), this does *not* reflect in the length.
14** base64:
15** Line breaks are insignificant, but lines are at most 76 chars
** each char encodes 6 bits, in similar order as uuencode/hqx. Encoding
17** is done via a table.
18** Short binary data is filled (in ASCII) with '='.
19** hqx:
20** File starts with introductory text, real data starts and ends
21** with colons.
22** Data consists of three similar parts: info, datafork, resourcefork.
23** Each part is protected (at the end) with a 16-bit crc
24** The binary data is run-length encoded, and then ascii-fied:
25** binary: 01234567 abcdefgh ijklmnop
26** ascii: 012345 67abcd efghij klmnop
27** ASCII encoding is table-driven, see the code.
28** Short binary data results in the runt ascii-byte being output with
29** the bits in the right place.
30**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
34** Programs that encode binary data in ASCII are written in
35** such a style that they are as unreadable as possible. Devices used
36** include unnecessary global variables, burying important tables
37** in unrelated sourcefiles, putting functions in include files,
38** using seemingly-descriptive variable names for different purposes,
39** calls to empty subroutines and a host of others.
40**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
45**
46** Added support for quoted-printable encoding, based on rfc 1521 et al
47** quoted-printable encoding specifies that non printable characters (anything
48** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49** of the character. It also specifies some other behavior to enable 8bit data
50** in a mail message with little difficulty (maximum line sizes, protecting
51** some cases of whitespace, etc).
52**
53** Brandon Long, September 2001.
54*/
55
56#define PY_SSIZE_T_CLEAN
57
58#include "Python.h"
59#include "pystrhex.h"
60#ifdef USE_ZLIB_CRC32
61#include "zlib.h"
62#endif
63
64typedef struct binascii_state {
65 PyObject *Error;
66 PyObject *Incomplete;
67} binascii_state;
68
69static binascii_state *
70get_binascii_state(PyObject *module)
71{
72 return (binascii_state *)PyModule_GetState(module);
73}
74
75/*
76** hqx lookup table, ascii->binary.
77*/
78
/* Marker byte of the hqx run-length encoding: <byte> RUNCHAR <count>. */
#define RUNCHAR 0x90

/* Sentinel entries of table_a2b_hqx; all lie above the 6-bit value range. */
#define DONE 0x7F   /* ':'  - end of data            */
#define SKIP 0x7E   /* '\n' and '\r' - silently dropped */
#define FAIL 0x7D   /* byte that is not part of the hqx alphabet */

/* Maps an input byte to its 6-bit hqx value, or to DONE/SKIP/FAIL. */
static const unsigned char table_a2b_hqx[256] = {
    /* 0x00: control characters; only LF (0x0A) and CR (0x0D) are skipped */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
    /* 0x10: control characters */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x20: SP ! " # $ % & '   ( ) * + , - . / */
    FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
    /* 0x30: 0 1 2 3 4 5 6 7   8 9 : ; < = > ? */
    0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
    0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x40: @ A B C D E F G   H I J K L M N O */
    0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
    0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
    /* 0x50: P Q R S T U V W   X Y Z [ (backslash) ] ^ _ */
    0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
    0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
    /* 0x60: ` a b c d e f g   h i j k l m n o */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
    0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
    /* 0x70: p q r s t u v w   x y z { | } ~ DEL */
    0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x80 - 0xFF: never valid */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
135
/* hqx alphabet, binary->ascii: entry i is the character for 6-bit value i.
   The literal is split at the gaps in the ASCII sequence. */
static const unsigned char table_b2a_hqx[] =
    "!\"#$%&'()*+,-"     /* 0x00 - 0x0C */
    "0123456"            /* 0x0D - 0x13 ('7' is not used) */
    "89"                 /* 0x14 - 0x15 */
    "@ABCDEFGHIJKLMN"    /* 0x16 - 0x24 ('O' is not used) */
    "PQRSTUV"            /* 0x25 - 0x2B ('W' is not used) */
    "XYZ["               /* 0x2C - 0x2F */
    "`abcdef"            /* 0x30 - 0x36 ('g' is not used) */
    "hijklm"             /* 0x37 - 0x3C ('n', 'o' are not used) */
    "pqr";               /* 0x3D - 0x3F */
138
/* base64 lookup table, ascii->binary.
 *
 * Entry c is the 6-bit value of character c.  Every byte outside the
 * alphabet holds -1, which is stored as 255 and therefore fails the
 * "< 64" test used by the decoder.  Note that the pad character '='
 * maps to 0.
 */
static const unsigned char table_a2b_base64[] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x08 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x18 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x28 */ -1, -1, -1, 62, -1, -1, -1, 63,   /* '+' and '/' */
    /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59,   /* '0' - '7' */
    /* 0x38 */ 60, 61, -1, -1, -1,  0, -1, -1,   /* '8', '9', PAD->0 */
    /* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,   /* 'A' - 'G' */
    /* 0x48 */  7,  8,  9, 10, 11, 12, 13, 14,
    /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22,
    /* 0x58 */ 23, 24, 25, -1, -1, -1, -1, -1,   /* 'X' - 'Z' */
    /* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32,   /* 'a' - 'g' */
    /* 0x68 */ 33, 34, 35, 36, 37, 38, 39, 40,
    /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48,
    /* 0x78 */ 49, 50, 51, -1, -1, -1, -1, -1,   /* 'x' - 'z' */

    /* 0x80 - 0xFF: not part of the alphabet */
    /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x88 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x98 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xA8 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xB8 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xC8 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xD8 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xE8 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xF8 */ -1, -1, -1, -1, -1, -1, -1, -1,
};
158
/* Character that fills up an incomplete final quad of base64 output. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.
   It is the largest input length for which the "2 * len + 2 (+ 1)" output
   bound used by the encoder still fits in a Py_ssize_t. */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/* base64 alphabet, binary->ascii: entry i is the character for value i. */
static const unsigned char table_b2a_base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"    /*  0 - 25 */
    "abcdefghijklmnopqrstuvwxyz"    /* 26 - 51 */
    "0123456789"                    /* 52 - 61 */
    "+/";                           /* 62 - 63 */
166
167
168
/* Byte-wise lookup table for the 16-bit CRC computed by crc_hqx()
 * (CRC-CCITT, generator polynomial 0x1021, most significant bit first).
 * Entry i is the CRC register after feeding the single byte i into an
 * all-zero register.  The leading comment of each row is the index of
 * its first entry.
 */
static const unsigned short crctab_hqx[256] = {
    /* 0x00 */ 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    /* 0x08 */ 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    /* 0x10 */ 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    /* 0x18 */ 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    /* 0x20 */ 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    /* 0x28 */ 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    /* 0x30 */ 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    /* 0x38 */ 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    /* 0x40 */ 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    /* 0x48 */ 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    /* 0x50 */ 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    /* 0x58 */ 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    /* 0x60 */ 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    /* 0x68 */ 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    /* 0x70 */ 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    /* 0x78 */ 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    /* 0x80 */ 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    /* 0x88 */ 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    /* 0x90 */ 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    /* 0x98 */ 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    /* 0xa0 */ 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    /* 0xa8 */ 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    /* 0xb0 */ 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    /* 0xb8 */ 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    /* 0xc0 */ 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    /* 0xc8 */ 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    /* 0xd0 */ 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    /* 0xd8 */ 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    /* 0xe0 */ 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    /* 0xe8 */ 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    /* 0xf0 */ 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    /* 0xf8 */ 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
203
204/*[clinic input]
205module binascii
206[clinic start generated code]*/
207/*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
208
209/*[python input]
210
211class ascii_buffer_converter(CConverter):
212 type = 'Py_buffer'
213 converter = 'ascii_buffer_converter'
214 impl_by_reference = True
215 c_default = "{NULL, NULL}"
216
217 def cleanup(self):
218 name = self.name
219 return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"])
220
221[python start generated code]*/
222/*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
223
224static int
225ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
226{
227 if (arg == NULL) {
228 PyBuffer_Release(buf);
229 return 1;
230 }
231 if (PyUnicode_Check(arg)) {
232 if (PyUnicode_READY(arg) < 0)
233 return 0;
234 if (!PyUnicode_IS_ASCII(arg)) {
235 PyErr_SetString(PyExc_ValueError,
236 "string argument should contain only ASCII characters");
237 return 0;
238 }
239 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
240 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
241 buf->len = PyUnicode_GET_LENGTH(arg);
242 buf->obj = NULL;
243 return 1;
244 }
245 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
246 PyErr_Format(PyExc_TypeError,
247 "argument should be bytes, buffer or ASCII string, "
248 "not '%.100s'", Py_TYPE(arg)->tp_name);
249 return 0;
250 }
251 if (!PyBuffer_IsContiguous(buf, 'C')) {
252 PyErr_Format(PyExc_TypeError,
253 "argument should be a contiguous buffer, "
254 "not '%.100s'", Py_TYPE(arg)->tp_name);
255 PyBuffer_Release(buf);
256 return 0;
257 }
258 return Py_CLEANUP_SUPPORTED;
259}
260
261#include "clinic/binascii.c.h"
262
263/*[clinic input]
264binascii.a2b_uu
265
266 data: ascii_buffer
267 /
268
269Decode a line of uuencoded data.
270[clinic start generated code]*/
271
272static PyObject *
273binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
274/*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
275{
276 const unsigned char *ascii_data;
277 unsigned char *bin_data;
278 int leftbits = 0;
279 unsigned char this_ch;
280 unsigned int leftchar = 0;
281 PyObject *rv;
282 Py_ssize_t ascii_len, bin_len;
283 binascii_state *state;
284
285 ascii_data = data->buf;
286 ascii_len = data->len;
287
288 assert(ascii_len >= 0);
289
290 /* First byte: binary data length (in bytes) */
291 bin_len = (*ascii_data++ - ' ') & 077;
292 ascii_len--;
293
294 /* Allocate the buffer */
295 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
296 return NULL;
297 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
298
299 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
300 /* XXX is it really best to add NULs if there's no more data */
301 this_ch = (ascii_len > 0) ? *ascii_data : 0;
302 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
303 /*
304 ** Whitespace. Assume some spaces got eaten at
305 ** end-of-line. (We check this later)
306 */
307 this_ch = 0;
308 } else {
309 /* Check the character for legality
310 ** The 64 in stead of the expected 63 is because
311 ** there are a few uuencodes out there that use
312 ** '`' as zero instead of space.
313 */
314 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
315 state = PyModule_GetState(module);
316 if (state == NULL) {
317 return NULL;
318 }
319 PyErr_SetString(state->Error, "Illegal char");
320 Py_DECREF(rv);
321 return NULL;
322 }
323 this_ch = (this_ch - ' ') & 077;
324 }
325 /*
326 ** Shift it in on the low end, and see if there's
327 ** a byte ready for output.
328 */
329 leftchar = (leftchar << 6) | (this_ch);
330 leftbits += 6;
331 if ( leftbits >= 8 ) {
332 leftbits -= 8;
333 *bin_data++ = (leftchar >> leftbits) & 0xff;
334 leftchar &= ((1 << leftbits) - 1);
335 bin_len--;
336 }
337 }
338 /*
339 ** Finally, check that if there's anything left on the line
340 ** that it's whitespace only.
341 */
342 while( ascii_len-- > 0 ) {
343 this_ch = *ascii_data++;
344 /* Extra '`' may be written as padding in some cases */
345 if ( this_ch != ' ' && this_ch != ' '+64 &&
346 this_ch != '\n' && this_ch != '\r' ) {
347 state = PyModule_GetState(module);
348 if (state == NULL) {
349 return NULL;
350 }
351 PyErr_SetString(state->Error, "Trailing garbage");
352 Py_DECREF(rv);
353 return NULL;
354 }
355 }
356 return rv;
357}
358
359/*[clinic input]
360binascii.b2a_uu
361
362 data: Py_buffer
363 /
364 *
365 backtick: bool(accept={int}) = False
366
367Uuencode line of data.
368[clinic start generated code]*/
369
370static PyObject *
371binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
372/*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
373{
374 unsigned char *ascii_data;
375 const unsigned char *bin_data;
376 int leftbits = 0;
377 unsigned char this_ch;
378 unsigned int leftchar = 0;
379 binascii_state *state;
380 Py_ssize_t bin_len, out_len;
381 _PyBytesWriter writer;
382
383 _PyBytesWriter_Init(&writer);
384 bin_data = data->buf;
385 bin_len = data->len;
386 if ( bin_len > 45 ) {
387 /* The 45 is a limit that appears in all uuencode's */
388 state = PyModule_GetState(module);
389 if (state == NULL) {
390 return NULL;
391 }
392 PyErr_SetString(state->Error, "At most 45 bytes at once");
393 return NULL;
394 }
395
396 /* We're lazy and allocate to much (fixed up later) */
397 out_len = 2 + (bin_len + 2) / 3 * 4;
398 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
399 if (ascii_data == NULL)
400 return NULL;
401
402 /* Store the length */
403 if (backtick && !bin_len)
404 *ascii_data++ = '`';
405 else
406 *ascii_data++ = ' ' + (unsigned char)bin_len;
407
408 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
409 /* Shift the data (or padding) into our buffer */
410 if ( bin_len > 0 ) /* Data */
411 leftchar = (leftchar << 8) | *bin_data;
412 else /* Padding */
413 leftchar <<= 8;
414 leftbits += 8;
415
416 /* See if there are 6-bit groups ready */
417 while ( leftbits >= 6 ) {
418 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
419 leftbits -= 6;
420 if (backtick && !this_ch)
421 *ascii_data++ = '`';
422 else
423 *ascii_data++ = this_ch + ' ';
424 }
425 }
426 *ascii_data++ = '\n'; /* Append a courtesy newline */
427
428 return _PyBytesWriter_Finish(&writer, ascii_data);
429}
430
431/*[clinic input]
432binascii.a2b_base64
433
434 data: ascii_buffer
435 /
436
437Decode a line of base64 data.
438[clinic start generated code]*/
439
440static PyObject *
441binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
442/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
443{
444 assert(data->len >= 0);
445
446 const unsigned char *ascii_data = data->buf;
447 size_t ascii_len = data->len;
448
449 /* Allocate the buffer */
450 Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
451 _PyBytesWriter writer;
452 _PyBytesWriter_Init(&writer);
453 unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
454 if (bin_data == NULL)
455 return NULL;
456 unsigned char *bin_data_start = bin_data;
457
458 int quad_pos = 0;
459 unsigned char leftchar = 0;
460 int pads = 0;
461 for (size_t i = 0; i < ascii_len; i++) {
462 unsigned char this_ch = ascii_data[i];
463
464 /* Check for pad sequences and ignore
465 ** the invalid ones.
466 */
467 if (this_ch == BASE64_PAD) {
468 if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
469 /* A pad sequence means no more input.
470 ** We've already interpreted the data
471 ** from the quad at this point.
472 */
473 goto done;
474 }
475 continue;
476 }
477
478 this_ch = table_a2b_base64[this_ch];
479 if (this_ch >= 64) {
480 continue;
481 }
482 pads = 0;
483
484 switch (quad_pos) {
485 case 0:
486 quad_pos = 1;
487 leftchar = this_ch;
488 break;
489 case 1:
490 quad_pos = 2;
491 *bin_data++ = (leftchar << 2) | (this_ch >> 4);
492 leftchar = this_ch & 0x0f;
493 break;
494 case 2:
495 quad_pos = 3;
496 *bin_data++ = (leftchar << 4) | (this_ch >> 2);
497 leftchar = this_ch & 0x03;
498 break;
499 case 3:
500 quad_pos = 0;
501 *bin_data++ = (leftchar << 6) | (this_ch);
502 leftchar = 0;
503 break;
504 }
505 }
506
507 if (quad_pos != 0) {
508 binascii_state *state = PyModule_GetState(module);
509 if (state == NULL) {
510 /* error already set, from PyModule_GetState */
511 } else if (quad_pos == 1) {
512 /*
513 ** There is exactly one extra valid, non-padding, base64 character.
514 ** This is an invalid length, as there is no possible input that
515 ** could encoded into such a base64 string.
516 */
517 PyErr_Format(state->Error,
518 "Invalid base64-encoded string: "
519 "number of data characters (%zd) cannot be 1 more "
520 "than a multiple of 4",
521 (bin_data - bin_data_start) / 3 * 4 + 1);
522 } else {
523 PyErr_SetString(state->Error, "Incorrect padding");
524 }
525 _PyBytesWriter_Dealloc(&writer);
526 return NULL;
527 }
528
529done:
530 return _PyBytesWriter_Finish(&writer, bin_data);
531}
532
533
534/*[clinic input]
535binascii.b2a_base64
536
537 data: Py_buffer
538 /
539 *
540 newline: bool(accept={int}) = True
541
542Base64-code line of data.
543[clinic start generated code]*/
544
545static PyObject *
546binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
547/*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
548{
549 unsigned char *ascii_data;
550 const unsigned char *bin_data;
551 int leftbits = 0;
552 unsigned char this_ch;
553 unsigned int leftchar = 0;
554 Py_ssize_t bin_len, out_len;
555 _PyBytesWriter writer;
556 binascii_state *state;
557
558 bin_data = data->buf;
559 bin_len = data->len;
560 _PyBytesWriter_Init(&writer);
561
562 assert(bin_len >= 0);
563
564 if ( bin_len > BASE64_MAXBIN ) {
565 state = PyModule_GetState(module);
566 if (state == NULL) {
567 return NULL;
568 }
569 PyErr_SetString(state->Error, "Too much data for base64 line");
570 return NULL;
571 }
572
573 /* We're lazy and allocate too much (fixed up later).
574 "+2" leaves room for up to two pad characters.
575 Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
576 out_len = bin_len*2 + 2;
577 if (newline)
578 out_len++;
579 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
580 if (ascii_data == NULL)
581 return NULL;
582
583 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
584 /* Shift the data into our buffer */
585 leftchar = (leftchar << 8) | *bin_data;
586 leftbits += 8;
587
588 /* See if there are 6-bit groups ready */
589 while ( leftbits >= 6 ) {
590 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
591 leftbits -= 6;
592 *ascii_data++ = table_b2a_base64[this_ch];
593 }
594 }
595 if ( leftbits == 2 ) {
596 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
597 *ascii_data++ = BASE64_PAD;
598 *ascii_data++ = BASE64_PAD;
599 } else if ( leftbits == 4 ) {
600 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
601 *ascii_data++ = BASE64_PAD;
602 }
603 if (newline)
604 *ascii_data++ = '\n'; /* Append a courtesy newline */
605
606 return _PyBytesWriter_Finish(&writer, ascii_data);
607}
608
609/*[clinic input]
610binascii.a2b_hqx
611
612 data: ascii_buffer
613 /
614
615Decode .hqx coding.
616[clinic start generated code]*/
617
618static PyObject *
619binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
620/*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
621{
622 if (PyErr_WarnEx(PyExc_DeprecationWarning,
623 "binascii.a2b_hqx() is deprecated", 1) < 0) {
624 return NULL;
625 }
626
627 const unsigned char *ascii_data;
628 unsigned char *bin_data;
629 int leftbits = 0;
630 unsigned char this_ch;
631 unsigned int leftchar = 0;
632 PyObject *res;
633 Py_ssize_t len;
634 int done = 0;
635 _PyBytesWriter writer;
636 binascii_state *state;
637
638 ascii_data = data->buf;
639 len = data->len;
640 _PyBytesWriter_Init(&writer);
641
642 assert(len >= 0);
643
644 if (len > PY_SSIZE_T_MAX - 2)
645 return PyErr_NoMemory();
646
647 /* Allocate a string that is too big (fixed later)
648 Add two to the initial length to prevent interning which
649 would preclude subsequent resizing. */
650 bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
651 if (bin_data == NULL)
652 return NULL;
653
654 for( ; len > 0 ; len--, ascii_data++ ) {
655 /* Get the byte and look it up */
656 this_ch = table_a2b_hqx[*ascii_data];
657 if ( this_ch == SKIP )
658 continue;
659 if ( this_ch == FAIL ) {
660 state = PyModule_GetState(module);
661 if (state == NULL) {
662 return NULL;
663 }
664 PyErr_SetString(state->Error, "Illegal char");
665 _PyBytesWriter_Dealloc(&writer);
666 return NULL;
667 }
668 if ( this_ch == DONE ) {
669 /* The terminating colon */
670 done = 1;
671 break;
672 }
673
674 /* Shift it into the buffer and see if any bytes are ready */
675 leftchar = (leftchar << 6) | (this_ch);
676 leftbits += 6;
677 if ( leftbits >= 8 ) {
678 leftbits -= 8;
679 *bin_data++ = (leftchar >> leftbits) & 0xff;
680 leftchar &= ((1 << leftbits) - 1);
681 }
682 }
683
684 if ( leftbits && !done ) {
685 state = PyModule_GetState(module);
686 if (state == NULL) {
687 return NULL;
688 }
689 PyErr_SetString(state->Incomplete,
690 "String has incomplete number of bytes");
691 _PyBytesWriter_Dealloc(&writer);
692 return NULL;
693 }
694
695 res = _PyBytesWriter_Finish(&writer, bin_data);
696 if (res == NULL)
697 return NULL;
698 return Py_BuildValue("Ni", res, done);
699}
700
701
702/*[clinic input]
703binascii.rlecode_hqx
704
705 data: Py_buffer
706 /
707
708Binhex RLE-code binary data.
709[clinic start generated code]*/
710
711static PyObject *
712binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
713/*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
714{
715 if (PyErr_WarnEx(PyExc_DeprecationWarning,
716 "binascii.rlecode_hqx() is deprecated", 1) < 0) {
717 return NULL;
718 }
719
720 const unsigned char *in_data;
721 unsigned char *out_data;
722 unsigned char ch;
723 Py_ssize_t in, inend, len;
724 _PyBytesWriter writer;
725
726 _PyBytesWriter_Init(&writer);
727 in_data = data->buf;
728 len = data->len;
729
730 assert(len >= 0);
731
732 if (len > PY_SSIZE_T_MAX / 2 - 2)
733 return PyErr_NoMemory();
734
735 /* Worst case: output is twice as big as input (fixed later) */
736 out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
737 if (out_data == NULL)
738 return NULL;
739
740 for( in=0; in<len; in++) {
741 ch = in_data[in];
742 if ( ch == RUNCHAR ) {
743 /* RUNCHAR. Escape it. */
744 *out_data++ = RUNCHAR;
745 *out_data++ = 0;
746 } else {
747 /* Check how many following are the same */
748 for(inend=in+1;
749 inend<len && in_data[inend] == ch &&
750 inend < in+255;
751 inend++) ;
752 if ( inend - in > 3 ) {
753 /* More than 3 in a row. Output RLE. */
754 *out_data++ = ch;
755 *out_data++ = RUNCHAR;
756 *out_data++ = (unsigned char) (inend-in);
757 in = inend-1;
758 } else {
759 /* Less than 3. Output the byte itself */
760 *out_data++ = ch;
761 }
762 }
763 }
764
765 return _PyBytesWriter_Finish(&writer, out_data);
766}
767
768
769/*[clinic input]
770binascii.b2a_hqx
771
772 data: Py_buffer
773 /
774
775Encode .hqx data.
776[clinic start generated code]*/
777
778static PyObject *
779binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
780/*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
781{
782 if (PyErr_WarnEx(PyExc_DeprecationWarning,
783 "binascii.b2a_hqx() is deprecated", 1) < 0) {
784 return NULL;
785 }
786
787 unsigned char *ascii_data;
788 const unsigned char *bin_data;
789 int leftbits = 0;
790 unsigned char this_ch;
791 unsigned int leftchar = 0;
792 Py_ssize_t len;
793 _PyBytesWriter writer;
794
795 bin_data = data->buf;
796 len = data->len;
797 _PyBytesWriter_Init(&writer);
798
799 assert(len >= 0);
800
801 if (len > PY_SSIZE_T_MAX / 2 - 2)
802 return PyErr_NoMemory();
803
804 /* Allocate a buffer that is at least large enough */
805 ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
806 if (ascii_data == NULL)
807 return NULL;
808
809 for( ; len > 0 ; len--, bin_data++ ) {
810 /* Shift into our buffer, and output any 6bits ready */
811 leftchar = (leftchar << 8) | *bin_data;
812 leftbits += 8;
813 while ( leftbits >= 6 ) {
814 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
815 leftbits -= 6;
816 *ascii_data++ = table_b2a_hqx[this_ch];
817 }
818 }
819 /* Output a possible runt byte */
820 if ( leftbits ) {
821 leftchar <<= (6-leftbits);
822 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
823 }
824
825 return _PyBytesWriter_Finish(&writer, ascii_data);
826}
827
828
829/*[clinic input]
830binascii.rledecode_hqx
831
832 data: Py_buffer
833 /
834
835Decode hexbin RLE-coded string.
836[clinic start generated code]*/
837
838static PyObject *
839binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
840/*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
841{
842 if (PyErr_WarnEx(PyExc_DeprecationWarning,
843 "binascii.rledecode_hqx() is deprecated", 1) < 0) {
844 return NULL;
845 }
846
847 const unsigned char *in_data;
848 unsigned char *out_data;
849 unsigned char in_byte, in_repeat;
850 Py_ssize_t in_len;
851 _PyBytesWriter writer;
852
853 in_data = data->buf;
854 in_len = data->len;
855 _PyBytesWriter_Init(&writer);
856 binascii_state *state;
857
858 assert(in_len >= 0);
859
860 /* Empty string is a special case */
861 if ( in_len == 0 )
862 return PyBytes_FromStringAndSize("", 0);
863 else if (in_len > PY_SSIZE_T_MAX / 2)
864 return PyErr_NoMemory();
865
866 /* Allocate a buffer of reasonable size. Resized when needed */
867 out_data = _PyBytesWriter_Alloc(&writer, in_len);
868 if (out_data == NULL)
869 return NULL;
870
871 /* Use overallocation */
872 writer.overallocate = 1;
873
874 /*
875 ** We need two macros here to get/put bytes and handle
876 ** end-of-buffer for input and output strings.
877 */
878#define INBYTE(b) \
879 do { \
880 if ( --in_len < 0 ) { \
881 state = PyModule_GetState(module); \
882 if (state == NULL) { \
883 return NULL; \
884 } \
885 PyErr_SetString(state->Incomplete, ""); \
886 goto error; \
887 } \
888 b = *in_data++; \
889 } while(0)
890
891 /*
892 ** Handle first byte separately (since we have to get angry
893 ** in case of an orphaned RLE code).
894 */
895 INBYTE(in_byte);
896
897 if (in_byte == RUNCHAR) {
898 INBYTE(in_repeat);
899 /* only 1 byte will be written, but 2 bytes were preallocated:
900 subtract 1 byte to prevent overallocation */
901 writer.min_size--;
902
903 if (in_repeat != 0) {
904 /* Note Error, not Incomplete (which is at the end
905 ** of the string only). This is a programmer error.
906 */
907 state = PyModule_GetState(module);
908 if (state == NULL) {
909 return NULL;
910 }
911 PyErr_SetString(state->Error, "Orphaned RLE code at start");
912 goto error;
913 }
914 *out_data++ = RUNCHAR;
915 } else {
916 *out_data++ = in_byte;
917 }
918
919 while( in_len > 0 ) {
920 INBYTE(in_byte);
921
922 if (in_byte == RUNCHAR) {
923 INBYTE(in_repeat);
924 /* only 1 byte will be written, but 2 bytes were preallocated:
925 subtract 1 byte to prevent overallocation */
926 writer.min_size--;
927
928 if ( in_repeat == 0 ) {
929 /* Just an escaped RUNCHAR value */
930 *out_data++ = RUNCHAR;
931 } else {
932 /* Pick up value and output a sequence of it */
933 in_byte = out_data[-1];
934
935 /* enlarge the buffer if needed */
936 if (in_repeat > 1) {
937 /* -1 because we already preallocated 1 byte */
938 out_data = _PyBytesWriter_Prepare(&writer, out_data,
939 in_repeat - 1);
940 if (out_data == NULL)
941 goto error;
942 }
943
944 while ( --in_repeat > 0 )
945 *out_data++ = in_byte;
946 }
947 } else {
948 /* Normal byte */
949 *out_data++ = in_byte;
950 }
951 }
952 return _PyBytesWriter_Finish(&writer, out_data);
953
954error:
955 _PyBytesWriter_Dealloc(&writer);
956 return NULL;
957}
958
959
960/*[clinic input]
961binascii.crc_hqx
962
963 data: Py_buffer
964 crc: unsigned_int(bitwise=True)
965 /
966
967Compute CRC-CCITT incrementally.
968[clinic start generated code]*/
969
970static PyObject *
971binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
972/*[clinic end generated code: output=2fde213d0f547a98 input=56237755370a951c]*/
973{
974 const unsigned char *bin_data;
975 Py_ssize_t len;
976
977 crc &= 0xffff;
978 bin_data = data->buf;
979 len = data->len;
980
981 while(len-- > 0) {
982 crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
983 }
984
985 return PyLong_FromUnsignedLong(crc);
986}
987
988#ifndef USE_ZLIB_CRC32
989/* Crc - 32 BIT ANSI X3.66 CRC checksum files
990 Also known as: ISO 3307
991**********************************************************************|
992* *|
993* Demonstration program to compute the 32-bit CRC used as the frame *|
994* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
995* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
996* protocol). The 32-bit FCS was added via the Federal Register, *|
997* 1 June 1982, p.23798. I presume but don't know for certain that *|
998* this polynomial is or will be included in CCITT V.41, which *|
999* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
1000* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
1001* errors by a factor of 10^-5 over 16-bit FCS. *|
1002* *|
1003**********************************************************************|
1004
1005 Copyright (C) 1986 Gary S. Brown. You may use this program, or
1006 code or tables extracted from it, as desired without restriction.
1007
1008 First, the polynomial itself and its table of feedback terms. The
1009 polynomial is
1010 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
1011 Note that we take it "backwards" and put the highest-order term in
1012 the lowest-order bit. The X^32 term is "implied"; the LSB is the
1013 X^31 term, etc. The X^0 term (usually shown as "+1") results in
1014 the MSB being 1.
1015
1016 Note that the usual hardware shift register implementation, which
1017 is what we're using (we're merely optimizing it by doing eight-bit
1018 chunks at a time) shifts bits into the lowest-order term. In our
1019 implementation, that means shifting towards the right. Why do we
1020 do it this way? Because the calculated CRC must be transmitted in
1021 order from highest-order term to lowest-order term. UARTs transmit
1022 characters in order from LSB to MSB. By storing the CRC this way,
1023 we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
1025 by bit from highest- to lowest-order term without requiring any bit
1026 shuffling on our part. Reception works similarly.
1027
1028 The feedback terms table consists of 256, 32-bit entries. Notes:
1029
1030 1. The table can be generated at runtime if desired; code to do so
1031 is shown later. It might not be obvious, but the feedback
1032 terms simply represent the results of eight shift/xor opera-
1033 tions for all combinations of data and CRC register values.
1034
1035 2. The CRC accumulation logic is the same for all CRC polynomials,
1036 be they sixteen or thirty-two bits wide. You simply choose the
1037 appropriate table. Alternatively, because the table can be
1038 generated at runtime, you can start by generating the table for
1039 the polynomial in question and use exactly the same "updcrc",
1040 if your application needn't simultaneously handle two CRC
1041 polynomials. (Note, however, that XMODEM is strange.)
1042
1043 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
1044 of course, 32-bit entries work OK if the high 16 bits are zero.
1045
1046 4. The values must be right-shifted by eight bits by the "updcrc"
1047 logic; the shift must be unsigned (bring in zeroes). On some
1048 hardware you could probably optimize the shift in assembler by
1049 using byte-swap instructions.
1050********************************************************************/
1051
/* CRC-32 feedback table, 256 entries, indexed by (crc ^ byte) & 0xff.
 * Entry i is the result of eight shift/xor steps applied to i using the
 * bit-reversed polynomial (entry 0x80 is the polynomial itself,
 * 0xedb88320).  The row comments give the index of the first entry. */
static const unsigned int crc_32_tab[256] = {
    /* 0x00 */ 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU,
    /* 0x04 */ 0x076dc419U, 0x706af48fU, 0xe963a535U, 0x9e6495a3U,
    /* 0x08 */ 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U,
    /* 0x0c */ 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U,
    /* 0x10 */ 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
    /* 0x14 */ 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U,
    /* 0x18 */ 0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU,
    /* 0x1c */ 0x14015c4fU, 0x63066cd9U, 0xfa0f3d63U, 0x8d080df5U,
    /* 0x20 */ 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U,
    /* 0x24 */ 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
    /* 0x28 */ 0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U,
    /* 0x2c */ 0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U,
    /* 0x30 */ 0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U,
    /* 0x34 */ 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U, 0xb8bda50fU,
    /* 0x38 */ 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
    /* 0x3c */ 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU,
    /* 0x40 */ 0x76dc4190U, 0x01db7106U, 0x98d220bcU, 0xefd5102aU,
    /* 0x44 */ 0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U,
    /* 0x48 */ 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U,
    /* 0x4c */ 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
    /* 0x50 */ 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU,
    /* 0x54 */ 0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U,
    /* 0x58 */ 0x65b0d9c6U, 0x12b7e950U, 0x8bbeb8eaU, 0xfcb9887cU,
    /* 0x5c */ 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U,
    /* 0x60 */ 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
    /* 0x64 */ 0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU,
    /* 0x68 */ 0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U,
    /* 0x6c */ 0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U,
    /* 0x70 */ 0x5005713cU, 0x270241aaU, 0xbe0b1010U, 0xc90c2086U,
    /* 0x74 */ 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
    /* 0x78 */ 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U,
    /* 0x7c */ 0x59b33d17U, 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU,
    /* 0x80 */ 0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU,
    /* 0x84 */ 0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U,
    /* 0x88 */ 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
    /* 0x8c */ 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U,
    /* 0x90 */ 0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU,
    /* 0x94 */ 0xf762575dU, 0x806567cbU, 0x196c3671U, 0x6e6b06e7U,
    /* 0x98 */ 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU,
    /* 0x9c */ 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
    /* 0xa0 */ 0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U,
    /* 0xa4 */ 0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU,
    /* 0xa8 */ 0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U,
    /* 0xac */ 0xdf60efc3U, 0xa867df55U, 0x316e8eefU, 0x4669be79U,
    /* 0xb0 */ 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
    /* 0xb4 */ 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU,
    /* 0xb8 */ 0xc5ba3bbeU, 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U,
    /* 0xbc */ 0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU,
    /* 0xc0 */ 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU,
    /* 0xc4 */ 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
    /* 0xc8 */ 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U,
    /* 0xcc */ 0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U,
    /* 0xd0 */ 0x86d3d2d4U, 0xf1d4e242U, 0x68ddb3f8U, 0x1fda836eU,
    /* 0xd4 */ 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U,
    /* 0xd8 */ 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
    /* 0xdc */ 0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U,
    /* 0xe0 */ 0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U,
    /* 0xe4 */ 0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU,
    /* 0xe8 */ 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U, 0x37d83bf0U,
    /* 0xec */ 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
    /* 0xf0 */ 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U,
    /* 0xf4 */ 0xbad03605U, 0xcdd70693U, 0x54de5729U, 0x23d967bfU,
    /* 0xf8 */ 0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U,
    /* 0xfc */ 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU
};
1106#endif /* USE_ZLIB_CRC32 */
1107
1108/*[clinic input]
1109binascii.crc32 -> unsigned_int
1110
1111 data: Py_buffer
1112 crc: unsigned_int(bitwise=True) = 0
1113 /
1114
1115Compute CRC-32 incrementally.
1116[clinic start generated code]*/
1117
1118static unsigned int
1119binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
1120/*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
1121
1122#ifdef USE_ZLIB_CRC32
1123/* The same core as zlibmodule.c zlib_crc32_impl. */
1124{
1125 unsigned char *buf = data->buf;
1126 Py_ssize_t len = data->len;
1127
1128 /* Avoid truncation of length for very large buffers. crc32() takes
1129 length as an unsigned int, which may be narrower than Py_ssize_t. */
1130 while ((size_t)len > UINT_MAX) {
1131 crc = crc32(crc, buf, UINT_MAX);
1132 buf += (size_t) UINT_MAX;
1133 len -= (size_t) UINT_MAX;
1134 }
1135 crc = crc32(crc, buf, (unsigned int)len);
1136 return crc & 0xffffffff;
1137}
1138#else /* USE_ZLIB_CRC32 */
1139{ /* By Jim Ahlstrom; All rights transferred to CNRI */
1140 const unsigned char *bin_data;
1141 Py_ssize_t len;
1142 unsigned int result;
1143
1144 bin_data = data->buf;
1145 len = data->len;
1146
1147 crc = ~ crc;
1148 while (len-- > 0) {
1149 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1150 /* Note: (crc >> 8) MUST zero fill on left */
1151 }
1152
1153 result = (crc ^ 0xFFFFFFFF);
1154 return result & 0xffffffff;
1155}
1156#endif /* USE_ZLIB_CRC32 */
1157
1158/*[clinic input]
1159binascii.b2a_hex
1160
1161 data: Py_buffer
1162 sep: object = NULL
1163 An optional single character or byte to separate hex bytes.
1164 bytes_per_sep: int = 1
1165 How many bytes between separators. Positive values count from the
1166 right, negative values count from the left.
1167
1168Hexadecimal representation of binary data.
1169
1170The return value is a bytes object. This function is also
1171available as "hexlify()".
1172
1173Example:
1174>>> binascii.b2a_hex(b'\xb9\x01\xef')
1175b'b901ef'
1176>>> binascii.hexlify(b'\xb9\x01\xef', ':')
1177b'b9:01:ef'
1178>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
1179b'b9_01ef'
1180[clinic start generated code]*/
1181
1182static PyObject *
1183binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1184 int bytes_per_sep)
1185/*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
1186{
1187 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1188 sep, bytes_per_sep);
1189}
1190
1191/*[clinic input]
1192binascii.hexlify = binascii.b2a_hex
1193
1194Hexadecimal representation of binary data.
1195
1196The return value is a bytes object. This function is also
1197available as "b2a_hex()".
1198[clinic start generated code]*/
1199
1200static PyObject *
1201binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1202 int bytes_per_sep)
1203/*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
1204{
1205 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1206 sep, bytes_per_sep);
1207}
1208
1209/*[clinic input]
1210binascii.a2b_hex
1211
1212 hexstr: ascii_buffer
1213 /
1214
1215Binary data of hexadecimal representation.
1216
1217hexstr must contain an even number of hex digits (upper or lower case).
1218This function is also available as "unhexlify()".
1219[clinic start generated code]*/
1220
1221static PyObject *
1222binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
1223/*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
1224{
1225 const char* argbuf;
1226 Py_ssize_t arglen;
1227 PyObject *retval;
1228 char* retbuf;
1229 Py_ssize_t i, j;
1230 binascii_state *state;
1231
1232 argbuf = hexstr->buf;
1233 arglen = hexstr->len;
1234
1235 assert(arglen >= 0);
1236
1237 /* XXX What should we do about strings with an odd length? Should
1238 * we add an implicit leading zero, or a trailing zero? For now,
1239 * raise an exception.
1240 */
1241 if (arglen % 2) {
1242 state = PyModule_GetState(module);
1243 if (state == NULL) {
1244 return NULL;
1245 }
1246 PyErr_SetString(state->Error, "Odd-length string");
1247 return NULL;
1248 }
1249
1250 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
1251 if (!retval)
1252 return NULL;
1253 retbuf = PyBytes_AS_STRING(retval);
1254
1255 for (i=j=0; i < arglen; i += 2) {
1256 unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
1257 unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
1258 if (top >= 16 || bot >= 16) {
1259 state = PyModule_GetState(module);
1260 if (state == NULL) {
1261 return NULL;
1262 }
1263 PyErr_SetString(state->Error,
1264 "Non-hexadecimal digit found");
1265 goto finally;
1266 }
1267 retbuf[j++] = (top << 4) + bot;
1268 }
1269 return retval;
1270
1271 finally:
1272 Py_DECREF(retval);
1273 return NULL;
1274}
1275
1276/*[clinic input]
1277binascii.unhexlify = binascii.a2b_hex
1278
1279Binary data of hexadecimal representation.
1280
1281hexstr must contain an even number of hex digits (upper or lower case).
1282[clinic start generated code]*/
1283
1284static PyObject *
1285binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
1286/*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
1287{
1288 return binascii_a2b_hex_impl(module, hexstr);
1289}
1290
/* Line-length limit used by the quoted-printable encoder (b2a_qp) when it
   decides where to insert a soft line break. */
#define MAXLINESIZE 76
1292
1293
1294/*[clinic input]
1295binascii.a2b_qp
1296
1297 data: ascii_buffer
1298 header: bool(accept={int}) = False
1299
1300Decode a string of qp-encoded data.
1301[clinic start generated code]*/
1302
1303static PyObject *
1304binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
1305/*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
1306{
1307 Py_ssize_t in, out;
1308 char ch;
1309 const unsigned char *ascii_data;
1310 unsigned char *odata;
1311 Py_ssize_t datalen = 0;
1312 PyObject *rv;
1313
1314 ascii_data = data->buf;
1315 datalen = data->len;
1316
1317 /* We allocate the output same size as input, this is overkill.
1318 */
1319 odata = (unsigned char *) PyMem_Calloc(1, datalen);
1320 if (odata == NULL) {
1321 PyErr_NoMemory();
1322 return NULL;
1323 }
1324
1325 in = out = 0;
1326 while (in < datalen) {
1327 if (ascii_data[in] == '=') {
1328 in++;
1329 if (in >= datalen) break;
1330 /* Soft line breaks */
1331 if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
1332 if (ascii_data[in] != '\n') {
1333 while (in < datalen && ascii_data[in] != '\n') in++;
1334 }
1335 if (in < datalen) in++;
1336 }
1337 else if (ascii_data[in] == '=') {
1338 /* broken case from broken python qp */
1339 odata[out++] = '=';
1340 in++;
1341 }
1342 else if ((in + 1 < datalen) &&
1343 ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
1344 (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1345 (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1346 ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1347 (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1348 (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
1349 /* hexval */
1350 ch = _PyLong_DigitValue[ascii_data[in]] << 4;
1351 in++;
1352 ch |= _PyLong_DigitValue[ascii_data[in]];
1353 in++;
1354 odata[out++] = ch;
1355 }
1356 else {
1357 odata[out++] = '=';
1358 }
1359 }
1360 else if (header && ascii_data[in] == '_') {
1361 odata[out++] = ' ';
1362 in++;
1363 }
1364 else {
1365 odata[out] = ascii_data[in];
1366 in++;
1367 out++;
1368 }
1369 }
1370 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1371 PyMem_Free(odata);
1372 return NULL;
1373 }
1374 PyMem_Free(odata);
1375 return rv;
1376}
1377
/* Write the two upper-case hexadecimal digits of ch to s[0] (high
   nibble) and s[1] (low nibble).  No terminator is written.  Always
   returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char hexdigits[] = "0123456789ABCDEF";

    s[0] = hexdigits[(ch >> 4) & 0x0F];
    s[1] = hexdigits[ch & 0x0F];
    return 0;
}
1388
1389/* XXX: This is ridiculously complicated to be backward compatible
1390 * (mostly) with the quopri module. It doesn't re-create the quopri
1391 * module bug where text ending in CRLF has the CR encoded */
1392
1393/*[clinic input]
1394binascii.b2a_qp
1395
1396 data: Py_buffer
1397 quotetabs: bool(accept={int}) = False
1398 istext: bool(accept={int}) = True
1399 header: bool(accept={int}) = False
1400
1401Encode a string using quoted-printable encoding.
1402
1403On encoding, when istext is set, newlines are not encoded, and white
1404space at end of lines is. When istext is not set, \r and \n (CR/LF)
1405are both encoded. When quotetabs is set, space and tabs are encoded.
1406[clinic start generated code]*/
1407
1408static PyObject *
1409binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
1410 int istext, int header)
1411/*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
1412{
1413 Py_ssize_t in, out;
1414 const unsigned char *databuf;
1415 unsigned char *odata;
1416 Py_ssize_t datalen = 0, odatalen = 0;
1417 PyObject *rv;
1418 unsigned int linelen = 0;
1419 unsigned char ch;
1420 int crlf = 0;
1421 const unsigned char *p;
1422
1423 databuf = data->buf;
1424 datalen = data->len;
1425
1426 /* See if this string is using CRLF line ends */
1427 /* XXX: this function has the side effect of converting all of
1428 * the end of lines to be the same depending on this detection
1429 * here */
1430 p = (const unsigned char *) memchr(databuf, '\n', datalen);
1431 if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
1432 crlf = 1;
1433
1434 /* First, scan to see how many characters need to be encoded */
1435 in = 0;
1436 while (in < datalen) {
1437 Py_ssize_t delta = 0;
1438 if ((databuf[in] > 126) ||
1439 (databuf[in] == '=') ||
1440 (header && databuf[in] == '_') ||
1441 ((databuf[in] == '.') && (linelen == 0) &&
1442 (in + 1 == datalen || databuf[in+1] == '\n' ||
1443 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1444 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1445 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1446 ((databuf[in] < 33) &&
1447 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1448 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1449 {
1450 if ((linelen + 3) >= MAXLINESIZE) {
1451 linelen = 0;
1452 if (crlf)
1453 delta += 3;
1454 else
1455 delta += 2;
1456 }
1457 linelen += 3;
1458 delta += 3;
1459 in++;
1460 }
1461 else {
1462 if (istext &&
1463 ((databuf[in] == '\n') ||
1464 ((in+1 < datalen) && (databuf[in] == '\r') &&
1465 (databuf[in+1] == '\n'))))
1466 {
1467 linelen = 0;
1468 /* Protect against whitespace on end of line */
1469 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
1470 delta += 2;
1471 if (crlf)
1472 delta += 2;
1473 else
1474 delta += 1;
1475 if (databuf[in] == '\r')
1476 in += 2;
1477 else
1478 in++;
1479 }
1480 else {
1481 if ((in + 1 != datalen) &&
1482 (databuf[in+1] != '\n') &&
1483 (linelen + 1) >= MAXLINESIZE) {
1484 linelen = 0;
1485 if (crlf)
1486 delta += 3;
1487 else
1488 delta += 2;
1489 }
1490 linelen++;
1491 delta++;
1492 in++;
1493 }
1494 }
1495 if (PY_SSIZE_T_MAX - delta < odatalen) {
1496 PyErr_NoMemory();
1497 return NULL;
1498 }
1499 odatalen += delta;
1500 }
1501
1502 /* We allocate the output same size as input, this is overkill.
1503 */
1504 odata = (unsigned char *) PyMem_Calloc(1, odatalen);
1505 if (odata == NULL) {
1506 PyErr_NoMemory();
1507 return NULL;
1508 }
1509
1510 in = out = linelen = 0;
1511 while (in < datalen) {
1512 if ((databuf[in] > 126) ||
1513 (databuf[in] == '=') ||
1514 (header && databuf[in] == '_') ||
1515 ((databuf[in] == '.') && (linelen == 0) &&
1516 (in + 1 == datalen || databuf[in+1] == '\n' ||
1517 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1518 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1519 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1520 ((databuf[in] < 33) &&
1521 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1522 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1523 {
1524 if ((linelen + 3 )>= MAXLINESIZE) {
1525 odata[out++] = '=';
1526 if (crlf) odata[out++] = '\r';
1527 odata[out++] = '\n';
1528 linelen = 0;
1529 }
1530 odata[out++] = '=';
1531 to_hex(databuf[in], &odata[out]);
1532 out += 2;
1533 in++;
1534 linelen += 3;
1535 }
1536 else {
1537 if (istext &&
1538 ((databuf[in] == '\n') ||
1539 ((in+1 < datalen) && (databuf[in] == '\r') &&
1540 (databuf[in+1] == '\n'))))
1541 {
1542 linelen = 0;
1543 /* Protect against whitespace on end of line */
1544 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1545 ch = odata[out-1];
1546 odata[out-1] = '=';
1547 to_hex(ch, &odata[out]);
1548 out += 2;
1549 }
1550
1551 if (crlf) odata[out++] = '\r';
1552 odata[out++] = '\n';
1553 if (databuf[in] == '\r')
1554 in += 2;
1555 else
1556 in++;
1557 }
1558 else {
1559 if ((in + 1 != datalen) &&
1560 (databuf[in+1] != '\n') &&
1561 (linelen + 1) >= MAXLINESIZE) {
1562 odata[out++] = '=';
1563 if (crlf) odata[out++] = '\r';
1564 odata[out++] = '\n';
1565 linelen = 0;
1566 }
1567 linelen++;
1568 if (header && databuf[in] == ' ') {
1569 odata[out++] = '_';
1570 in++;
1571 }
1572 else {
1573 odata[out++] = databuf[in++];
1574 }
1575 }
1576 }
1577 }
1578 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1579 PyMem_Free(odata);
1580 return NULL;
1581 }
1582 PyMem_Free(odata);
1583 return rv;
1584}
1585
1586/* List of functions defined in the module */
1587
1588static struct PyMethodDef binascii_module_methods[] = {
1589 BINASCII_A2B_UU_METHODDEF
1590 BINASCII_B2A_UU_METHODDEF
1591 BINASCII_A2B_BASE64_METHODDEF
1592 BINASCII_B2A_BASE64_METHODDEF
1593 BINASCII_A2B_HQX_METHODDEF
1594 BINASCII_B2A_HQX_METHODDEF
1595 BINASCII_A2B_HEX_METHODDEF
1596 BINASCII_B2A_HEX_METHODDEF
1597 BINASCII_HEXLIFY_METHODDEF
1598 BINASCII_UNHEXLIFY_METHODDEF
1599 BINASCII_RLECODE_HQX_METHODDEF
1600 BINASCII_RLEDECODE_HQX_METHODDEF
1601 BINASCII_CRC_HQX_METHODDEF
1602 BINASCII_CRC32_METHODDEF
1603 BINASCII_A2B_QP_METHODDEF
1604 BINASCII_B2A_QP_METHODDEF
1605 {NULL, NULL} /* sentinel */
1606};
1607
1608
1609/* Initialization function for the module (*must* be called PyInit_binascii) */
1610PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1611
1612static int
1613binascii_exec(PyObject *module) {
1614 int result;
1615 binascii_state *state = PyModule_GetState(module);
1616 if (state == NULL) {
1617 return -1;
1618 }
1619
1620 state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1621 if (state->Error == NULL) {
1622 return -1;
1623 }
1624 Py_INCREF(state->Error);
1625 result = PyModule_AddObject(module, "Error", state->Error);
1626 if (result == -1) {
1627 Py_DECREF(state->Error);
1628 return -1;
1629 }
1630
1631 state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1632 if (state->Incomplete == NULL) {
1633 return -1;
1634 }
1635 Py_INCREF(state->Incomplete);
1636 result = PyModule_AddObject(module, "Incomplete", state->Incomplete);
1637 if (result == -1) {
1638 Py_DECREF(state->Incomplete);
1639 return -1;
1640 }
1641
1642 return 0;
1643}
1644
1645static PyModuleDef_Slot binascii_slots[] = {
1646 {Py_mod_exec, binascii_exec},
1647 {0, NULL}
1648};
1649
1650static int
1651binascii_traverse(PyObject *module, visitproc visit, void *arg)
1652{
1653 binascii_state *state = get_binascii_state(module);
1654 Py_VISIT(state->Error);
1655 Py_VISIT(state->Incomplete);
1656 return 0;
1657}
1658
1659static int
1660binascii_clear(PyObject *module)
1661{
1662 binascii_state *state = get_binascii_state(module);
1663 Py_CLEAR(state->Error);
1664 Py_CLEAR(state->Incomplete);
1665 return 0;
1666}
1667
1668static void
1669binascii_free(void *module)
1670{
1671 binascii_clear((PyObject *)module);
1672}
1673
1674static struct PyModuleDef binasciimodule = {
1675 PyModuleDef_HEAD_INIT,
1676 "binascii",
1677 doc_binascii,
1678 sizeof(binascii_state),
1679 binascii_module_methods,
1680 binascii_slots,
1681 binascii_traverse,
1682 binascii_clear,
1683 binascii_free
1684};
1685
1686PyMODINIT_FUNC
1687PyInit_binascii(void)
1688{
1689 return PyModuleDef_Init(&binasciimodule);
1690}
1691