1 | /* |
2 | An implementation of Text I/O as defined by PEP 3116 - "New I/O" |
3 | |
4 | Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper. |
5 | |
6 | Written by Amaury Forgeot d'Arc and Antoine Pitrou |
7 | */ |
8 | |
9 | #define PY_SSIZE_T_CLEAN |
10 | #include "Python.h" |
11 | #include "pycore_interp.h" // PyInterpreterState.fs_codec |
12 | #include "pycore_long.h" // _PyLong_GetZero() |
13 | #include "pycore_fileutils.h" // _Py_GetLocaleEncoding() |
14 | #include "pycore_object.h" |
15 | #include "pycore_pystate.h" // _PyInterpreterState_GET() |
16 | #include "structmember.h" // PyMemberDef |
17 | #include "_iomodule.h" |
18 | |
19 | /*[clinic input] |
20 | module _io |
21 | class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type" |
22 | class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe" |
23 | [clinic start generated code]*/ |
24 | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/ |
25 | |
/* Static identifiers for the attribute and method names this module looks
   up by name.  A declaration `_Py_IDENTIFIER(x)` is referenced in the code
   as `PyId_x` (for example PyId_strict, PyId_readable, PyId_writable,
   PyId_name and PyId_setstate further down). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
45 | |
46 | /* TextIOBase */ |
47 | |
/* Class docstring; installed as tp_doc of PyTextIOBase_Type below. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable.\n"
    );
55 | |
56 | static PyObject * |
57 | _unsupported(const char *message) |
58 | { |
59 | _PyIO_State *state = IO_STATE(); |
60 | if (state != NULL) |
61 | PyErr_SetString(state->unsupported_operation, message); |
62 | return NULL; |
63 | } |
64 | |
65 | PyDoc_STRVAR(textiobase_detach_doc, |
66 | "Separate the underlying buffer from the TextIOBase and return it.\n" |
67 | "\n" |
68 | "After the underlying buffer has been detached, the TextIO is in an\n" |
69 | "unusable state.\n" |
70 | ); |
71 | |
72 | static PyObject * |
73 | textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored)) |
74 | { |
75 | return _unsupported("detach" ); |
76 | } |
77 | |
78 | PyDoc_STRVAR(textiobase_read_doc, |
79 | "Read at most n characters from stream.\n" |
80 | "\n" |
81 | "Read from underlying buffer until we have n characters or we hit EOF.\n" |
82 | "If n is negative or omitted, read until EOF.\n" |
83 | ); |
84 | |
85 | static PyObject * |
86 | textiobase_read(PyObject *self, PyObject *args) |
87 | { |
88 | return _unsupported("read" ); |
89 | } |
90 | |
91 | PyDoc_STRVAR(textiobase_readline_doc, |
92 | "Read until newline or EOF.\n" |
93 | "\n" |
94 | "Returns an empty string if EOF is hit immediately.\n" |
95 | ); |
96 | |
97 | static PyObject * |
98 | textiobase_readline(PyObject *self, PyObject *args) |
99 | { |
100 | return _unsupported("readline" ); |
101 | } |
102 | |
103 | PyDoc_STRVAR(textiobase_write_doc, |
104 | "Write string to stream.\n" |
105 | "Returns the number of characters written (which is always equal to\n" |
106 | "the length of the string).\n" |
107 | ); |
108 | |
109 | static PyObject * |
110 | textiobase_write(PyObject *self, PyObject *args) |
111 | { |
112 | return _unsupported("write" ); |
113 | } |
114 | |
115 | PyDoc_STRVAR(textiobase_encoding_doc, |
116 | "Encoding of the text stream.\n" |
117 | "\n" |
118 | "Subclasses should override.\n" |
119 | ); |
120 | |
121 | static PyObject * |
122 | textiobase_encoding_get(PyObject *self, void *context) |
123 | { |
124 | Py_RETURN_NONE; |
125 | } |
126 | |
127 | PyDoc_STRVAR(textiobase_newlines_doc, |
128 | "Line endings translated so far.\n" |
129 | "\n" |
130 | "Only line endings translated during reading are considered.\n" |
131 | "\n" |
132 | "Subclasses should override.\n" |
133 | ); |
134 | |
135 | static PyObject * |
136 | textiobase_newlines_get(PyObject *self, void *context) |
137 | { |
138 | Py_RETURN_NONE; |
139 | } |
140 | |
141 | PyDoc_STRVAR(textiobase_errors_doc, |
142 | "The error setting of the decoder or encoder.\n" |
143 | "\n" |
144 | "Subclasses should override.\n" |
145 | ); |
146 | |
147 | static PyObject * |
148 | textiobase_errors_get(PyObject *self, void *context) |
149 | { |
150 | Py_RETURN_NONE; |
151 | } |
152 | |
153 | |
154 | static PyMethodDef textiobase_methods[] = { |
155 | {"detach" , textiobase_detach, METH_NOARGS, textiobase_detach_doc}, |
156 | {"read" , textiobase_read, METH_VARARGS, textiobase_read_doc}, |
157 | {"readline" , textiobase_readline, METH_VARARGS, textiobase_readline_doc}, |
158 | {"write" , textiobase_write, METH_VARARGS, textiobase_write_doc}, |
159 | {NULL, NULL} |
160 | }; |
161 | |
162 | static PyGetSetDef textiobase_getset[] = { |
163 | {"encoding" , (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc}, |
164 | {"newlines" , (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc}, |
165 | {"errors" , (getter)textiobase_errors_get, NULL, textiobase_errors_doc}, |
166 | {NULL} |
167 | }; |
168 | |
169 | PyTypeObject PyTextIOBase_Type = { |
170 | PyVarObject_HEAD_INIT(NULL, 0) |
171 | "_io._TextIOBase" , /*tp_name*/ |
172 | 0, /*tp_basicsize*/ |
173 | 0, /*tp_itemsize*/ |
174 | 0, /*tp_dealloc*/ |
175 | 0, /*tp_vectorcall_offset*/ |
176 | 0, /*tp_getattr*/ |
177 | 0, /*tp_setattr*/ |
178 | 0, /*tp_as_async*/ |
179 | 0, /*tp_repr*/ |
180 | 0, /*tp_as_number*/ |
181 | 0, /*tp_as_sequence*/ |
182 | 0, /*tp_as_mapping*/ |
183 | 0, /*tp_hash */ |
184 | 0, /*tp_call*/ |
185 | 0, /*tp_str*/ |
186 | 0, /*tp_getattro*/ |
187 | 0, /*tp_setattro*/ |
188 | 0, /*tp_as_buffer*/ |
189 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ |
190 | textiobase_doc, /* tp_doc */ |
191 | 0, /* tp_traverse */ |
192 | 0, /* tp_clear */ |
193 | 0, /* tp_richcompare */ |
194 | 0, /* tp_weaklistoffset */ |
195 | 0, /* tp_iter */ |
196 | 0, /* tp_iternext */ |
197 | textiobase_methods, /* tp_methods */ |
198 | 0, /* tp_members */ |
199 | textiobase_getset, /* tp_getset */ |
200 | &PyIOBase_Type, /* tp_base */ |
201 | 0, /* tp_dict */ |
202 | 0, /* tp_descr_get */ |
203 | 0, /* tp_descr_set */ |
204 | 0, /* tp_dictoffset */ |
205 | 0, /* tp_init */ |
206 | 0, /* tp_alloc */ |
207 | 0, /* tp_new */ |
208 | 0, /* tp_free */ |
209 | 0, /* tp_is_gc */ |
210 | 0, /* tp_bases */ |
211 | 0, /* tp_mro */ |
212 | 0, /* tp_cache */ |
213 | 0, /* tp_subclasses */ |
214 | 0, /* tp_weaklist */ |
215 | 0, /* tp_del */ |
216 | 0, /* tp_version_tag */ |
217 | 0, /* tp_finalize */ |
218 | }; |
219 | |
220 | |
221 | /* IncrementalNewlineDecoder */ |
222 | |
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    /* Wrapped incremental decoder, or None when the input is already text.
       NULL until __init__ has run (decode() checks for this). */
    PyObject *decoder;
    /* Error handler name given to __init__ ("strict" when omitted). */
    PyObject *errors;
    /* Set when a trailing '\r' was held back from the previous non-final
       decode() call and must be prefixed to the next output. */
    unsigned int pendingcr: 1;
    /* Non-zero when "\r\n" and "\r" are to be rewritten as "\n". */
    unsigned int translate: 1;
    /* Bitwise OR of the SEEN_CR / SEEN_LF / SEEN_CRLF flags seen so far. */
    unsigned int seennl: 3;
} nldecoder_object;
231 | |
232 | /*[clinic input] |
233 | _io.IncrementalNewlineDecoder.__init__ |
234 | decoder: object |
235 | translate: int |
236 | errors: object(c_default="NULL") = "strict" |
237 | |
238 | Codec used when reading a file in universal newlines mode. |
239 | |
240 | It wraps another incremental decoder, translating \r\n and \r into \n. |
241 | It also records the types of newlines encountered. When used with |
242 | translate=False, it ensures that the newline sequence is returned in |
243 | one piece. When used with decoder=None, it expects unicode strings as |
244 | decode input and translates newlines without first invoking an external |
245 | decoder. |
246 | [clinic start generated code]*/ |
247 | |
248 | static int |
249 | _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self, |
250 | PyObject *decoder, int translate, |
251 | PyObject *errors) |
252 | /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/ |
253 | { |
254 | self->decoder = decoder; |
255 | Py_INCREF(decoder); |
256 | |
257 | if (errors == NULL) { |
258 | self->errors = _PyUnicode_FromId(&PyId_strict); |
259 | if (self->errors == NULL) |
260 | return -1; |
261 | } |
262 | else { |
263 | self->errors = errors; |
264 | } |
265 | Py_INCREF(self->errors); |
266 | |
267 | self->translate = translate ? 1 : 0; |
268 | self->seennl = 0; |
269 | self->pendingcr = 0; |
270 | |
271 | return 0; |
272 | } |
273 | |
274 | static void |
275 | incrementalnewlinedecoder_dealloc(nldecoder_object *self) |
276 | { |
277 | Py_CLEAR(self->decoder); |
278 | Py_CLEAR(self->errors); |
279 | Py_TYPE(self)->tp_free((PyObject *)self); |
280 | } |
281 | |
282 | static int |
283 | check_decoded(PyObject *decoded) |
284 | { |
285 | if (decoded == NULL) |
286 | return -1; |
287 | if (!PyUnicode_Check(decoded)) { |
288 | PyErr_Format(PyExc_TypeError, |
289 | "decoder should return a string result, not '%.200s'" , |
290 | Py_TYPE(decoded)->tp_name); |
291 | Py_DECREF(decoded); |
292 | return -1; |
293 | } |
294 | if (PyUnicode_READY(decoded) < 0) { |
295 | Py_DECREF(decoded); |
296 | return -1; |
297 | } |
298 | return 0; |
299 | } |
300 | |
/* Bit flags for the newline styles observed while decoding.  They are
   OR-ed together in nldecoder_object.seennl and mapped to the `newlines`
   attribute value by incrementalnewlinedecoder_newlines_get(). */
#define SEEN_CR 1
#define SEEN_LF 2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
305 | |
/* Decode one chunk through the wrapped decoder and apply newline handling.
 *
 * myself: the nldecoder_object, passed as a generic PyObject *.
 * input:  the chunk to decode.  It is handed to self->decoder.decode(), or
 *         used directly as the decoded text when self->decoder is None.
 * final:  non-zero when no further input will follow; a trailing '\r' is
 *         then returned instead of being held back.
 *
 * Returns a new reference to the resulting str, or NULL on failure.
 * Raises ValueError when __init__ has not been called.  As a side effect
 * self->pendingcr and self->seennl are updated.
 */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    /* decoder stays NULL until __init__ stores something in it. */
    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No wrapped decoder: the input itself is the text. */
        output = input;
        Py_INCREF(output);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        /* '\r' fits in any kind, so the max char value of `output` is
           enough for the new string. */
        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, out, 0, '\r');
        /* `kind` is used as the per-character byte width here: skip one
           character in the destination and copy the old contents. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        const void *in_str;
        Py_ssize_t len;
        /* Work on a local copy; it is merged back into self->seennl at the
           end of this block. */
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* Nothing to scan (also reached when the only character was a
           held-back '\r'). */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
        */
        if (seennl == SEEN_LF || seennl == 0) {
            /* This is a byte search over the raw buffer.  For kinds wider
               than one byte a hit need not be a real '\r' character; such a
               hit only sends us down one of the slower scanning branches. */
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* For 1-byte data a byte hit is a character hit. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    /* Wider kinds: confirm the hit character by character. */
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        /* No bounds check: the loop stops only on a code
                           point <= '\n', so it depends on such a value
                           (NOTE(review): presumably the terminating NUL of
                           the string buffer -- confirm) following the data. */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* Record newline styles only; the text is returned unchanged. */
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                /* Same unbounded scan as above, stopping on any code point
                   <= '\r'. */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* Look one character ahead to tell "\r\n" from "\r". */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
endscan:
            ;
        }
        else {
            /* Translation mode: build a copy in which "\r\n" and "\r" are
               replaced by "\n". */
            void *translated;
            int kind = PyUnicode_KIND(output);
            const void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* The result can only be shorter than the input, so `len`
               characters of scratch space are enough. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character lay beyond the last
                   real one: stop without copying it. */
                if (in > len)
                    break;
                /* Any other code point <= '\r' is copied unchanged. */
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old `output` is already released, so do not go through
               the `error` label here. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
506 | |
507 | /*[clinic input] |
508 | _io.IncrementalNewlineDecoder.decode |
509 | input: object |
510 | final: bool(accept={int}) = False |
511 | [clinic start generated code]*/ |
512 | |
513 | static PyObject * |
514 | _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self, |
515 | PyObject *input, int final) |
516 | /*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/ |
517 | { |
518 | return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final); |
519 | } |
520 | |
521 | /*[clinic input] |
522 | _io.IncrementalNewlineDecoder.getstate |
523 | [clinic start generated code]*/ |
524 | |
525 | static PyObject * |
526 | _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self) |
527 | /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/ |
528 | { |
529 | PyObject *buffer; |
530 | unsigned long long flag; |
531 | |
532 | if (self->decoder != Py_None) { |
533 | PyObject *state = PyObject_CallMethodNoArgs(self->decoder, |
534 | _PyIO_str_getstate); |
535 | if (state == NULL) |
536 | return NULL; |
537 | if (!PyTuple_Check(state)) { |
538 | PyErr_SetString(PyExc_TypeError, |
539 | "illegal decoder state" ); |
540 | Py_DECREF(state); |
541 | return NULL; |
542 | } |
543 | if (!PyArg_ParseTuple(state, "OK;illegal decoder state" , |
544 | &buffer, &flag)) |
545 | { |
546 | Py_DECREF(state); |
547 | return NULL; |
548 | } |
549 | Py_INCREF(buffer); |
550 | Py_DECREF(state); |
551 | } |
552 | else { |
553 | buffer = PyBytes_FromString("" ); |
554 | flag = 0; |
555 | } |
556 | flag <<= 1; |
557 | if (self->pendingcr) |
558 | flag |= 1; |
559 | return Py_BuildValue("NK" , buffer, flag); |
560 | } |
561 | |
562 | /*[clinic input] |
563 | _io.IncrementalNewlineDecoder.setstate |
564 | state: object |
565 | / |
566 | [clinic start generated code]*/ |
567 | |
568 | static PyObject * |
569 | _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self, |
570 | PyObject *state) |
571 | /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/ |
572 | { |
573 | PyObject *buffer; |
574 | unsigned long long flag; |
575 | |
576 | if (!PyTuple_Check(state)) { |
577 | PyErr_SetString(PyExc_TypeError, "state argument must be a tuple" ); |
578 | return NULL; |
579 | } |
580 | if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument" , |
581 | &buffer, &flag)) |
582 | { |
583 | return NULL; |
584 | } |
585 | |
586 | self->pendingcr = (int) (flag & 1); |
587 | flag >>= 1; |
588 | |
589 | if (self->decoder != Py_None) |
590 | return _PyObject_CallMethodId(self->decoder, |
591 | &PyId_setstate, "((OK))" , buffer, flag); |
592 | else |
593 | Py_RETURN_NONE; |
594 | } |
595 | |
596 | /*[clinic input] |
597 | _io.IncrementalNewlineDecoder.reset |
598 | [clinic start generated code]*/ |
599 | |
600 | static PyObject * |
601 | _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self) |
602 | /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/ |
603 | { |
604 | self->seennl = 0; |
605 | self->pendingcr = 0; |
606 | if (self->decoder != Py_None) |
607 | return PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset); |
608 | else |
609 | Py_RETURN_NONE; |
610 | } |
611 | |
612 | static PyObject * |
613 | incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context) |
614 | { |
615 | switch (self->seennl) { |
616 | case SEEN_CR: |
617 | return PyUnicode_FromString("\r" ); |
618 | case SEEN_LF: |
619 | return PyUnicode_FromString("\n" ); |
620 | case SEEN_CRLF: |
621 | return PyUnicode_FromString("\r\n" ); |
622 | case SEEN_CR | SEEN_LF: |
623 | return Py_BuildValue("ss" , "\r" , "\n" ); |
624 | case SEEN_CR | SEEN_CRLF: |
625 | return Py_BuildValue("ss" , "\r" , "\r\n" ); |
626 | case SEEN_LF | SEEN_CRLF: |
627 | return Py_BuildValue("ss" , "\n" , "\r\n" ); |
628 | case SEEN_CR | SEEN_LF | SEEN_CRLF: |
629 | return Py_BuildValue("sss" , "\r" , "\n" , "\r\n" ); |
630 | default: |
631 | Py_RETURN_NONE; |
632 | } |
633 | |
634 | } |
635 | |
636 | /* TextIOWrapper */ |
637 | |
/* Signature used to store the specialized encoders below.  They are
   declared with a `textio *` first parameter and cast to this type when
   stored in the `encodefuncs` table. */
typedef PyObject *
    (*encodefunc_t)(PyObject *, PyObject *);
640 | |
/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    /* Underlying stream object; its readable(), writable() and tell()
       methods are called by the helpers below. */
    PyObject *buffer;
    PyObject *encoding;
    /* Incremental encoder; set by _textiowrapper_set_encoder() when the
       buffer is writable. */
    PyObject *encoder;
    /* Incremental decoder; set by _textiowrapper_set_decoder() when the
       buffer is readable, and wrapped in an IncrementalNewlineDecoder when
       readuniversal is set. */
    PyObject *decoder;
    /* str built from the `newline` argument by set_newline(), or NULL. */
    PyObject *readnl;
    /* Error handler name; the specialized encoders convert it with
       PyUnicode_AsUTF8(). */
    PyObject *errors;
    /* Set by set_newline(): points into readnl's data or at a string
       literal, so it is not owned separately. */
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    /* The next three flags are derived from the `newline` argument in
       set_newline(). */
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
699 | |
/* Forward declaration; the body is not part of this section of the file. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
702 | |
703 | /* A couple of specialized cases in order to bypass the slow incremental |
704 | encoding methods for the most popular encodings. */ |
705 | |
706 | static PyObject * |
707 | ascii_encode(textio *self, PyObject *text) |
708 | { |
709 | return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors)); |
710 | } |
711 | |
712 | static PyObject * |
713 | utf16be_encode(textio *self, PyObject *text) |
714 | { |
715 | return _PyUnicode_EncodeUTF16(text, |
716 | PyUnicode_AsUTF8(self->errors), 1); |
717 | } |
718 | |
719 | static PyObject * |
720 | utf16le_encode(textio *self, PyObject *text) |
721 | { |
722 | return _PyUnicode_EncodeUTF16(text, |
723 | PyUnicode_AsUTF8(self->errors), -1); |
724 | } |
725 | |
726 | static PyObject * |
727 | utf16_encode(textio *self, PyObject *text) |
728 | { |
729 | if (!self->encoding_start_of_stream) { |
730 | /* Skip the BOM and use native byte ordering */ |
731 | #if PY_BIG_ENDIAN |
732 | return utf16be_encode(self, text); |
733 | #else |
734 | return utf16le_encode(self, text); |
735 | #endif |
736 | } |
737 | return _PyUnicode_EncodeUTF16(text, |
738 | PyUnicode_AsUTF8(self->errors), 0); |
739 | } |
740 | |
741 | static PyObject * |
742 | utf32be_encode(textio *self, PyObject *text) |
743 | { |
744 | return _PyUnicode_EncodeUTF32(text, |
745 | PyUnicode_AsUTF8(self->errors), 1); |
746 | } |
747 | |
748 | static PyObject * |
749 | utf32le_encode(textio *self, PyObject *text) |
750 | { |
751 | return _PyUnicode_EncodeUTF32(text, |
752 | PyUnicode_AsUTF8(self->errors), -1); |
753 | } |
754 | |
755 | static PyObject * |
756 | utf32_encode(textio *self, PyObject *text) |
757 | { |
758 | if (!self->encoding_start_of_stream) { |
759 | /* Skip the BOM and use native byte ordering */ |
760 | #if PY_BIG_ENDIAN |
761 | return utf32be_encode(self, text); |
762 | #else |
763 | return utf32le_encode(self, text); |
764 | #endif |
765 | } |
766 | return _PyUnicode_EncodeUTF32(text, |
767 | PyUnicode_AsUTF8(self->errors), 0); |
768 | } |
769 | |
770 | static PyObject * |
771 | utf8_encode(textio *self, PyObject *text) |
772 | { |
773 | return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors)); |
774 | } |
775 | |
776 | static PyObject * |
777 | latin1_encode(textio *self, PyObject *text) |
778 | { |
779 | return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors)); |
780 | } |
781 | |
782 | // Return true when encoding can be skipped when text is ascii. |
783 | static inline int |
784 | is_asciicompat_encoding(encodefunc_t f) |
785 | { |
786 | return f == (encodefunc_t) ascii_encode |
787 | || f == (encodefunc_t) latin1_encode |
788 | || f == (encodefunc_t) utf8_encode; |
789 | } |
790 | |
791 | /* Map normalized encoding names onto the specialized encoding funcs */ |
792 | |
793 | typedef struct { |
794 | const char *name; |
795 | encodefunc_t encodefunc; |
796 | } encodefuncentry; |
797 | |
798 | static const encodefuncentry encodefuncs[] = { |
799 | {"ascii" , (encodefunc_t) ascii_encode}, |
800 | {"iso8859-1" , (encodefunc_t) latin1_encode}, |
801 | {"utf-8" , (encodefunc_t) utf8_encode}, |
802 | {"utf-16-be" , (encodefunc_t) utf16be_encode}, |
803 | {"utf-16-le" , (encodefunc_t) utf16le_encode}, |
804 | {"utf-16" , (encodefunc_t) utf16_encode}, |
805 | {"utf-32-be" , (encodefunc_t) utf32be_encode}, |
806 | {"utf-32-le" , (encodefunc_t) utf32le_encode}, |
807 | {"utf-32" , (encodefunc_t) utf32_encode}, |
808 | {NULL, NULL} |
809 | }; |
810 | |
811 | static int |
812 | validate_newline(const char *newline) |
813 | { |
814 | if (newline && newline[0] != '\0' |
815 | && !(newline[0] == '\n' && newline[1] == '\0') |
816 | && !(newline[0] == '\r' && newline[1] == '\0') |
817 | && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) { |
818 | PyErr_Format(PyExc_ValueError, |
819 | "illegal newline value: %s" , newline); |
820 | return -1; |
821 | } |
822 | return 0; |
823 | } |
824 | |
825 | static int |
826 | set_newline(textio *self, const char *newline) |
827 | { |
828 | PyObject *old = self->readnl; |
829 | if (newline == NULL) { |
830 | self->readnl = NULL; |
831 | } |
832 | else { |
833 | self->readnl = PyUnicode_FromString(newline); |
834 | if (self->readnl == NULL) { |
835 | self->readnl = old; |
836 | return -1; |
837 | } |
838 | } |
839 | self->readuniversal = (newline == NULL || newline[0] == '\0'); |
840 | self->readtranslate = (newline == NULL); |
841 | self->writetranslate = (newline == NULL || newline[0] != '\0'); |
842 | if (!self->readuniversal && self->readnl != NULL) { |
843 | // validate_newline() accepts only ASCII newlines. |
844 | assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND); |
845 | self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl); |
846 | if (strcmp(self->writenl, "\n" ) == 0) { |
847 | self->writenl = NULL; |
848 | } |
849 | } |
850 | else { |
851 | #ifdef MS_WINDOWS |
852 | self->writenl = "\r\n" ; |
853 | #else |
854 | self->writenl = NULL; |
855 | #endif |
856 | } |
857 | Py_XDECREF(old); |
858 | return 0; |
859 | } |
860 | |
861 | static int |
862 | _textiowrapper_set_decoder(textio *self, PyObject *codec_info, |
863 | const char *errors) |
864 | { |
865 | PyObject *res; |
866 | int r; |
867 | |
868 | res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable); |
869 | if (res == NULL) |
870 | return -1; |
871 | |
872 | r = PyObject_IsTrue(res); |
873 | Py_DECREF(res); |
874 | if (r == -1) |
875 | return -1; |
876 | |
877 | if (r != 1) |
878 | return 0; |
879 | |
880 | Py_CLEAR(self->decoder); |
881 | self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors); |
882 | if (self->decoder == NULL) |
883 | return -1; |
884 | |
885 | if (self->readuniversal) { |
886 | PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs( |
887 | (PyObject *)&PyIncrementalNewlineDecoder_Type, |
888 | self->decoder, self->readtranslate ? Py_True : Py_False, NULL); |
889 | if (incrementalDecoder == NULL) |
890 | return -1; |
891 | Py_CLEAR(self->decoder); |
892 | self->decoder = incrementalDecoder; |
893 | } |
894 | |
895 | return 0; |
896 | } |
897 | |
898 | static PyObject* |
899 | _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof) |
900 | { |
901 | PyObject *chars; |
902 | |
903 | if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type)) |
904 | chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof); |
905 | else |
906 | chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes, |
907 | eof ? Py_True : Py_False, NULL); |
908 | |
909 | if (check_decoded(chars) < 0) |
910 | // check_decoded already decreases refcount |
911 | return NULL; |
912 | |
913 | return chars; |
914 | } |
915 | |
916 | static int |
917 | _textiowrapper_set_encoder(textio *self, PyObject *codec_info, |
918 | const char *errors) |
919 | { |
920 | PyObject *res; |
921 | int r; |
922 | |
923 | res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable); |
924 | if (res == NULL) |
925 | return -1; |
926 | |
927 | r = PyObject_IsTrue(res); |
928 | Py_DECREF(res); |
929 | if (r == -1) |
930 | return -1; |
931 | |
932 | if (r != 1) |
933 | return 0; |
934 | |
935 | Py_CLEAR(self->encoder); |
936 | self->encodefunc = NULL; |
937 | self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors); |
938 | if (self->encoder == NULL) |
939 | return -1; |
940 | |
941 | /* Get the normalized named of the codec */ |
942 | if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) { |
943 | return -1; |
944 | } |
945 | if (res != NULL && PyUnicode_Check(res)) { |
946 | const encodefuncentry *e = encodefuncs; |
947 | while (e->name != NULL) { |
948 | if (_PyUnicode_EqualToASCIIString(res, e->name)) { |
949 | self->encodefunc = e->encodefunc; |
950 | break; |
951 | } |
952 | e++; |
953 | } |
954 | } |
955 | Py_XDECREF(res); |
956 | |
957 | return 0; |
958 | } |
959 | |
960 | static int |
961 | _textiowrapper_fix_encoder_state(textio *self) |
962 | { |
963 | if (!self->seekable || !self->encoder) { |
964 | return 0; |
965 | } |
966 | |
967 | self->encoding_start_of_stream = 1; |
968 | |
969 | PyObject *cookieObj = PyObject_CallMethodNoArgs( |
970 | self->buffer, _PyIO_str_tell); |
971 | if (cookieObj == NULL) { |
972 | return -1; |
973 | } |
974 | |
975 | int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ); |
976 | Py_DECREF(cookieObj); |
977 | if (cmp < 0) { |
978 | return -1; |
979 | } |
980 | |
981 | if (cmp == 0) { |
982 | self->encoding_start_of_stream = 0; |
983 | PyObject *res = PyObject_CallMethodOneArg( |
984 | self->encoder, _PyIO_str_setstate, _PyLong_GetZero()); |
985 | if (res == NULL) { |
986 | return -1; |
987 | } |
988 | Py_DECREF(res); |
989 | } |
990 | |
991 | return 0; |
992 | } |
993 | |
994 | static int |
995 | io_check_errors(PyObject *errors) |
996 | { |
997 | assert(errors != NULL && errors != Py_None); |
998 | |
999 | PyInterpreterState *interp = _PyInterpreterState_GET(); |
1000 | #ifndef Py_DEBUG |
1001 | /* In release mode, only check in development mode (-X dev) */ |
1002 | if (!_PyInterpreterState_GetConfig(interp)->dev_mode) { |
1003 | return 0; |
1004 | } |
1005 | #else |
1006 | /* Always check in debug mode */ |
1007 | #endif |
1008 | |
1009 | /* Avoid calling PyCodec_LookupError() before the codec registry is ready: |
1010 | before_PyUnicode_InitEncodings() is called. */ |
1011 | if (!interp->unicode.fs_codec.encoding) { |
1012 | return 0; |
1013 | } |
1014 | |
1015 | Py_ssize_t name_length; |
1016 | const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length); |
1017 | if (name == NULL) { |
1018 | return -1; |
1019 | } |
1020 | if (strlen(name) != (size_t)name_length) { |
1021 | PyErr_SetString(PyExc_ValueError, "embedded null character in errors" ); |
1022 | return -1; |
1023 | } |
1024 | PyObject *handler = PyCodec_LookupError(name); |
1025 | if (handler != NULL) { |
1026 | Py_DECREF(handler); |
1027 | return 0; |
1028 | } |
1029 | return -1; |
1030 | } |
1031 | |
1032 | |
1033 | |
1034 | /*[clinic input] |
1035 | _io.TextIOWrapper.__init__ |
1036 | buffer: object |
1037 | encoding: str(accept={str, NoneType}) = None |
1038 | errors: object = None |
1039 | newline: str(accept={str, NoneType}) = None |
1040 | line_buffering: bool(accept={int}) = False |
1041 | write_through: bool(accept={int}) = False |
1042 | |
1043 | Character and line based layer over a BufferedIOBase object, buffer. |
1044 | |
1045 | encoding gives the name of the encoding that the stream will be |
1046 | decoded or encoded with. It defaults to locale.getpreferredencoding(False). |
1047 | |
1048 | errors determines the strictness of encoding and decoding (see |
1049 | help(codecs.Codec) or the documentation for codecs.register) and |
1050 | defaults to "strict". |
1051 | |
1052 | newline controls how line endings are handled. It can be None, '', |
1053 | '\n', '\r', and '\r\n'. It works as follows: |
1054 | |
1055 | * On input, if newline is None, universal newlines mode is |
1056 | enabled. Lines in the input can end in '\n', '\r', or '\r\n', and |
1057 | these are translated into '\n' before being returned to the |
1058 | caller. If it is '', universal newline mode is enabled, but line |
1059 | endings are returned to the caller untranslated. If it has any of |
1060 | the other legal values, input lines are only terminated by the given |
1061 | string, and the line ending is returned to the caller untranslated. |
1062 | |
1063 | * On output, if newline is None, any '\n' characters written are |
1064 | translated to the system default line separator, os.linesep. If |
1065 | newline is '' or '\n', no translation takes place. If newline is any |
1066 | of the other legal values, any '\n' characters written are translated |
1067 | to the given string. |
1068 | |
1069 | If line_buffering is True, a call to flush is implied when a call to |
1070 | write contains a newline character. |
1071 | [clinic start generated code]*/ |
1072 | |
1073 | static int |
1074 | _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, |
1075 | const char *encoding, PyObject *errors, |
1076 | const char *newline, int line_buffering, |
1077 | int write_through) |
1078 | /*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/ |
1079 | { |
1080 | PyObject *raw, *codec_info = NULL; |
1081 | _PyIO_State *state = NULL; |
1082 | PyObject *res; |
1083 | int r; |
1084 | |
1085 | self->ok = 0; |
1086 | self->detached = 0; |
1087 | |
1088 | if (encoding == NULL) { |
1089 | PyInterpreterState *interp = _PyInterpreterState_GET(); |
1090 | if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { |
1091 | if (PyErr_WarnEx(PyExc_EncodingWarning, |
1092 | "'encoding' argument not specified" , 1)) { |
1093 | return -1; |
1094 | } |
1095 | } |
1096 | } |
1097 | else if (strcmp(encoding, "locale" ) == 0) { |
1098 | encoding = NULL; |
1099 | } |
1100 | |
1101 | if (errors == Py_None) { |
1102 | errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */ |
1103 | if (errors == NULL) { |
1104 | return -1; |
1105 | } |
1106 | } |
1107 | else if (!PyUnicode_Check(errors)) { |
1108 | // Check 'errors' argument here because Argument Clinic doesn't support |
1109 | // 'str(accept={str, NoneType})' converter. |
1110 | PyErr_Format( |
1111 | PyExc_TypeError, |
1112 | "TextIOWrapper() argument 'errors' must be str or None, not %.50s" , |
1113 | Py_TYPE(errors)->tp_name); |
1114 | return -1; |
1115 | } |
1116 | else if (io_check_errors(errors)) { |
1117 | return -1; |
1118 | } |
1119 | |
1120 | if (validate_newline(newline) < 0) { |
1121 | return -1; |
1122 | } |
1123 | |
1124 | Py_CLEAR(self->buffer); |
1125 | Py_CLEAR(self->encoding); |
1126 | Py_CLEAR(self->encoder); |
1127 | Py_CLEAR(self->decoder); |
1128 | Py_CLEAR(self->readnl); |
1129 | Py_CLEAR(self->decoded_chars); |
1130 | Py_CLEAR(self->pending_bytes); |
1131 | Py_CLEAR(self->snapshot); |
1132 | Py_CLEAR(self->errors); |
1133 | Py_CLEAR(self->raw); |
1134 | self->decoded_chars_used = 0; |
1135 | self->pending_bytes_count = 0; |
1136 | self->encodefunc = NULL; |
1137 | self->b2cratio = 0.0; |
1138 | |
1139 | if (encoding == NULL) { |
1140 | /* Try os.device_encoding(fileno) */ |
1141 | PyObject *fileno; |
1142 | state = IO_STATE(); |
1143 | if (state == NULL) |
1144 | goto error; |
1145 | fileno = _PyObject_CallMethodIdNoArgs(buffer, &PyId_fileno); |
1146 | /* Ignore only AttributeError and UnsupportedOperation */ |
1147 | if (fileno == NULL) { |
1148 | if (PyErr_ExceptionMatches(PyExc_AttributeError) || |
1149 | PyErr_ExceptionMatches(state->unsupported_operation)) { |
1150 | PyErr_Clear(); |
1151 | } |
1152 | else { |
1153 | goto error; |
1154 | } |
1155 | } |
1156 | else { |
1157 | int fd = _PyLong_AsInt(fileno); |
1158 | Py_DECREF(fileno); |
1159 | if (fd == -1 && PyErr_Occurred()) { |
1160 | goto error; |
1161 | } |
1162 | |
1163 | self->encoding = _Py_device_encoding(fd); |
1164 | if (self->encoding == NULL) |
1165 | goto error; |
1166 | else if (!PyUnicode_Check(self->encoding)) |
1167 | Py_CLEAR(self->encoding); |
1168 | } |
1169 | } |
1170 | if (encoding == NULL && self->encoding == NULL) { |
1171 | self->encoding = _Py_GetLocaleEncodingObject(); |
1172 | if (self->encoding == NULL) { |
1173 | goto error; |
1174 | } |
1175 | assert(PyUnicode_Check(self->encoding)); |
1176 | } |
1177 | if (self->encoding != NULL) { |
1178 | encoding = PyUnicode_AsUTF8(self->encoding); |
1179 | if (encoding == NULL) |
1180 | goto error; |
1181 | } |
1182 | else if (encoding != NULL) { |
1183 | self->encoding = PyUnicode_FromString(encoding); |
1184 | if (self->encoding == NULL) |
1185 | goto error; |
1186 | } |
1187 | else { |
1188 | PyErr_SetString(PyExc_OSError, |
1189 | "could not determine default encoding" ); |
1190 | goto error; |
1191 | } |
1192 | |
1193 | /* Check we have been asked for a real text encoding */ |
1194 | codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()" ); |
1195 | if (codec_info == NULL) { |
1196 | Py_CLEAR(self->encoding); |
1197 | goto error; |
1198 | } |
1199 | |
1200 | /* XXX: Failures beyond this point have the potential to leak elements |
1201 | * of the partially constructed object (like self->encoding) |
1202 | */ |
1203 | |
1204 | Py_INCREF(errors); |
1205 | self->errors = errors; |
1206 | self->chunk_size = 8192; |
1207 | self->line_buffering = line_buffering; |
1208 | self->write_through = write_through; |
1209 | if (set_newline(self, newline) < 0) { |
1210 | goto error; |
1211 | } |
1212 | |
1213 | self->buffer = buffer; |
1214 | Py_INCREF(buffer); |
1215 | |
1216 | /* Build the decoder object */ |
1217 | if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0) |
1218 | goto error; |
1219 | |
1220 | /* Build the encoder object */ |
1221 | if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0) |
1222 | goto error; |
1223 | |
1224 | /* Finished sorting out the codec details */ |
1225 | Py_CLEAR(codec_info); |
1226 | |
1227 | if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) || |
1228 | Py_IS_TYPE(buffer, &PyBufferedWriter_Type) || |
1229 | Py_IS_TYPE(buffer, &PyBufferedRandom_Type)) |
1230 | { |
1231 | if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0) |
1232 | goto error; |
1233 | /* Cache the raw FileIO object to speed up 'closed' checks */ |
1234 | if (raw != NULL) { |
1235 | if (Py_IS_TYPE(raw, &PyFileIO_Type)) |
1236 | self->raw = raw; |
1237 | else |
1238 | Py_DECREF(raw); |
1239 | } |
1240 | } |
1241 | |
1242 | res = _PyObject_CallMethodIdNoArgs(buffer, &PyId_seekable); |
1243 | if (res == NULL) |
1244 | goto error; |
1245 | r = PyObject_IsTrue(res); |
1246 | Py_DECREF(res); |
1247 | if (r < 0) |
1248 | goto error; |
1249 | self->seekable = self->telling = r; |
1250 | |
1251 | r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res); |
1252 | if (r < 0) { |
1253 | goto error; |
1254 | } |
1255 | Py_XDECREF(res); |
1256 | self->has_read1 = r; |
1257 | |
1258 | self->encoding_start_of_stream = 0; |
1259 | if (_textiowrapper_fix_encoder_state(self) < 0) { |
1260 | goto error; |
1261 | } |
1262 | |
1263 | self->ok = 1; |
1264 | return 0; |
1265 | |
1266 | error: |
1267 | Py_XDECREF(codec_info); |
1268 | return -1; |
1269 | } |
1270 | |
1271 | /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true, |
1272 | * -1 on error. |
1273 | */ |
1274 | static int |
1275 | convert_optional_bool(PyObject *obj, int default_value) |
1276 | { |
1277 | long v; |
1278 | if (obj == Py_None) { |
1279 | v = default_value; |
1280 | } |
1281 | else { |
1282 | v = PyLong_AsLong(obj); |
1283 | if (v == -1 && PyErr_Occurred()) |
1284 | return -1; |
1285 | } |
1286 | return v != 0; |
1287 | } |
1288 | |
1289 | static int |
1290 | textiowrapper_change_encoding(textio *self, PyObject *encoding, |
1291 | PyObject *errors, int newline_changed) |
1292 | { |
1293 | /* Use existing settings where new settings are not specified */ |
1294 | if (encoding == Py_None && errors == Py_None && !newline_changed) { |
1295 | return 0; // no change |
1296 | } |
1297 | |
1298 | if (encoding == Py_None) { |
1299 | encoding = self->encoding; |
1300 | if (errors == Py_None) { |
1301 | errors = self->errors; |
1302 | } |
1303 | } |
1304 | else if (errors == Py_None) { |
1305 | errors = _PyUnicode_FromId(&PyId_strict); |
1306 | if (errors == NULL) { |
1307 | return -1; |
1308 | } |
1309 | } |
1310 | |
1311 | const char *c_errors = PyUnicode_AsUTF8(errors); |
1312 | if (c_errors == NULL) { |
1313 | return -1; |
1314 | } |
1315 | |
1316 | // Create new encoder & decoder |
1317 | PyObject *codec_info = _PyCodec_LookupTextEncoding( |
1318 | PyUnicode_AsUTF8(encoding), "codecs.open()" ); |
1319 | if (codec_info == NULL) { |
1320 | return -1; |
1321 | } |
1322 | if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 || |
1323 | _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) { |
1324 | Py_DECREF(codec_info); |
1325 | return -1; |
1326 | } |
1327 | Py_DECREF(codec_info); |
1328 | |
1329 | Py_INCREF(encoding); |
1330 | Py_INCREF(errors); |
1331 | Py_SETREF(self->encoding, encoding); |
1332 | Py_SETREF(self->errors, errors); |
1333 | |
1334 | return _textiowrapper_fix_encoder_state(self); |
1335 | } |
1336 | |
1337 | /*[clinic input] |
1338 | _io.TextIOWrapper.reconfigure |
1339 | * |
1340 | encoding: object = None |
1341 | errors: object = None |
1342 | newline as newline_obj: object(c_default="NULL") = None |
1343 | line_buffering as line_buffering_obj: object = None |
1344 | write_through as write_through_obj: object = None |
1345 | |
1346 | Reconfigure the text stream with new parameters. |
1347 | |
1348 | This also does an implicit stream flush. |
1349 | |
1350 | [clinic start generated code]*/ |
1351 | |
1352 | static PyObject * |
1353 | _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding, |
1354 | PyObject *errors, PyObject *newline_obj, |
1355 | PyObject *line_buffering_obj, |
1356 | PyObject *write_through_obj) |
1357 | /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/ |
1358 | { |
1359 | int line_buffering; |
1360 | int write_through; |
1361 | const char *newline = NULL; |
1362 | |
1363 | /* Check if something is in the read buffer */ |
1364 | if (self->decoded_chars != NULL) { |
1365 | if (encoding != Py_None || errors != Py_None || newline_obj != NULL) { |
1366 | _unsupported("It is not possible to set the encoding or newline " |
1367 | "of stream after the first read" ); |
1368 | return NULL; |
1369 | } |
1370 | } |
1371 | |
1372 | if (newline_obj != NULL && newline_obj != Py_None) { |
1373 | newline = PyUnicode_AsUTF8(newline_obj); |
1374 | if (newline == NULL || validate_newline(newline) < 0) { |
1375 | return NULL; |
1376 | } |
1377 | } |
1378 | |
1379 | line_buffering = convert_optional_bool(line_buffering_obj, |
1380 | self->line_buffering); |
1381 | write_through = convert_optional_bool(write_through_obj, |
1382 | self->write_through); |
1383 | if (line_buffering < 0 || write_through < 0) { |
1384 | return NULL; |
1385 | } |
1386 | |
1387 | PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush); |
1388 | if (res == NULL) { |
1389 | return NULL; |
1390 | } |
1391 | Py_DECREF(res); |
1392 | self->b2cratio = 0; |
1393 | |
1394 | if (newline_obj != NULL && set_newline(self, newline) < 0) { |
1395 | return NULL; |
1396 | } |
1397 | |
1398 | if (textiowrapper_change_encoding( |
1399 | self, encoding, errors, newline_obj != NULL) < 0) { |
1400 | return NULL; |
1401 | } |
1402 | |
1403 | self->line_buffering = line_buffering; |
1404 | self->write_through = write_through; |
1405 | Py_RETURN_NONE; |
1406 | } |
1407 | |
/* Drop every object reference held by the wrapper and mark it as not
 * initialised (self->ok = 0).  Also called from textiowrapper_dealloc().
 * Always returns 0.
 */
static int
textiowrapper_clear(textio *self)
{
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    /* The instance dictionary goes last. */
    Py_CLEAR(self->dict);
    return 0;
}
1426 | |
/* Deallocator: finalise the object, then release its references and free
 * its memory.
 */
static void
textiowrapper_dealloc(textio *self)
{
    /* Flag that finalisation is in progress before running it. */
    self->finalizing = 1;
    /* A negative result aborts the deallocation and leaves the object
       intact (NOTE(review): presumably the object was resurrected during
       finalisation -- confirm against _PyIOBase_finalize). */
    if (_PyIOBase_finalize((PyObject *) self) < 0)
        return;
    self->ok = 0;
    /* Stop GC tracking before the fields are torn down. */
    _PyObject_GC_UNTRACK(self);
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *)self);
    textiowrapper_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
1440 | |
/* GC traversal: report every object reference held by the wrapper to
 * `visit`.  The visited fields are the same ones textiowrapper_clear()
 * releases.  Returns 0 after all fields have been visited.
 */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1458 | |
/* Forward declaration: CHECK_CLOSED below calls the `closed` getter. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);

/* Return NULL from the enclosing function, with an exception set, when the
   stream is closed.

   This macro takes some shortcuts to make the common case faster: for exact
   TextIOWrapper instances it asks the cached raw FileIO object (self->raw)
   directly when there is one, and otherwise calls the `closed` getter
   without an attribute lookup.  Instances of any other type go through
   _PyIOBase_check_closed().  Only usable in functions returning a pointer. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)

/* Return NULL with ValueError from the enclosing function unless
   self->ok is positive, i.e. unless initialisation completed. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }

/* CHECK_INITIALIZED plus a check that the underlying buffer has not been
   detached.  For functions returning a pointer. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }

/* Same checks as CHECK_ATTACHED, for functions that report errors by
   returning -1. */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1514 | |
1515 | |
1516 | /*[clinic input] |
1517 | _io.TextIOWrapper.detach |
1518 | [clinic start generated code]*/ |
1519 | |
1520 | static PyObject * |
1521 | _io_TextIOWrapper_detach_impl(textio *self) |
1522 | /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/ |
1523 | { |
1524 | PyObject *buffer, *res; |
1525 | CHECK_ATTACHED(self); |
1526 | res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush); |
1527 | if (res == NULL) |
1528 | return NULL; |
1529 | Py_DECREF(res); |
1530 | buffer = self->buffer; |
1531 | self->buffer = NULL; |
1532 | self->detached = 1; |
1533 | return buffer; |
1534 | } |
1535 | |
/* Flush the internal write buffer. This doesn't explicitly flush the
   underlying buffered object, though.

   self->pending_bytes is handled in three shapes: a bytes object, an ASCII
   str object, or a list whose items are bytes or ASCII str objects.  The
   content is turned into a single bytes object and passed to one
   buffer.write() call.

   Returns 0 on success (or when nothing is pending), -1 with an exception
   set on failure. */
static int
_textiowrapper_writeflush(textio *self)
{
    if (self->pending_bytes == NULL)
        return 0;

    PyObject *pending = self->pending_bytes;
    PyObject *b;

    if (PyBytes_Check(pending)) {
        /* Single bytes object: write it as is. */
        b = pending;
        Py_INCREF(b);
    }
    else if (PyUnicode_Check(pending)) {
        /* Single ASCII str: copy its character data into a bytes object. */
        assert(PyUnicode_IS_ASCII(pending));
        assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
        b = PyBytes_FromStringAndSize(
            PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
        if (b == NULL) {
            return -1;
        }
    }
    else {
        /* List of chunks: concatenate them into one preallocated bytes
           object of pending_bytes_count bytes. */
        assert(PyList_Check(pending));
        b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
        if (b == NULL) {
            return -1;
        }

        char *buf = PyBytes_AsString(b);
        Py_ssize_t pos = 0;

        for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
            PyObject *obj = PyList_GET_ITEM(pending, i);
            char *src;
            Py_ssize_t len;
            if (PyUnicode_Check(obj)) {
                assert(PyUnicode_IS_ASCII(obj));
                src = PyUnicode_DATA(obj);
                len = PyUnicode_GET_LENGTH(obj);
            }
            else {
                assert(PyBytes_Check(obj));
                if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
                    Py_DECREF(b);
                    return -1;
                }
            }
            memcpy(buf + pos, src, len);
            pos += len;
        }
        assert(pos == self->pending_bytes_count);
    }

    /* Empty the queue before calling into the buffer object. */
    self->pending_bytes_count = 0;
    self->pending_bytes = NULL;
    Py_DECREF(pending);

    PyObject *ret;
    /* Retry for as long as the failure is an EINTR that gets trapped. */
    do {
        ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
    } while (ret == NULL && _PyIO_trap_eintr());
    Py_DECREF(b);
    // NOTE: We cleared buffer but we don't know how many bytes are actually written
    // when an error occurred.
    if (ret == NULL)
        return -1;
    Py_DECREF(ret);
    return 0;
}
1608 | |
1609 | /*[clinic input] |
1610 | _io.TextIOWrapper.write |
1611 | text: unicode |
1612 | / |
1613 | [clinic start generated code]*/ |
1614 | |
1615 | static PyObject * |
1616 | _io_TextIOWrapper_write_impl(textio *self, PyObject *text) |
1617 | /*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/ |
1618 | { |
1619 | PyObject *ret; |
1620 | PyObject *b; |
1621 | Py_ssize_t textlen; |
1622 | int haslf = 0; |
1623 | int needflush = 0, text_needflush = 0; |
1624 | |
1625 | if (PyUnicode_READY(text) == -1) |
1626 | return NULL; |
1627 | |
1628 | CHECK_ATTACHED(self); |
1629 | CHECK_CLOSED(self); |
1630 | |
1631 | if (self->encoder == NULL) |
1632 | return _unsupported("not writable" ); |
1633 | |
1634 | Py_INCREF(text); |
1635 | |
1636 | textlen = PyUnicode_GET_LENGTH(text); |
1637 | |
1638 | if ((self->writetranslate && self->writenl != NULL) || self->line_buffering) |
1639 | if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1) |
1640 | haslf = 1; |
1641 | |
1642 | if (haslf && self->writetranslate && self->writenl != NULL) { |
1643 | PyObject *newtext = _PyObject_CallMethodId( |
1644 | text, &PyId_replace, "ss" , "\n" , self->writenl); |
1645 | Py_DECREF(text); |
1646 | if (newtext == NULL) |
1647 | return NULL; |
1648 | text = newtext; |
1649 | } |
1650 | |
1651 | if (self->write_through) |
1652 | text_needflush = 1; |
1653 | if (self->line_buffering && |
1654 | (haslf || |
1655 | PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1)) |
1656 | needflush = 1; |
1657 | |
1658 | /* XXX What if we were just reading? */ |
1659 | if (self->encodefunc != NULL) { |
1660 | if (PyUnicode_IS_ASCII(text) && |
1661 | // See bpo-43260 |
1662 | PyUnicode_GET_LENGTH(text) <= self->chunk_size && |
1663 | is_asciicompat_encoding(self->encodefunc)) { |
1664 | b = text; |
1665 | Py_INCREF(b); |
1666 | } |
1667 | else { |
1668 | b = (*self->encodefunc)((PyObject *) self, text); |
1669 | } |
1670 | self->encoding_start_of_stream = 0; |
1671 | } |
1672 | else { |
1673 | b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text); |
1674 | } |
1675 | |
1676 | Py_DECREF(text); |
1677 | if (b == NULL) |
1678 | return NULL; |
1679 | if (b != text && !PyBytes_Check(b)) { |
1680 | PyErr_Format(PyExc_TypeError, |
1681 | "encoder should return a bytes object, not '%.200s'" , |
1682 | Py_TYPE(b)->tp_name); |
1683 | Py_DECREF(b); |
1684 | return NULL; |
1685 | } |
1686 | |
1687 | Py_ssize_t bytes_len; |
1688 | if (b == text) { |
1689 | bytes_len = PyUnicode_GET_LENGTH(b); |
1690 | } |
1691 | else { |
1692 | bytes_len = PyBytes_GET_SIZE(b); |
1693 | } |
1694 | |
1695 | if (self->pending_bytes == NULL) { |
1696 | self->pending_bytes_count = 0; |
1697 | self->pending_bytes = b; |
1698 | } |
1699 | else if (self->pending_bytes_count + bytes_len > self->chunk_size) { |
1700 | // Prevent to concatenate more than chunk_size data. |
1701 | if (_textiowrapper_writeflush(self) < 0) { |
1702 | Py_DECREF(b); |
1703 | return NULL; |
1704 | } |
1705 | self->pending_bytes = b; |
1706 | } |
1707 | else if (!PyList_CheckExact(self->pending_bytes)) { |
1708 | PyObject *list = PyList_New(2); |
1709 | if (list == NULL) { |
1710 | Py_DECREF(b); |
1711 | return NULL; |
1712 | } |
1713 | PyList_SET_ITEM(list, 0, self->pending_bytes); |
1714 | PyList_SET_ITEM(list, 1, b); |
1715 | self->pending_bytes = list; |
1716 | } |
1717 | else { |
1718 | if (PyList_Append(self->pending_bytes, b) < 0) { |
1719 | Py_DECREF(b); |
1720 | return NULL; |
1721 | } |
1722 | Py_DECREF(b); |
1723 | } |
1724 | |
1725 | self->pending_bytes_count += bytes_len; |
1726 | if (self->pending_bytes_count >= self->chunk_size || needflush || |
1727 | text_needflush) { |
1728 | if (_textiowrapper_writeflush(self) < 0) |
1729 | return NULL; |
1730 | } |
1731 | |
1732 | if (needflush) { |
1733 | ret = PyObject_CallMethodNoArgs(self->buffer, _PyIO_str_flush); |
1734 | if (ret == NULL) |
1735 | return NULL; |
1736 | Py_DECREF(ret); |
1737 | } |
1738 | |
1739 | textiowrapper_set_decoded_chars(self, NULL); |
1740 | Py_CLEAR(self->snapshot); |
1741 | |
1742 | if (self->decoder) { |
1743 | ret = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset); |
1744 | if (ret == NULL) |
1745 | return NULL; |
1746 | Py_DECREF(ret); |
1747 | } |
1748 | |
1749 | return PyLong_FromSsize_t(textlen); |
1750 | } |
1751 | |
1752 | /* Steal a reference to chars and store it in the decoded_char buffer; |
1753 | */ |
1754 | static void |
1755 | textiowrapper_set_decoded_chars(textio *self, PyObject *chars) |
1756 | { |
1757 | Py_XSETREF(self->decoded_chars, chars); |
1758 | self->decoded_chars_used = 0; |
1759 | } |
1760 | |
1761 | static PyObject * |
1762 | textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n) |
1763 | { |
1764 | PyObject *chars; |
1765 | Py_ssize_t avail; |
1766 | |
1767 | if (self->decoded_chars == NULL) |
1768 | return PyUnicode_FromStringAndSize(NULL, 0); |
1769 | |
1770 | /* decoded_chars is guaranteed to be "ready". */ |
1771 | avail = (PyUnicode_GET_LENGTH(self->decoded_chars) |
1772 | - self->decoded_chars_used); |
1773 | |
1774 | assert(avail >= 0); |
1775 | |
1776 | if (n < 0 || n > avail) |
1777 | n = avail; |
1778 | |
1779 | if (self->decoded_chars_used > 0 || n < avail) { |
1780 | chars = PyUnicode_Substring(self->decoded_chars, |
1781 | self->decoded_chars_used, |
1782 | self->decoded_chars_used + n); |
1783 | if (chars == NULL) |
1784 | return NULL; |
1785 | } |
1786 | else { |
1787 | chars = self->decoded_chars; |
1788 | Py_INCREF(chars); |
1789 | } |
1790 | |
1791 | self->decoded_chars_used += n; |
1792 | return chars; |
1793 | } |
1794 | |
/* Read and decode the next chunk of data from the BufferedReader.

   size_hint: number of characters the caller still wants; when positive it
   is scaled by the last observed bytes-per-character ratio to choose how
   many bytes to request (never fewer than self->chunk_size).

   Returns 1 if characters were decoded or the read returned data, 0 at EOF,
   and -1 with an exception set on error.
 */
static int
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
{
    PyObject *dec_buffer = NULL;
    PyObject *dec_flags = NULL;
    PyObject *input_chunk = NULL;
    Py_buffer input_chunk_buf;
    PyObject *decoded_chars, *chunk_size;
    Py_ssize_t nbytes, nchars;
    int eof;

    /* The return value is True unless EOF was reached.  The decoded string is
     * placed in self._decoded_chars (replacing its previous value).  The
     * entire input chunk is sent to the decoder, though some of it may remain
     * buffered in the decoder, yet to be converted.
     */

    if (self->decoder == NULL) {
        _unsupported("not readable");
        return -1;
    }

    if (self->telling) {
        /* To prepare for tell(), we need to snapshot a point in the file
         * where the decoder's input buffer is empty.
         */
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
                                                     _PyIO_str_getstate);
        if (state == NULL)
            return -1;
        /* Given this, we know there was a valid snapshot point
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
         */
        if (!PyTuple_Check(state)) {
            PyErr_SetString(PyExc_TypeError,
                            "illegal decoder state");
            Py_DECREF(state);
            return -1;
        }
        /* dec_buffer and dec_flags are borrowed from `state` here. */
        if (!PyArg_ParseTuple(state,
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
        {
            Py_DECREF(state);
            return -1;
        }

        if (!PyBytes_Check(dec_buffer)) {
            PyErr_Format(PyExc_TypeError,
                         "illegal decoder state: the first item should be a "
                         "bytes object, not '%.200s'",
                         Py_TYPE(dec_buffer)->tp_name);
            Py_DECREF(state);
            return -1;
        }
        /* Turn both into owned references before releasing the tuple. */
        Py_INCREF(dec_buffer);
        Py_INCREF(dec_flags);
        Py_DECREF(state);
    }

    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
    if (size_hint > 0) {
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
    }
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
    if (chunk_size == NULL)
        goto fail;

    /* Use read1() when the buffer provides it, read() otherwise. */
    input_chunk = PyObject_CallMethodOneArg(self->buffer,
        (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
        chunk_size);
    Py_DECREF(chunk_size);
    if (input_chunk == NULL)
        goto fail;

    /* The buffer view is only needed to learn the chunk's length. */
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
        PyErr_Format(PyExc_TypeError,
                     "underlying %s() should have returned a bytes-like object, "
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
                     Py_TYPE(input_chunk)->tp_name);
        goto fail;
    }

    nbytes = input_chunk_buf.len;
    /* An empty read marks EOF, unless decoding it still yields characters
       (see below). */
    eof = (nbytes == 0);

    decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
    PyBuffer_Release(&input_chunk_buf);
    if (decoded_chars == NULL)
        goto fail;

    /* The reference to decoded_chars is stolen here; self keeps it alive. */
    textiowrapper_set_decoded_chars(self, decoded_chars);
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
    /* Remember the bytes-per-character ratio for the next size_hint. */
    if (nchars > 0)
        self->b2cratio = (double) nbytes / nchars;
    else
        self->b2cratio = 0.0;
    if (nchars > 0)
        eof = 0;

    if (self->telling) {
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
         * next input to be decoded is dec_buffer + input_chunk.
         */
        PyObject *next_input = dec_buffer;
        PyBytes_Concat(&next_input, input_chunk);
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
        if (next_input == NULL) {
            goto fail;
        }
        /* The "N" format units take over the references to dec_flags and
           next_input. */
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
        if (snapshot == NULL) {
            /* Not ours any more: keep the fail path from releasing it. */
            dec_flags = NULL;
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);
    }
    Py_DECREF(input_chunk);

    return (eof == 0);

  fail:
    Py_XDECREF(dec_buffer);
    Py_XDECREF(dec_flags);
    Py_XDECREF(input_chunk);
    return -1;
}
1923 | |
1924 | /*[clinic input] |
1925 | _io.TextIOWrapper.read |
1926 | size as n: Py_ssize_t(accept={int, NoneType}) = -1 |
1927 | / |
1928 | [clinic start generated code]*/ |
1929 | |
static PyObject *
_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
{
    /* Read and return at most `n` characters; a negative `n` reads until
     * the underlying buffer is exhausted.  Pending writes are flushed
     * first.  Returns a new str, or NULL with an exception set.
     */
    PyObject *result = NULL, *chunks = NULL;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (self->decoder == NULL)
        return _unsupported("not readable");

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    if (n < 0) {
        /* Read everything */
        PyObject *bytes = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_read);
        PyObject *decoded;
        if (bytes == NULL)
            goto fail;

        /* Decode in one go, passing 1 / True as the last argument
           (NOTE(review): presumably the decoder's `final` flag -- confirm).
           The exact IncrementalNewlineDecoder type is called directly. */
        if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
            decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
                                                          bytes, 1);
        else
            decoded = PyObject_CallMethodObjArgs(
                self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
        Py_DECREF(bytes);
        if (check_decoded(decoded) < 0)
            goto fail;

        /* Characters decoded earlier but not yet returned come first. */
        result = textiowrapper_get_decoded_chars(self, -1);

        if (result == NULL) {
            Py_DECREF(decoded);
            return NULL;
        }

        /* Releases `decoded`; `result` is set to NULL on failure. */
        PyUnicode_AppendAndDel(&result, decoded);
        if (result == NULL)
            goto fail;

        /* Everything was consumed: drop the read-ahead state. */
        textiowrapper_set_decoded_chars(self, NULL);
        Py_CLEAR(self->snapshot);
        return result;
    }
    else {
        int res = 1;
        Py_ssize_t remaining = n;

        /* Start with whatever is already decoded. */
        result = textiowrapper_get_decoded_chars(self, n);
        if (result == NULL)
            goto fail;
        if (PyUnicode_READY(result) == -1)
            goto fail;
        remaining -= PyUnicode_GET_LENGTH(result);

        /* Keep reading chunks until we have n characters to return */
        while (remaining > 0) {
            res = textiowrapper_read_chunk(self, remaining);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto fail;
            }
            if (res == 0)  /* EOF */
                break;
            /* The list of pieces is created lazily, on the first chunk. */
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto fail;
            }
            /* Store the previous piece (if not empty) before fetching the
               next one. */
            if (PyUnicode_GET_LENGTH(result) > 0 &&
                PyList_Append(chunks, result) < 0)
                goto fail;
            Py_DECREF(result);
            result = textiowrapper_get_decoded_chars(self, remaining);
            if (result == NULL)
                goto fail;
            remaining -= PyUnicode_GET_LENGTH(result);
        }
        if (chunks != NULL) {
            /* Append the last piece and join everything into one str. */
            if (result != NULL && PyList_Append(chunks, result) < 0)
                goto fail;
            Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
            if (result == NULL)
                goto fail;
            Py_CLEAR(chunks);
        }
        return result;
    }
  fail:
    Py_XDECREF(result);
    Py_XDECREF(chunks);
    return NULL;
}
2030 | |
2031 | |
2032 | /* NOTE: `end` must point to the real end of the Py_UCS4 storage, |
2033 | that is to the NUL character. Otherwise the function will produce |
2034 | incorrect results. */ |
2035 | static const char * |
2036 | find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch) |
2037 | { |
2038 | if (kind == PyUnicode_1BYTE_KIND) { |
2039 | assert(ch < 256); |
2040 | return (char *) memchr((const void *) s, (char) ch, end - s); |
2041 | } |
2042 | for (;;) { |
2043 | while (PyUnicode_READ(kind, s, 0) > ch) |
2044 | s += kind; |
2045 | if (PyUnicode_READ(kind, s, 0) == ch) |
2046 | return s; |
2047 | if (s == end) |
2048 | return NULL; |
2049 | s += kind; |
2050 | } |
2051 | } |
2052 | |
2053 | Py_ssize_t |
2054 | _PyIO_find_line_ending( |
2055 | int translated, int universal, PyObject *readnl, |
2056 | int kind, const char *start, const char *end, Py_ssize_t *consumed) |
2057 | { |
2058 | Py_ssize_t len = (end - start)/kind; |
2059 | |
2060 | if (translated) { |
2061 | /* Newlines are already translated, only search for \n */ |
2062 | const char *pos = find_control_char(kind, start, end, '\n'); |
2063 | if (pos != NULL) |
2064 | return (pos - start)/kind + 1; |
2065 | else { |
2066 | *consumed = len; |
2067 | return -1; |
2068 | } |
2069 | } |
2070 | else if (universal) { |
2071 | /* Universal newline search. Find any of \r, \r\n, \n |
2072 | * The decoder ensures that \r\n are not split in two pieces |
2073 | */ |
2074 | const char *s = start; |
2075 | for (;;) { |
2076 | Py_UCS4 ch; |
2077 | /* Fast path for non-control chars. The loop always ends |
2078 | since the Unicode string is NUL-terminated. */ |
2079 | while (PyUnicode_READ(kind, s, 0) > '\r') |
2080 | s += kind; |
2081 | if (s >= end) { |
2082 | *consumed = len; |
2083 | return -1; |
2084 | } |
2085 | ch = PyUnicode_READ(kind, s, 0); |
2086 | s += kind; |
2087 | if (ch == '\n') |
2088 | return (s - start)/kind; |
2089 | if (ch == '\r') { |
2090 | if (PyUnicode_READ(kind, s, 0) == '\n') |
2091 | return (s - start)/kind + 1; |
2092 | else |
2093 | return (s - start)/kind; |
2094 | } |
2095 | } |
2096 | } |
2097 | else { |
2098 | /* Non-universal mode. */ |
2099 | Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl); |
2100 | const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl); |
2101 | /* Assume that readnl is an ASCII character. */ |
2102 | assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND); |
2103 | if (readnl_len == 1) { |
2104 | const char *pos = find_control_char(kind, start, end, nl[0]); |
2105 | if (pos != NULL) |
2106 | return (pos - start)/kind + 1; |
2107 | *consumed = len; |
2108 | return -1; |
2109 | } |
2110 | else { |
2111 | const char *s = start; |
2112 | const char *e = end - (readnl_len - 1)*kind; |
2113 | const char *pos; |
2114 | if (e < s) |
2115 | e = s; |
2116 | while (s < e) { |
2117 | Py_ssize_t i; |
2118 | const char *pos = find_control_char(kind, s, end, nl[0]); |
2119 | if (pos == NULL || pos >= e) |
2120 | break; |
2121 | for (i = 1; i < readnl_len; i++) { |
2122 | if (PyUnicode_READ(kind, pos, i) != nl[i]) |
2123 | break; |
2124 | } |
2125 | if (i == readnl_len) |
2126 | return (pos - start)/kind + readnl_len; |
2127 | s = pos + kind; |
2128 | } |
2129 | pos = find_control_char(kind, e, end, nl[0]); |
2130 | if (pos == NULL) |
2131 | *consumed = len; |
2132 | else |
2133 | *consumed = (pos - start)/kind; |
2134 | return -1; |
2135 | } |
2136 | } |
2137 | } |
2138 | |
2139 | static PyObject * |
2140 | _textiowrapper_readline(textio *self, Py_ssize_t limit) |
2141 | { |
2142 | PyObject *line = NULL, *chunks = NULL, *remaining = NULL; |
2143 | Py_ssize_t start, endpos, chunked, offset_to_buffer; |
2144 | int res; |
2145 | |
2146 | CHECK_CLOSED(self); |
2147 | |
2148 | if (_textiowrapper_writeflush(self) < 0) |
2149 | return NULL; |
2150 | |
2151 | chunked = 0; |
2152 | |
2153 | while (1) { |
2154 | const char *ptr; |
2155 | Py_ssize_t line_len; |
2156 | int kind; |
2157 | Py_ssize_t consumed = 0; |
2158 | |
2159 | /* First, get some data if necessary */ |
2160 | res = 1; |
2161 | while (!self->decoded_chars || |
2162 | !PyUnicode_GET_LENGTH(self->decoded_chars)) { |
2163 | res = textiowrapper_read_chunk(self, 0); |
2164 | if (res < 0) { |
2165 | /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() |
2166 | when EINTR occurs so we needn't do it ourselves. */ |
2167 | if (_PyIO_trap_eintr()) { |
2168 | continue; |
2169 | } |
2170 | goto error; |
2171 | } |
2172 | if (res == 0) |
2173 | break; |
2174 | } |
2175 | if (res == 0) { |
2176 | /* end of file */ |
2177 | textiowrapper_set_decoded_chars(self, NULL); |
2178 | Py_CLEAR(self->snapshot); |
2179 | start = endpos = offset_to_buffer = 0; |
2180 | break; |
2181 | } |
2182 | |
2183 | if (remaining == NULL) { |
2184 | line = self->decoded_chars; |
2185 | start = self->decoded_chars_used; |
2186 | offset_to_buffer = 0; |
2187 | Py_INCREF(line); |
2188 | } |
2189 | else { |
2190 | assert(self->decoded_chars_used == 0); |
2191 | line = PyUnicode_Concat(remaining, self->decoded_chars); |
2192 | start = 0; |
2193 | offset_to_buffer = PyUnicode_GET_LENGTH(remaining); |
2194 | Py_CLEAR(remaining); |
2195 | if (line == NULL) |
2196 | goto error; |
2197 | if (PyUnicode_READY(line) == -1) |
2198 | goto error; |
2199 | } |
2200 | |
2201 | ptr = PyUnicode_DATA(line); |
2202 | line_len = PyUnicode_GET_LENGTH(line); |
2203 | kind = PyUnicode_KIND(line); |
2204 | |
2205 | endpos = _PyIO_find_line_ending( |
2206 | self->readtranslate, self->readuniversal, self->readnl, |
2207 | kind, |
2208 | ptr + kind * start, |
2209 | ptr + kind * line_len, |
2210 | &consumed); |
2211 | if (endpos >= 0) { |
2212 | endpos += start; |
2213 | if (limit >= 0 && (endpos - start) + chunked >= limit) |
2214 | endpos = start + limit - chunked; |
2215 | break; |
2216 | } |
2217 | |
2218 | /* We can put aside up to `endpos` */ |
2219 | endpos = consumed + start; |
2220 | if (limit >= 0 && (endpos - start) + chunked >= limit) { |
2221 | /* Didn't find line ending, but reached length limit */ |
2222 | endpos = start + limit - chunked; |
2223 | break; |
2224 | } |
2225 | |
2226 | if (endpos > start) { |
2227 | /* No line ending seen yet - put aside current data */ |
2228 | PyObject *s; |
2229 | if (chunks == NULL) { |
2230 | chunks = PyList_New(0); |
2231 | if (chunks == NULL) |
2232 | goto error; |
2233 | } |
2234 | s = PyUnicode_Substring(line, start, endpos); |
2235 | if (s == NULL) |
2236 | goto error; |
2237 | if (PyList_Append(chunks, s) < 0) { |
2238 | Py_DECREF(s); |
2239 | goto error; |
2240 | } |
2241 | chunked += PyUnicode_GET_LENGTH(s); |
2242 | Py_DECREF(s); |
2243 | } |
2244 | /* There may be some remaining bytes we'll have to prepend to the |
2245 | next chunk of data */ |
2246 | if (endpos < line_len) { |
2247 | remaining = PyUnicode_Substring(line, endpos, line_len); |
2248 | if (remaining == NULL) |
2249 | goto error; |
2250 | } |
2251 | Py_CLEAR(line); |
2252 | /* We have consumed the buffer */ |
2253 | textiowrapper_set_decoded_chars(self, NULL); |
2254 | } |
2255 | |
2256 | if (line != NULL) { |
2257 | /* Our line ends in the current buffer */ |
2258 | self->decoded_chars_used = endpos - offset_to_buffer; |
2259 | if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) { |
2260 | PyObject *s = PyUnicode_Substring(line, start, endpos); |
2261 | Py_CLEAR(line); |
2262 | if (s == NULL) |
2263 | goto error; |
2264 | line = s; |
2265 | } |
2266 | } |
2267 | if (remaining != NULL) { |
2268 | if (chunks == NULL) { |
2269 | chunks = PyList_New(0); |
2270 | if (chunks == NULL) |
2271 | goto error; |
2272 | } |
2273 | if (PyList_Append(chunks, remaining) < 0) |
2274 | goto error; |
2275 | Py_CLEAR(remaining); |
2276 | } |
2277 | if (chunks != NULL) { |
2278 | if (line != NULL) { |
2279 | if (PyList_Append(chunks, line) < 0) |
2280 | goto error; |
2281 | Py_DECREF(line); |
2282 | } |
2283 | line = PyUnicode_Join(_PyIO_empty_str, chunks); |
2284 | if (line == NULL) |
2285 | goto error; |
2286 | Py_CLEAR(chunks); |
2287 | } |
2288 | if (line == NULL) { |
2289 | Py_INCREF(_PyIO_empty_str); |
2290 | line = _PyIO_empty_str; |
2291 | } |
2292 | |
2293 | return line; |
2294 | |
2295 | error: |
2296 | Py_XDECREF(chunks); |
2297 | Py_XDECREF(remaining); |
2298 | Py_XDECREF(line); |
2299 | return NULL; |
2300 | } |
2301 | |
2302 | /*[clinic input] |
2303 | _io.TextIOWrapper.readline |
2304 | size: Py_ssize_t = -1 |
2305 | / |
2306 | [clinic start generated code]*/ |
2307 | |
2308 | static PyObject * |
2309 | _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size) |
2310 | /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/ |
2311 | { |
2312 | CHECK_ATTACHED(self); |
2313 | return _textiowrapper_readline(self, size); |
2314 | } |
2315 | |
2316 | /* Seek and Tell */ |
2317 | |
2318 | typedef struct { |
2319 | Py_off_t start_pos; |
2320 | int dec_flags; |
2321 | int bytes_to_feed; |
2322 | int chars_to_skip; |
2323 | char need_eof; |
2324 | } cookie_type; |
2325 | |
2326 | /* |
2327 | To speed up cookie packing/unpacking, we store the fields in a temporary |
2328 | string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.). |
2329 | The following macros define at which offsets in the intermediary byte |
2330 | string the various CookieStruct fields will be stored. |
2331 | */ |
2332 | |
2333 | #define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char)) |
2334 | |
2335 | #if PY_BIG_ENDIAN |
2336 | /* We want the least significant byte of start_pos to also be the least |
2337 | significant byte of the cookie, which means that in big-endian mode we |
2338 | must copy the fields in reverse order. */ |
2339 | |
2340 | # define OFF_START_POS (sizeof(char) + 3 * sizeof(int)) |
2341 | # define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int)) |
2342 | # define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int)) |
2343 | # define OFF_CHARS_TO_SKIP (sizeof(char)) |
2344 | # define OFF_NEED_EOF 0 |
2345 | |
2346 | #else |
2347 | /* Little-endian mode: the least significant byte of start_pos will |
2348 | naturally end up the least significant byte of the cookie. */ |
2349 | |
2350 | # define OFF_START_POS 0 |
2351 | # define OFF_DEC_FLAGS (sizeof(Py_off_t)) |
2352 | # define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int)) |
2353 | # define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int)) |
2354 | # define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int)) |
2355 | |
2356 | #endif |
2357 | |
2358 | static int |
2359 | textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj) |
2360 | { |
2361 | unsigned char buffer[COOKIE_BUF_LEN]; |
2362 | PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj); |
2363 | if (cookieLong == NULL) |
2364 | return -1; |
2365 | |
2366 | if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer), |
2367 | PY_LITTLE_ENDIAN, 0) < 0) { |
2368 | Py_DECREF(cookieLong); |
2369 | return -1; |
2370 | } |
2371 | Py_DECREF(cookieLong); |
2372 | |
2373 | memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos)); |
2374 | memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags)); |
2375 | memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed)); |
2376 | memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip)); |
2377 | memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof)); |
2378 | |
2379 | return 0; |
2380 | } |
2381 | |
2382 | static PyObject * |
2383 | textiowrapper_build_cookie(cookie_type *cookie) |
2384 | { |
2385 | unsigned char buffer[COOKIE_BUF_LEN]; |
2386 | |
2387 | memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos)); |
2388 | memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags)); |
2389 | memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed)); |
2390 | memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip)); |
2391 | memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof)); |
2392 | |
2393 | return _PyLong_FromByteArray(buffer, sizeof(buffer), |
2394 | PY_LITTLE_ENDIAN, 0); |
2395 | } |
2396 | |
2397 | static int |
2398 | _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie) |
2399 | { |
2400 | PyObject *res; |
2401 | /* When seeking to the start of the stream, we call decoder.reset() |
2402 | rather than decoder.getstate(). |
2403 | This is for a few decoders such as utf-16 for which the state value |
2404 | at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of |
2405 | utf-16, that we are expecting a BOM). |
2406 | */ |
2407 | if (cookie->start_pos == 0 && cookie->dec_flags == 0) |
2408 | res = PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset); |
2409 | else |
2410 | res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, |
2411 | "((yi))" , "" , cookie->dec_flags); |
2412 | if (res == NULL) |
2413 | return -1; |
2414 | Py_DECREF(res); |
2415 | return 0; |
2416 | } |
2417 | |
2418 | static int |
2419 | _textiowrapper_encoder_reset(textio *self, int start_of_stream) |
2420 | { |
2421 | PyObject *res; |
2422 | if (start_of_stream) { |
2423 | res = PyObject_CallMethodNoArgs(self->encoder, _PyIO_str_reset); |
2424 | self->encoding_start_of_stream = 1; |
2425 | } |
2426 | else { |
2427 | res = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_setstate, |
2428 | _PyLong_GetZero()); |
2429 | self->encoding_start_of_stream = 0; |
2430 | } |
2431 | if (res == NULL) |
2432 | return -1; |
2433 | Py_DECREF(res); |
2434 | return 0; |
2435 | } |
2436 | |
2437 | static int |
2438 | _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie) |
2439 | { |
2440 | /* Same as _textiowrapper_decoder_setstate() above. */ |
2441 | return _textiowrapper_encoder_reset( |
2442 | self, cookie->start_pos == 0 && cookie->dec_flags == 0); |
2443 | } |
2444 | |
2445 | /*[clinic input] |
2446 | _io.TextIOWrapper.seek |
2447 | cookie as cookieObj: object |
2448 | whence: int = 0 |
2449 | / |
2450 | [clinic start generated code]*/ |
2451 | |
2452 | static PyObject * |
2453 | _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence) |
2454 | /*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/ |
2455 | { |
2456 | PyObject *posobj; |
2457 | cookie_type cookie; |
2458 | PyObject *res; |
2459 | int cmp; |
2460 | PyObject *snapshot; |
2461 | |
2462 | CHECK_ATTACHED(self); |
2463 | CHECK_CLOSED(self); |
2464 | |
2465 | Py_INCREF(cookieObj); |
2466 | |
2467 | if (!self->seekable) { |
2468 | _unsupported("underlying stream is not seekable" ); |
2469 | goto fail; |
2470 | } |
2471 | |
2472 | PyObject *zero = _PyLong_GetZero(); // borrowed reference |
2473 | |
2474 | switch (whence) { |
2475 | case SEEK_CUR: |
2476 | /* seek relative to current position */ |
2477 | cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ); |
2478 | if (cmp < 0) |
2479 | goto fail; |
2480 | |
2481 | if (cmp == 0) { |
2482 | _unsupported("can't do nonzero cur-relative seeks" ); |
2483 | goto fail; |
2484 | } |
2485 | |
2486 | /* Seeking to the current position should attempt to |
2487 | * sync the underlying buffer with the current position. |
2488 | */ |
2489 | Py_DECREF(cookieObj); |
2490 | cookieObj = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_tell); |
2491 | if (cookieObj == NULL) |
2492 | goto fail; |
2493 | break; |
2494 | |
2495 | case SEEK_END: |
2496 | /* seek relative to end of file */ |
2497 | cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ); |
2498 | if (cmp < 0) |
2499 | goto fail; |
2500 | |
2501 | if (cmp == 0) { |
2502 | _unsupported("can't do nonzero end-relative seeks" ); |
2503 | goto fail; |
2504 | } |
2505 | |
2506 | res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush); |
2507 | if (res == NULL) |
2508 | goto fail; |
2509 | Py_DECREF(res); |
2510 | |
2511 | textiowrapper_set_decoded_chars(self, NULL); |
2512 | Py_CLEAR(self->snapshot); |
2513 | if (self->decoder) { |
2514 | res = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset); |
2515 | if (res == NULL) |
2516 | goto fail; |
2517 | Py_DECREF(res); |
2518 | } |
2519 | |
2520 | res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii" , 0, 2); |
2521 | Py_CLEAR(cookieObj); |
2522 | if (res == NULL) |
2523 | goto fail; |
2524 | if (self->encoder) { |
2525 | /* If seek() == 0, we are at the start of stream, otherwise not */ |
2526 | cmp = PyObject_RichCompareBool(res, zero, Py_EQ); |
2527 | if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) { |
2528 | Py_DECREF(res); |
2529 | goto fail; |
2530 | } |
2531 | } |
2532 | return res; |
2533 | |
2534 | case SEEK_SET: |
2535 | break; |
2536 | |
2537 | default: |
2538 | PyErr_Format(PyExc_ValueError, |
2539 | "invalid whence (%d, should be %d, %d or %d)" , whence, |
2540 | SEEK_SET, SEEK_CUR, SEEK_END); |
2541 | goto fail; |
2542 | } |
2543 | |
2544 | cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT); |
2545 | if (cmp < 0) |
2546 | goto fail; |
2547 | |
2548 | if (cmp == 1) { |
2549 | PyErr_Format(PyExc_ValueError, |
2550 | "negative seek position %R" , cookieObj); |
2551 | goto fail; |
2552 | } |
2553 | |
2554 | res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush); |
2555 | if (res == NULL) |
2556 | goto fail; |
2557 | Py_DECREF(res); |
2558 | |
2559 | /* The strategy of seek() is to go back to the safe start point |
2560 | * and replay the effect of read(chars_to_skip) from there. |
2561 | */ |
2562 | if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0) |
2563 | goto fail; |
2564 | |
2565 | /* Seek back to the safe start point. */ |
2566 | posobj = PyLong_FromOff_t(cookie.start_pos); |
2567 | if (posobj == NULL) |
2568 | goto fail; |
2569 | res = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_seek, posobj); |
2570 | Py_DECREF(posobj); |
2571 | if (res == NULL) |
2572 | goto fail; |
2573 | Py_DECREF(res); |
2574 | |
2575 | textiowrapper_set_decoded_chars(self, NULL); |
2576 | Py_CLEAR(self->snapshot); |
2577 | |
2578 | /* Restore the decoder to its state from the safe start point. */ |
2579 | if (self->decoder) { |
2580 | if (_textiowrapper_decoder_setstate(self, &cookie) < 0) |
2581 | goto fail; |
2582 | } |
2583 | |
2584 | if (cookie.chars_to_skip) { |
2585 | /* Just like _read_chunk, feed the decoder and save a snapshot. */ |
2586 | PyObject *input_chunk = _PyObject_CallMethodId( |
2587 | self->buffer, &PyId_read, "i" , cookie.bytes_to_feed); |
2588 | PyObject *decoded; |
2589 | |
2590 | if (input_chunk == NULL) |
2591 | goto fail; |
2592 | |
2593 | if (!PyBytes_Check(input_chunk)) { |
2594 | PyErr_Format(PyExc_TypeError, |
2595 | "underlying read() should have returned a bytes " |
2596 | "object, not '%.200s'" , |
2597 | Py_TYPE(input_chunk)->tp_name); |
2598 | Py_DECREF(input_chunk); |
2599 | goto fail; |
2600 | } |
2601 | |
2602 | snapshot = Py_BuildValue("iN" , cookie.dec_flags, input_chunk); |
2603 | if (snapshot == NULL) { |
2604 | goto fail; |
2605 | } |
2606 | Py_XSETREF(self->snapshot, snapshot); |
2607 | |
2608 | decoded = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_decode, |
2609 | input_chunk, cookie.need_eof ? Py_True : Py_False, NULL); |
2610 | |
2611 | if (check_decoded(decoded) < 0) |
2612 | goto fail; |
2613 | |
2614 | textiowrapper_set_decoded_chars(self, decoded); |
2615 | |
2616 | /* Skip chars_to_skip of the decoded characters. */ |
2617 | if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) { |
2618 | PyErr_SetString(PyExc_OSError, "can't restore logical file position" ); |
2619 | goto fail; |
2620 | } |
2621 | self->decoded_chars_used = cookie.chars_to_skip; |
2622 | } |
2623 | else { |
2624 | snapshot = Py_BuildValue("iy" , cookie.dec_flags, "" ); |
2625 | if (snapshot == NULL) |
2626 | goto fail; |
2627 | Py_XSETREF(self->snapshot, snapshot); |
2628 | } |
2629 | |
2630 | /* Finally, reset the encoder (merely useful for proper BOM handling) */ |
2631 | if (self->encoder) { |
2632 | if (_textiowrapper_encoder_setstate(self, &cookie) < 0) |
2633 | goto fail; |
2634 | } |
2635 | return cookieObj; |
2636 | fail: |
2637 | Py_XDECREF(cookieObj); |
2638 | return NULL; |
2639 | |
2640 | } |
2641 | |
2642 | /*[clinic input] |
2643 | _io.TextIOWrapper.tell |
2644 | [clinic start generated code]*/ |
2645 | |
2646 | static PyObject * |
2647 | _io_TextIOWrapper_tell_impl(textio *self) |
2648 | /*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/ |
2649 | { |
2650 | PyObject *res; |
2651 | PyObject *posobj = NULL; |
2652 | cookie_type cookie = {0,0,0,0,0}; |
2653 | PyObject *next_input; |
2654 | Py_ssize_t chars_to_skip, chars_decoded; |
2655 | Py_ssize_t skip_bytes, skip_back; |
2656 | PyObject *saved_state = NULL; |
2657 | const char *input, *input_end; |
2658 | Py_ssize_t dec_buffer_len; |
2659 | int dec_flags; |
2660 | |
2661 | CHECK_ATTACHED(self); |
2662 | CHECK_CLOSED(self); |
2663 | |
2664 | if (!self->seekable) { |
2665 | _unsupported("underlying stream is not seekable" ); |
2666 | goto fail; |
2667 | } |
2668 | if (!self->telling) { |
2669 | PyErr_SetString(PyExc_OSError, |
2670 | "telling position disabled by next() call" ); |
2671 | goto fail; |
2672 | } |
2673 | |
2674 | if (_textiowrapper_writeflush(self) < 0) |
2675 | return NULL; |
2676 | res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush); |
2677 | if (res == NULL) |
2678 | goto fail; |
2679 | Py_DECREF(res); |
2680 | |
2681 | posobj = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_tell); |
2682 | if (posobj == NULL) |
2683 | goto fail; |
2684 | |
2685 | if (self->decoder == NULL || self->snapshot == NULL) { |
2686 | assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0); |
2687 | return posobj; |
2688 | } |
2689 | |
2690 | #if defined(HAVE_LARGEFILE_SUPPORT) |
2691 | cookie.start_pos = PyLong_AsLongLong(posobj); |
2692 | #else |
2693 | cookie.start_pos = PyLong_AsLong(posobj); |
2694 | #endif |
2695 | Py_DECREF(posobj); |
2696 | if (PyErr_Occurred()) |
2697 | goto fail; |
2698 | |
2699 | /* Skip backward to the snapshot point (see _read_chunk). */ |
2700 | assert(PyTuple_Check(self->snapshot)); |
2701 | if (!PyArg_ParseTuple(self->snapshot, "iO" , &cookie.dec_flags, &next_input)) |
2702 | goto fail; |
2703 | |
2704 | assert (PyBytes_Check(next_input)); |
2705 | |
2706 | cookie.start_pos -= PyBytes_GET_SIZE(next_input); |
2707 | |
2708 | /* How many decoded characters have been used up since the snapshot? */ |
2709 | if (self->decoded_chars_used == 0) { |
2710 | /* We haven't moved from the snapshot point. */ |
2711 | return textiowrapper_build_cookie(&cookie); |
2712 | } |
2713 | |
2714 | chars_to_skip = self->decoded_chars_used; |
2715 | |
2716 | /* Decoder state will be restored at the end */ |
2717 | saved_state = PyObject_CallMethodNoArgs(self->decoder, |
2718 | _PyIO_str_getstate); |
2719 | if (saved_state == NULL) |
2720 | goto fail; |
2721 | |
2722 | #define DECODER_GETSTATE() do { \ |
2723 | PyObject *dec_buffer; \ |
2724 | PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \ |
2725 | _PyIO_str_getstate); \ |
2726 | if (_state == NULL) \ |
2727 | goto fail; \ |
2728 | if (!PyTuple_Check(_state)) { \ |
2729 | PyErr_SetString(PyExc_TypeError, \ |
2730 | "illegal decoder state"); \ |
2731 | Py_DECREF(_state); \ |
2732 | goto fail; \ |
2733 | } \ |
2734 | if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \ |
2735 | &dec_buffer, &dec_flags)) \ |
2736 | { \ |
2737 | Py_DECREF(_state); \ |
2738 | goto fail; \ |
2739 | } \ |
2740 | if (!PyBytes_Check(dec_buffer)) { \ |
2741 | PyErr_Format(PyExc_TypeError, \ |
2742 | "illegal decoder state: the first item should be a " \ |
2743 | "bytes object, not '%.200s'", \ |
2744 | Py_TYPE(dec_buffer)->tp_name); \ |
2745 | Py_DECREF(_state); \ |
2746 | goto fail; \ |
2747 | } \ |
2748 | dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \ |
2749 | Py_DECREF(_state); \ |
2750 | } while (0) |
2751 | |
2752 | #define DECODER_DECODE(start, len, res) do { \ |
2753 | PyObject *_decoded = _PyObject_CallMethodId( \ |
2754 | self->decoder, &PyId_decode, "y#", start, len); \ |
2755 | if (check_decoded(_decoded) < 0) \ |
2756 | goto fail; \ |
2757 | res = PyUnicode_GET_LENGTH(_decoded); \ |
2758 | Py_DECREF(_decoded); \ |
2759 | } while (0) |
2760 | |
2761 | /* Fast search for an acceptable start point, close to our |
2762 | current pos */ |
2763 | skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip); |
2764 | skip_back = 1; |
2765 | assert(skip_back <= PyBytes_GET_SIZE(next_input)); |
2766 | input = PyBytes_AS_STRING(next_input); |
2767 | while (skip_bytes > 0) { |
2768 | /* Decode up to temptative start point */ |
2769 | if (_textiowrapper_decoder_setstate(self, &cookie) < 0) |
2770 | goto fail; |
2771 | DECODER_DECODE(input, skip_bytes, chars_decoded); |
2772 | if (chars_decoded <= chars_to_skip) { |
2773 | DECODER_GETSTATE(); |
2774 | if (dec_buffer_len == 0) { |
2775 | /* Before pos and no bytes buffered in decoder => OK */ |
2776 | cookie.dec_flags = dec_flags; |
2777 | chars_to_skip -= chars_decoded; |
2778 | break; |
2779 | } |
2780 | /* Skip back by buffered amount and reset heuristic */ |
2781 | skip_bytes -= dec_buffer_len; |
2782 | skip_back = 1; |
2783 | } |
2784 | else { |
2785 | /* We're too far ahead, skip back a bit */ |
2786 | skip_bytes -= skip_back; |
2787 | skip_back *= 2; |
2788 | } |
2789 | } |
2790 | if (skip_bytes <= 0) { |
2791 | skip_bytes = 0; |
2792 | if (_textiowrapper_decoder_setstate(self, &cookie) < 0) |
2793 | goto fail; |
2794 | } |
2795 | |
2796 | /* Note our initial start point. */ |
2797 | cookie.start_pos += skip_bytes; |
2798 | cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); |
2799 | if (chars_to_skip == 0) |
2800 | goto finally; |
2801 | |
2802 | /* We should be close to the desired position. Now feed the decoder one |
2803 | * byte at a time until we reach the `chars_to_skip` target. |
2804 | * As we go, note the nearest "safe start point" before the current |
2805 | * location (a point where the decoder has nothing buffered, so seek() |
2806 | * can safely start from there and advance to this location). |
2807 | */ |
2808 | chars_decoded = 0; |
2809 | input = PyBytes_AS_STRING(next_input); |
2810 | input_end = input + PyBytes_GET_SIZE(next_input); |
2811 | input += skip_bytes; |
2812 | while (input < input_end) { |
2813 | Py_ssize_t n; |
2814 | |
2815 | DECODER_DECODE(input, (Py_ssize_t)1, n); |
2816 | /* We got n chars for 1 byte */ |
2817 | chars_decoded += n; |
2818 | cookie.bytes_to_feed += 1; |
2819 | DECODER_GETSTATE(); |
2820 | |
2821 | if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) { |
2822 | /* Decoder buffer is empty, so this is a safe start point. */ |
2823 | cookie.start_pos += cookie.bytes_to_feed; |
2824 | chars_to_skip -= chars_decoded; |
2825 | cookie.dec_flags = dec_flags; |
2826 | cookie.bytes_to_feed = 0; |
2827 | chars_decoded = 0; |
2828 | } |
2829 | if (chars_decoded >= chars_to_skip) |
2830 | break; |
2831 | input++; |
2832 | } |
2833 | if (input == input_end) { |
2834 | /* We didn't get enough decoded data; signal EOF to get more. */ |
2835 | PyObject *decoded = _PyObject_CallMethodId( |
2836 | self->decoder, &PyId_decode, "yO" , "" , /* final = */ Py_True); |
2837 | if (check_decoded(decoded) < 0) |
2838 | goto fail; |
2839 | chars_decoded += PyUnicode_GET_LENGTH(decoded); |
2840 | Py_DECREF(decoded); |
2841 | cookie.need_eof = 1; |
2842 | |
2843 | if (chars_decoded < chars_to_skip) { |
2844 | PyErr_SetString(PyExc_OSError, |
2845 | "can't reconstruct logical file position" ); |
2846 | goto fail; |
2847 | } |
2848 | } |
2849 | |
2850 | finally: |
2851 | res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state); |
2852 | Py_DECREF(saved_state); |
2853 | if (res == NULL) |
2854 | return NULL; |
2855 | Py_DECREF(res); |
2856 | |
2857 | /* The returned cookie corresponds to the last safe start point. */ |
2858 | cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); |
2859 | return textiowrapper_build_cookie(&cookie); |
2860 | |
2861 | fail: |
2862 | if (saved_state) { |
2863 | PyObject *type, *value, *traceback; |
2864 | PyErr_Fetch(&type, &value, &traceback); |
2865 | res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state); |
2866 | _PyErr_ChainExceptions(type, value, traceback); |
2867 | Py_DECREF(saved_state); |
2868 | Py_XDECREF(res); |
2869 | } |
2870 | return NULL; |
2871 | } |
2872 | |
2873 | /*[clinic input] |
2874 | _io.TextIOWrapper.truncate |
2875 | pos: object = None |
2876 | / |
2877 | [clinic start generated code]*/ |
2878 | |
2879 | static PyObject * |
2880 | _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos) |
2881 | /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/ |
2882 | { |
2883 | PyObject *res; |
2884 | |
2885 | CHECK_ATTACHED(self) |
2886 | |
2887 | res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush); |
2888 | if (res == NULL) |
2889 | return NULL; |
2890 | Py_DECREF(res); |
2891 | |
2892 | return PyObject_CallMethodOneArg(self->buffer, _PyIO_str_truncate, pos); |
2893 | } |
2894 | |
2895 | static PyObject * |
2896 | textiowrapper_repr(textio *self) |
2897 | { |
2898 | PyObject *nameobj, *modeobj, *res, *s; |
2899 | int status; |
2900 | |
2901 | CHECK_INITIALIZED(self); |
2902 | |
2903 | res = PyUnicode_FromString("<_io.TextIOWrapper" ); |
2904 | if (res == NULL) |
2905 | return NULL; |
2906 | |
2907 | status = Py_ReprEnter((PyObject *)self); |
2908 | if (status != 0) { |
2909 | if (status > 0) { |
2910 | PyErr_Format(PyExc_RuntimeError, |
2911 | "reentrant call inside %s.__repr__" , |
2912 | Py_TYPE(self)->tp_name); |
2913 | } |
2914 | goto error; |
2915 | } |
2916 | if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) { |
2917 | if (!PyErr_ExceptionMatches(PyExc_ValueError)) { |
2918 | goto error; |
2919 | } |
2920 | /* Ignore ValueError raised if the underlying stream was detached */ |
2921 | PyErr_Clear(); |
2922 | } |
2923 | if (nameobj != NULL) { |
2924 | s = PyUnicode_FromFormat(" name=%R" , nameobj); |
2925 | Py_DECREF(nameobj); |
2926 | if (s == NULL) |
2927 | goto error; |
2928 | PyUnicode_AppendAndDel(&res, s); |
2929 | if (res == NULL) |
2930 | goto error; |
2931 | } |
2932 | if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) { |
2933 | goto error; |
2934 | } |
2935 | if (modeobj != NULL) { |
2936 | s = PyUnicode_FromFormat(" mode=%R" , modeobj); |
2937 | Py_DECREF(modeobj); |
2938 | if (s == NULL) |
2939 | goto error; |
2940 | PyUnicode_AppendAndDel(&res, s); |
2941 | if (res == NULL) |
2942 | goto error; |
2943 | } |
2944 | s = PyUnicode_FromFormat("%U encoding=%R>" , |
2945 | res, self->encoding); |
2946 | Py_DECREF(res); |
2947 | if (status == 0) { |
2948 | Py_ReprLeave((PyObject *)self); |
2949 | } |
2950 | return s; |
2951 | |
2952 | error: |
2953 | Py_XDECREF(res); |
2954 | if (status == 0) { |
2955 | Py_ReprLeave((PyObject *)self); |
2956 | } |
2957 | return NULL; |
2958 | } |
2959 | |
2960 | |
2961 | /* Inquiries */ |
2962 | |
2963 | /*[clinic input] |
2964 | _io.TextIOWrapper.fileno |
2965 | [clinic start generated code]*/ |
2966 | |
2967 | static PyObject * |
2968 | _io_TextIOWrapper_fileno_impl(textio *self) |
2969 | /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/ |
2970 | { |
2971 | CHECK_ATTACHED(self); |
2972 | return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_fileno); |
2973 | } |
2974 | |
2975 | /*[clinic input] |
2976 | _io.TextIOWrapper.seekable |
2977 | [clinic start generated code]*/ |
2978 | |
2979 | static PyObject * |
2980 | _io_TextIOWrapper_seekable_impl(textio *self) |
2981 | /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/ |
2982 | { |
2983 | CHECK_ATTACHED(self); |
2984 | return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_seekable); |
2985 | } |
2986 | |
2987 | /*[clinic input] |
2988 | _io.TextIOWrapper.readable |
2989 | [clinic start generated code]*/ |
2990 | |
2991 | static PyObject * |
2992 | _io_TextIOWrapper_readable_impl(textio *self) |
2993 | /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/ |
2994 | { |
2995 | CHECK_ATTACHED(self); |
2996 | return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable); |
2997 | } |
2998 | |
2999 | /*[clinic input] |
3000 | _io.TextIOWrapper.writable |
3001 | [clinic start generated code]*/ |
3002 | |
3003 | static PyObject * |
3004 | _io_TextIOWrapper_writable_impl(textio *self) |
3005 | /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/ |
3006 | { |
3007 | CHECK_ATTACHED(self); |
3008 | return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable); |
3009 | } |
3010 | |
3011 | /*[clinic input] |
3012 | _io.TextIOWrapper.isatty |
3013 | [clinic start generated code]*/ |
3014 | |
3015 | static PyObject * |
3016 | _io_TextIOWrapper_isatty_impl(textio *self) |
3017 | /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/ |
3018 | { |
3019 | CHECK_ATTACHED(self); |
3020 | return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_isatty); |
3021 | } |
3022 | |
3023 | /*[clinic input] |
3024 | _io.TextIOWrapper.flush |
3025 | [clinic start generated code]*/ |
3026 | |
3027 | static PyObject * |
3028 | _io_TextIOWrapper_flush_impl(textio *self) |
3029 | /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/ |
3030 | { |
3031 | CHECK_ATTACHED(self); |
3032 | CHECK_CLOSED(self); |
3033 | self->telling = self->seekable; |
3034 | if (_textiowrapper_writeflush(self) < 0) |
3035 | return NULL; |
3036 | return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_flush); |
3037 | } |
3038 | |
3039 | /*[clinic input] |
3040 | _io.TextIOWrapper.close |
3041 | [clinic start generated code]*/ |
3042 | |
3043 | static PyObject * |
3044 | _io_TextIOWrapper_close_impl(textio *self) |
3045 | /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/ |
3046 | { |
3047 | PyObject *res; |
3048 | int r; |
3049 | CHECK_ATTACHED(self); |
3050 | |
3051 | res = textiowrapper_closed_get(self, NULL); |
3052 | if (res == NULL) |
3053 | return NULL; |
3054 | r = PyObject_IsTrue(res); |
3055 | Py_DECREF(res); |
3056 | if (r < 0) |
3057 | return NULL; |
3058 | |
3059 | if (r > 0) { |
3060 | Py_RETURN_NONE; /* stream already closed */ |
3061 | } |
3062 | else { |
3063 | PyObject *exc = NULL, *val, *tb; |
3064 | if (self->finalizing) { |
3065 | res = _PyObject_CallMethodIdOneArg(self->buffer, |
3066 | &PyId__dealloc_warn, |
3067 | (PyObject *)self); |
3068 | if (res) |
3069 | Py_DECREF(res); |
3070 | else |
3071 | PyErr_Clear(); |
3072 | } |
3073 | res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush); |
3074 | if (res == NULL) |
3075 | PyErr_Fetch(&exc, &val, &tb); |
3076 | else |
3077 | Py_DECREF(res); |
3078 | |
3079 | res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_close); |
3080 | if (exc != NULL) { |
3081 | _PyErr_ChainExceptions(exc, val, tb); |
3082 | Py_CLEAR(res); |
3083 | } |
3084 | return res; |
3085 | } |
3086 | } |
3087 | |
3088 | static PyObject * |
3089 | textiowrapper_iternext(textio *self) |
3090 | { |
3091 | PyObject *line; |
3092 | |
3093 | CHECK_ATTACHED(self); |
3094 | |
3095 | self->telling = 0; |
3096 | if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { |
3097 | /* Skip method call overhead for speed */ |
3098 | line = _textiowrapper_readline(self, -1); |
3099 | } |
3100 | else { |
3101 | line = PyObject_CallMethodNoArgs((PyObject *)self, |
3102 | _PyIO_str_readline); |
3103 | if (line && !PyUnicode_Check(line)) { |
3104 | PyErr_Format(PyExc_OSError, |
3105 | "readline() should have returned a str object, " |
3106 | "not '%.200s'" , Py_TYPE(line)->tp_name); |
3107 | Py_DECREF(line); |
3108 | return NULL; |
3109 | } |
3110 | } |
3111 | |
3112 | if (line == NULL || PyUnicode_READY(line) == -1) |
3113 | return NULL; |
3114 | |
3115 | if (PyUnicode_GET_LENGTH(line) == 0) { |
3116 | /* Reached EOF or would have blocked */ |
3117 | Py_DECREF(line); |
3118 | Py_CLEAR(self->snapshot); |
3119 | self->telling = self->seekable; |
3120 | return NULL; |
3121 | } |
3122 | |
3123 | return line; |
3124 | } |
3125 | |
3126 | static PyObject * |
3127 | textiowrapper_name_get(textio *self, void *context) |
3128 | { |
3129 | CHECK_ATTACHED(self); |
3130 | return _PyObject_GetAttrId(self->buffer, &PyId_name); |
3131 | } |
3132 | |
3133 | static PyObject * |
3134 | textiowrapper_closed_get(textio *self, void *context) |
3135 | { |
3136 | CHECK_ATTACHED(self); |
3137 | return PyObject_GetAttr(self->buffer, _PyIO_str_closed); |
3138 | } |
3139 | |
3140 | static PyObject * |
3141 | textiowrapper_newlines_get(textio *self, void *context) |
3142 | { |
3143 | PyObject *res; |
3144 | CHECK_ATTACHED(self); |
3145 | if (self->decoder == NULL || |
3146 | _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0) |
3147 | { |
3148 | Py_RETURN_NONE; |
3149 | } |
3150 | return res; |
3151 | } |
3152 | |
3153 | static PyObject * |
3154 | textiowrapper_errors_get(textio *self, void *context) |
3155 | { |
3156 | CHECK_INITIALIZED(self); |
3157 | Py_INCREF(self->errors); |
3158 | return self->errors; |
3159 | } |
3160 | |
3161 | static PyObject * |
3162 | textiowrapper_chunk_size_get(textio *self, void *context) |
3163 | { |
3164 | CHECK_ATTACHED(self); |
3165 | return PyLong_FromSsize_t(self->chunk_size); |
3166 | } |
3167 | |
3168 | static int |
3169 | textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context) |
3170 | { |
3171 | Py_ssize_t n; |
3172 | CHECK_ATTACHED_INT(self); |
3173 | if (arg == NULL) { |
3174 | PyErr_SetString(PyExc_AttributeError, "cannot delete attribute" ); |
3175 | return -1; |
3176 | } |
3177 | n = PyNumber_AsSsize_t(arg, PyExc_ValueError); |
3178 | if (n == -1 && PyErr_Occurred()) |
3179 | return -1; |
3180 | if (n <= 0) { |
3181 | PyErr_SetString(PyExc_ValueError, |
3182 | "a strictly positive integer is required" ); |
3183 | return -1; |
3184 | } |
3185 | self->chunk_size = n; |
3186 | return 0; |
3187 | } |
3188 | |
3189 | #include "clinic/textio.c.h" |
3190 | |
/* Method table installed as tp_methods of PyIncrementalNewlineDecoder_Type.
   Each *_METHODDEF macro stands for one complete table entry, including
   its trailing comma; the macros are presumably provided by the
   "clinic/textio.c.h" header included just above.  The {NULL} entry
   terminates the table. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}
};
3198 | |
3199 | static PyGetSetDef incrementalnewlinedecoder_getset[] = { |
3200 | {"newlines" , (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL}, |
3201 | {NULL} |
3202 | }; |
3203 | |
3204 | PyTypeObject PyIncrementalNewlineDecoder_Type = { |
3205 | PyVarObject_HEAD_INIT(NULL, 0) |
3206 | "_io.IncrementalNewlineDecoder" , /*tp_name*/ |
3207 | sizeof(nldecoder_object), /*tp_basicsize*/ |
3208 | 0, /*tp_itemsize*/ |
3209 | (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/ |
3210 | 0, /*tp_vectorcall_offset*/ |
3211 | 0, /*tp_getattr*/ |
3212 | 0, /*tp_setattr*/ |
3213 | 0, /*tp_as_async*/ |
3214 | 0, /*tp_repr*/ |
3215 | 0, /*tp_as_number*/ |
3216 | 0, /*tp_as_sequence*/ |
3217 | 0, /*tp_as_mapping*/ |
3218 | 0, /*tp_hash */ |
3219 | 0, /*tp_call*/ |
3220 | 0, /*tp_str*/ |
3221 | 0, /*tp_getattro*/ |
3222 | 0, /*tp_setattro*/ |
3223 | 0, /*tp_as_buffer*/ |
3224 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ |
3225 | _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */ |
3226 | 0, /* tp_traverse */ |
3227 | 0, /* tp_clear */ |
3228 | 0, /* tp_richcompare */ |
3229 | 0, /*tp_weaklistoffset*/ |
3230 | 0, /* tp_iter */ |
3231 | 0, /* tp_iternext */ |
3232 | incrementalnewlinedecoder_methods, /* tp_methods */ |
3233 | 0, /* tp_members */ |
3234 | incrementalnewlinedecoder_getset, /* tp_getset */ |
3235 | 0, /* tp_base */ |
3236 | 0, /* tp_dict */ |
3237 | 0, /* tp_descr_get */ |
3238 | 0, /* tp_descr_set */ |
3239 | 0, /* tp_dictoffset */ |
3240 | _io_IncrementalNewlineDecoder___init__, /* tp_init */ |
3241 | 0, /* tp_alloc */ |
3242 | PyType_GenericNew, /* tp_new */ |
3243 | }; |
3244 | |
3245 | |
/* Method table installed as tp_methods of PyTextIOWrapper_Type.
   Each *_METHODDEF macro stands for one complete table entry, including
   its trailing comma; the macros are presumably provided by the
   "clinic/textio.c.h" header included earlier in this file.  The
   {NULL, NULL} entry terminates the table. */
static PyMethodDef textiowrapper_methods[] = {
    /* Configuration, data transfer and lifecycle */
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}
};
3266 | |
3267 | static PyMemberDef textiowrapper_members[] = { |
3268 | {"encoding" , T_OBJECT, offsetof(textio, encoding), READONLY}, |
3269 | {"buffer" , T_OBJECT, offsetof(textio, buffer), READONLY}, |
3270 | {"line_buffering" , T_BOOL, offsetof(textio, line_buffering), READONLY}, |
3271 | {"write_through" , T_BOOL, offsetof(textio, write_through), READONLY}, |
3272 | {"_finalizing" , T_BOOL, offsetof(textio, finalizing), 0}, |
3273 | {NULL} |
3274 | }; |
3275 | |
3276 | static PyGetSetDef textiowrapper_getset[] = { |
3277 | {"name" , (getter)textiowrapper_name_get, NULL, NULL}, |
3278 | {"closed" , (getter)textiowrapper_closed_get, NULL, NULL}, |
3279 | /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL}, |
3280 | */ |
3281 | {"newlines" , (getter)textiowrapper_newlines_get, NULL, NULL}, |
3282 | {"errors" , (getter)textiowrapper_errors_get, NULL, NULL}, |
3283 | {"_CHUNK_SIZE" , (getter)textiowrapper_chunk_size_get, |
3284 | (setter)textiowrapper_chunk_size_set, NULL}, |
3285 | {NULL} |
3286 | }; |
3287 | |
3288 | PyTypeObject PyTextIOWrapper_Type = { |
3289 | PyVarObject_HEAD_INIT(NULL, 0) |
3290 | "_io.TextIOWrapper" , /*tp_name*/ |
3291 | sizeof(textio), /*tp_basicsize*/ |
3292 | 0, /*tp_itemsize*/ |
3293 | (destructor)textiowrapper_dealloc, /*tp_dealloc*/ |
3294 | 0, /*tp_vectorcall_offset*/ |
3295 | 0, /*tp_getattr*/ |
3296 | 0, /*tps_etattr*/ |
3297 | 0, /*tp_as_async*/ |
3298 | (reprfunc)textiowrapper_repr,/*tp_repr*/ |
3299 | 0, /*tp_as_number*/ |
3300 | 0, /*tp_as_sequence*/ |
3301 | 0, /*tp_as_mapping*/ |
3302 | 0, /*tp_hash */ |
3303 | 0, /*tp_call*/ |
3304 | 0, /*tp_str*/ |
3305 | 0, /*tp_getattro*/ |
3306 | 0, /*tp_setattro*/ |
3307 | 0, /*tp_as_buffer*/ |
3308 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3309 | | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ |
3310 | _io_TextIOWrapper___init____doc__, /* tp_doc */ |
3311 | (traverseproc)textiowrapper_traverse, /* tp_traverse */ |
3312 | (inquiry)textiowrapper_clear, /* tp_clear */ |
3313 | 0, /* tp_richcompare */ |
3314 | offsetof(textio, weakreflist), /*tp_weaklistoffset*/ |
3315 | 0, /* tp_iter */ |
3316 | (iternextfunc)textiowrapper_iternext, /* tp_iternext */ |
3317 | textiowrapper_methods, /* tp_methods */ |
3318 | textiowrapper_members, /* tp_members */ |
3319 | textiowrapper_getset, /* tp_getset */ |
3320 | 0, /* tp_base */ |
3321 | 0, /* tp_dict */ |
3322 | 0, /* tp_descr_get */ |
3323 | 0, /* tp_descr_set */ |
3324 | offsetof(textio, dict), /*tp_dictoffset*/ |
3325 | _io_TextIOWrapper___init__, /* tp_init */ |
3326 | 0, /* tp_alloc */ |
3327 | PyType_GenericNew, /* tp_new */ |
3328 | 0, /* tp_free */ |
3329 | 0, /* tp_is_gc */ |
3330 | 0, /* tp_bases */ |
3331 | 0, /* tp_mro */ |
3332 | 0, /* tp_cache */ |
3333 | 0, /* tp_subclasses */ |
3334 | 0, /* tp_weaklist */ |
3335 | 0, /* tp_del */ |
3336 | 0, /* tp_version_tag */ |
3337 | 0, /* tp_finalize */ |
3338 | }; |
3339 | |