1#define PY_SSIZE_T_CLEAN
2#include "Python.h"
3#include <stddef.h> // offsetof()
4#include "pycore_accu.h"
5#include "pycore_object.h"
6#include "_iomodule.h"
7
8/* Implementation note: the buffer is always at least one character longer
9 than the enclosed string, for proper functioning of _PyIO_find_line_ending.
10*/
11
/* Values taken by stringio.state. */
enum {
    STATE_REALIZED = 1,      /* contents are held in the UCS4 buffer */
    STATE_ACCUMULATING = 2   /* contents are held in the _PyAccu */
};
14
15/*[clinic input]
16module _io
17class _io.StringIO "stringio *" "&PyStringIO_Type"
18[clinic start generated code]*/
19/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c17bc0f42165cd7d]*/
20
21typedef struct {
22 PyObject_HEAD
23 Py_UCS4 *buf;
24 Py_ssize_t pos;
25 Py_ssize_t string_size;
26 size_t buf_size;
27
28 /* The stringio object can be in two states: accumulating or realized.
29 In accumulating state, the internal buffer contains nothing and
30 the contents are given by the embedded _PyAccu structure.
31 In realized state, the internal buffer is meaningful and the
32 _PyAccu is destroyed.
33 */
34 int state;
35 _PyAccu accu;
36
37 char ok; /* initialized? */
38 char closed;
39 char readuniversal;
40 char readtranslate;
41 PyObject *decoder;
42 PyObject *readnl;
43 PyObject *writenl;
44
45 PyObject *dict;
46 PyObject *weakreflist;
47} stringio;
48
49static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
50
/* Make the calling function return NULL with ValueError set when the object
   has not been successfully initialized (self->ok <= 0).
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement: it needs a trailing ';' and cannot swallow an `else`
   that follows it.  Only usable in functions that return a pointer. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
57
/* Make the calling function return NULL with ValueError set when the object
   has been closed (self->closed is non-zero).
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement: it needs a trailing ';' and cannot swallow an `else`
   that follows it.  Only usable in functions that return a pointer. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)
64
/* Switch the object to the realized state via realize(); on failure make the
   calling function return NULL (realize() reports failure with a negative
   return value).
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement: it needs a trailing ';' and cannot swallow an `else`
   that follows it.  Only usable in functions that return a pointer. */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
69
70
71/* Internal routine for changing the size, in terms of characters, of the
72 buffer of StringIO objects. The caller should ensure that the 'size'
73 argument is non-negative. Returns 0 on success, -1 otherwise. */
74static int
75resize_buffer(stringio *self, size_t size)
76{
77 /* Here, unsigned types are used to avoid dealing with signed integer
78 overflow, which is undefined in C. */
79 size_t alloc = self->buf_size;
80 Py_UCS4 *new_buf = NULL;
81
82 assert(self->buf != NULL);
83
84 /* Reserve one more char for line ending detection. */
85 size = size + 1;
86 /* For simplicity, stay in the range of the signed type. Anyway, Python
87 doesn't allow strings to be longer than this. */
88 if (size > PY_SSIZE_T_MAX)
89 goto overflow;
90
91 if (size < alloc / 2) {
92 /* Major downsize; resize down to exact size. */
93 alloc = size + 1;
94 }
95 else if (size < alloc) {
96 /* Within allocated size; quick exit */
97 return 0;
98 }
99 else if (size <= alloc * 1.125) {
100 /* Moderate upsize; overallocate similar to list_resize() */
101 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
102 }
103 else {
104 /* Major upsize; resize up to exact size */
105 alloc = size + 1;
106 }
107
108 if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
109 goto overflow;
110 new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
111 if (new_buf == NULL) {
112 PyErr_NoMemory();
113 return -1;
114 }
115 self->buf_size = alloc;
116 self->buf = new_buf;
117
118 return 0;
119
120 overflow:
121 PyErr_SetString(PyExc_OverflowError,
122 "new buffer size too large");
123 return -1;
124}
125
126static PyObject *
127make_intermediate(stringio *self)
128{
129 PyObject *intermediate = _PyAccu_Finish(&self->accu);
130 self->state = STATE_REALIZED;
131 if (intermediate == NULL)
132 return NULL;
133 if (_PyAccu_Init(&self->accu) ||
134 _PyAccu_Accumulate(&self->accu, intermediate)) {
135 Py_DECREF(intermediate);
136 return NULL;
137 }
138 self->state = STATE_ACCUMULATING;
139 return intermediate;
140}
141
142static int
143realize(stringio *self)
144{
145 Py_ssize_t len;
146 PyObject *intermediate;
147
148 if (self->state == STATE_REALIZED)
149 return 0;
150 assert(self->state == STATE_ACCUMULATING);
151 self->state = STATE_REALIZED;
152
153 intermediate = _PyAccu_Finish(&self->accu);
154 if (intermediate == NULL)
155 return -1;
156
157 /* Append the intermediate string to the internal buffer.
158 The length should be equal to the current cursor position.
159 */
160 len = PyUnicode_GET_LENGTH(intermediate);
161 if (resize_buffer(self, len) < 0) {
162 Py_DECREF(intermediate);
163 return -1;
164 }
165 if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
166 Py_DECREF(intermediate);
167 return -1;
168 }
169
170 Py_DECREF(intermediate);
171 return 0;
172}
173
174/* Internal routine for writing a whole PyUnicode object to the buffer of a
175 StringIO object. Returns 0 on success, or -1 on error. */
176static Py_ssize_t
177write_str(stringio *self, PyObject *obj)
178{
179 Py_ssize_t len;
180 PyObject *decoded = NULL;
181
182 assert(self->buf != NULL);
183 assert(self->pos >= 0);
184
185 if (self->decoder != NULL) {
186 decoded = _PyIncrementalNewlineDecoder_decode(
187 self->decoder, obj, 1 /* always final */);
188 }
189 else {
190 decoded = obj;
191 Py_INCREF(decoded);
192 }
193 if (self->writenl) {
194 PyObject *translated = PyUnicode_Replace(
195 decoded, _PyIO_str_nl, self->writenl, -1);
196 Py_DECREF(decoded);
197 decoded = translated;
198 }
199 if (decoded == NULL)
200 return -1;
201
202 assert(PyUnicode_Check(decoded));
203 if (PyUnicode_READY(decoded)) {
204 Py_DECREF(decoded);
205 return -1;
206 }
207 len = PyUnicode_GET_LENGTH(decoded);
208 assert(len >= 0);
209
210 /* This overflow check is not strictly necessary. However, it avoids us to
211 deal with funky things like comparing an unsigned and a signed
212 integer. */
213 if (self->pos > PY_SSIZE_T_MAX - len) {
214 PyErr_SetString(PyExc_OverflowError,
215 "new position too large");
216 goto fail;
217 }
218
219 if (self->state == STATE_ACCUMULATING) {
220 if (self->string_size == self->pos) {
221 if (_PyAccu_Accumulate(&self->accu, decoded))
222 goto fail;
223 goto success;
224 }
225 if (realize(self))
226 goto fail;
227 }
228
229 if (self->pos + len > self->string_size) {
230 if (resize_buffer(self, self->pos + len) < 0)
231 goto fail;
232 }
233
234 if (self->pos > self->string_size) {
235 /* In case of overseek, pad with null bytes the buffer region between
236 the end of stream and the current position.
237
238 0 lo string_size hi
239 | |<---used--->|<----------available----------->|
240 | | <--to pad-->|<---to write---> |
241 0 buf position
242
243 */
244 memset(self->buf + self->string_size, '\0',
245 (self->pos - self->string_size) * sizeof(Py_UCS4));
246 }
247
248 /* Copy the data to the internal buffer, overwriting some of the
249 existing data if self->pos < self->string_size. */
250 if (!PyUnicode_AsUCS4(decoded,
251 self->buf + self->pos,
252 self->buf_size - self->pos,
253 0))
254 goto fail;
255
256success:
257 /* Set the new length of the internal string if it has changed. */
258 self->pos += len;
259 if (self->string_size < self->pos)
260 self->string_size = self->pos;
261
262 Py_DECREF(decoded);
263 return 0;
264
265fail:
266 Py_XDECREF(decoded);
267 return -1;
268}
269
270/*[clinic input]
271_io.StringIO.getvalue
272
273Retrieve the entire contents of the object.
274[clinic start generated code]*/
275
276static PyObject *
277_io_StringIO_getvalue_impl(stringio *self)
278/*[clinic end generated code: output=27b6a7bfeaebce01 input=d23cb81d6791cf88]*/
279{
280 CHECK_INITIALIZED(self);
281 CHECK_CLOSED(self);
282 if (self->state == STATE_ACCUMULATING)
283 return make_intermediate(self);
284 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
285 self->string_size);
286}
287
288/*[clinic input]
289_io.StringIO.tell
290
291Tell the current file position.
292[clinic start generated code]*/
293
294static PyObject *
295_io_StringIO_tell_impl(stringio *self)
296/*[clinic end generated code: output=2e87ac67b116c77b input=ec866ebaff02f405]*/
297{
298 CHECK_INITIALIZED(self);
299 CHECK_CLOSED(self);
300 return PyLong_FromSsize_t(self->pos);
301}
302
303/*[clinic input]
304_io.StringIO.read
305 size: Py_ssize_t(accept={int, NoneType}) = -1
306 /
307
308Read at most size characters, returned as a string.
309
310If the argument is negative or omitted, read until EOF
311is reached. Return an empty string at EOF.
312[clinic start generated code]*/
313
314static PyObject *
315_io_StringIO_read_impl(stringio *self, Py_ssize_t size)
316/*[clinic end generated code: output=ae8cf6002f71626c input=0921093383dfb92d]*/
317{
318 Py_ssize_t n;
319 Py_UCS4 *output;
320
321 CHECK_INITIALIZED(self);
322 CHECK_CLOSED(self);
323
324 /* adjust invalid sizes */
325 n = self->string_size - self->pos;
326 if (size < 0 || size > n) {
327 size = n;
328 if (size < 0)
329 size = 0;
330 }
331
332 /* Optimization for seek(0); read() */
333 if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
334 PyObject *result = make_intermediate(self);
335 self->pos = self->string_size;
336 return result;
337 }
338
339 ENSURE_REALIZED(self);
340 output = self->buf + self->pos;
341 self->pos += size;
342 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
343}
344
345/* Internal helper, used by stringio_readline and stringio_iternext */
346static PyObject *
347_stringio_readline(stringio *self, Py_ssize_t limit)
348{
349 Py_UCS4 *start, *end, old_char;
350 Py_ssize_t len, consumed;
351
352 /* In case of overseek, return the empty string */
353 if (self->pos >= self->string_size)
354 return PyUnicode_New(0, 0);
355
356 start = self->buf + self->pos;
357 if (limit < 0 || limit > self->string_size - self->pos)
358 limit = self->string_size - self->pos;
359
360 end = start + limit;
361 old_char = *end;
362 *end = '\0';
363 len = _PyIO_find_line_ending(
364 self->readtranslate, self->readuniversal, self->readnl,
365 PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
366 *end = old_char;
367 /* If we haven't found any line ending, we just return everything
368 (`consumed` is ignored). */
369 if (len < 0)
370 len = limit;
371 self->pos += len;
372 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
373}
374
375/*[clinic input]
376_io.StringIO.readline
377 size: Py_ssize_t(accept={int, NoneType}) = -1
378 /
379
380Read until newline or EOF.
381
382Returns an empty string if EOF is hit immediately.
383[clinic start generated code]*/
384
385static PyObject *
386_io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
387/*[clinic end generated code: output=cabd6452f1b7e85d input=a5bd70bf682aa276]*/
388{
389 CHECK_INITIALIZED(self);
390 CHECK_CLOSED(self);
391 ENSURE_REALIZED(self);
392
393 return _stringio_readline(self, size);
394}
395
396static PyObject *
397stringio_iternext(stringio *self)
398{
399 PyObject *line;
400
401 CHECK_INITIALIZED(self);
402 CHECK_CLOSED(self);
403 ENSURE_REALIZED(self);
404
405 if (Py_IS_TYPE(self, &PyStringIO_Type)) {
406 /* Skip method call overhead for speed */
407 line = _stringio_readline(self, -1);
408 }
409 else {
410 /* XXX is subclassing StringIO really supported? */
411 line = PyObject_CallMethodNoArgs((PyObject *)self,
412 _PyIO_str_readline);
413 if (line && !PyUnicode_Check(line)) {
414 PyErr_Format(PyExc_OSError,
415 "readline() should have returned a str object, "
416 "not '%.200s'", Py_TYPE(line)->tp_name);
417 Py_DECREF(line);
418 return NULL;
419 }
420 }
421
422 if (line == NULL)
423 return NULL;
424
425 if (PyUnicode_GET_LENGTH(line) == 0) {
426 /* Reached EOF */
427 Py_DECREF(line);
428 return NULL;
429 }
430
431 return line;
432}
433
434/*[clinic input]
435_io.StringIO.truncate
436 pos as size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None
437 /
438
439Truncate size to pos.
440
441The pos argument defaults to the current file position, as
442returned by tell(). The current file position is unchanged.
443Returns the new absolute position.
444[clinic start generated code]*/
445
446static PyObject *
447_io_StringIO_truncate_impl(stringio *self, Py_ssize_t size)
448/*[clinic end generated code: output=eb3aef8e06701365 input=5505cff90ca48b96]*/
449{
450 CHECK_INITIALIZED(self);
451 CHECK_CLOSED(self);
452
453 if (size < 0) {
454 PyErr_Format(PyExc_ValueError,
455 "Negative size value %zd", size);
456 return NULL;
457 }
458
459 if (size < self->string_size) {
460 ENSURE_REALIZED(self);
461 if (resize_buffer(self, size) < 0)
462 return NULL;
463 self->string_size = size;
464 }
465
466 return PyLong_FromSsize_t(size);
467}
468
469/*[clinic input]
470_io.StringIO.seek
471 pos: Py_ssize_t
472 whence: int = 0
473 /
474
475Change stream position.
476
477Seek to character offset pos relative to position indicated by whence:
478 0 Start of stream (the default). pos should be >= 0;
479 1 Current position - pos must be 0;
480 2 End of stream - pos must be 0.
481Returns the new absolute position.
482[clinic start generated code]*/
483
484static PyObject *
485_io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
486/*[clinic end generated code: output=e9e0ac9a8ae71c25 input=e3855b24e7cae06a]*/
487{
488 CHECK_INITIALIZED(self);
489 CHECK_CLOSED(self);
490
491 if (whence != 0 && whence != 1 && whence != 2) {
492 PyErr_Format(PyExc_ValueError,
493 "Invalid whence (%i, should be 0, 1 or 2)", whence);
494 return NULL;
495 }
496 else if (pos < 0 && whence == 0) {
497 PyErr_Format(PyExc_ValueError,
498 "Negative seek position %zd", pos);
499 return NULL;
500 }
501 else if (whence != 0 && pos != 0) {
502 PyErr_SetString(PyExc_OSError,
503 "Can't do nonzero cur-relative seeks");
504 return NULL;
505 }
506
507 /* whence = 0: offset relative to beginning of the string.
508 whence = 1: no change to current position.
509 whence = 2: change position to end of file. */
510 if (whence == 1) {
511 pos = self->pos;
512 }
513 else if (whence == 2) {
514 pos = self->string_size;
515 }
516
517 self->pos = pos;
518
519 return PyLong_FromSsize_t(self->pos);
520}
521
522/*[clinic input]
523_io.StringIO.write
524 s as obj: object
525 /
526
527Write string to file.
528
529Returns the number of characters written, which is always equal to
530the length of the string.
531[clinic start generated code]*/
532
533static PyObject *
534_io_StringIO_write(stringio *self, PyObject *obj)
535/*[clinic end generated code: output=0deaba91a15b94da input=cf96f3b16586e669]*/
536{
537 Py_ssize_t size;
538
539 CHECK_INITIALIZED(self);
540 if (!PyUnicode_Check(obj)) {
541 PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
542 Py_TYPE(obj)->tp_name);
543 return NULL;
544 }
545 if (PyUnicode_READY(obj))
546 return NULL;
547 CHECK_CLOSED(self);
548 size = PyUnicode_GET_LENGTH(obj);
549
550 if (size > 0 && write_str(self, obj) < 0)
551 return NULL;
552
553 return PyLong_FromSsize_t(size);
554}
555
556/*[clinic input]
557_io.StringIO.close
558
559Close the IO object.
560
561Attempting any further operation after the object is closed
562will raise a ValueError.
563
564This method has no effect if the file is already closed.
565[clinic start generated code]*/
566
567static PyObject *
568_io_StringIO_close_impl(stringio *self)
569/*[clinic end generated code: output=04399355cbe518f1 input=cbc10b45f35d6d46]*/
570{
571 self->closed = 1;
572 /* Free up some memory */
573 if (resize_buffer(self, 0) < 0)
574 return NULL;
575 _PyAccu_Destroy(&self->accu);
576 Py_CLEAR(self->readnl);
577 Py_CLEAR(self->writenl);
578 Py_CLEAR(self->decoder);
579 Py_RETURN_NONE;
580}
581
582static int
583stringio_traverse(stringio *self, visitproc visit, void *arg)
584{
585 Py_VISIT(self->dict);
586 return 0;
587}
588
589static int
590stringio_clear(stringio *self)
591{
592 Py_CLEAR(self->dict);
593 return 0;
594}
595
596static void
597stringio_dealloc(stringio *self)
598{
599 _PyObject_GC_UNTRACK(self);
600 self->ok = 0;
601 if (self->buf) {
602 PyMem_Free(self->buf);
603 self->buf = NULL;
604 }
605 _PyAccu_Destroy(&self->accu);
606 Py_CLEAR(self->readnl);
607 Py_CLEAR(self->writenl);
608 Py_CLEAR(self->decoder);
609 Py_CLEAR(self->dict);
610 if (self->weakreflist != NULL)
611 PyObject_ClearWeakRefs((PyObject *) self);
612 Py_TYPE(self)->tp_free(self);
613}
614
615static PyObject *
616stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
617{
618 stringio *self;
619
620 assert(type != NULL && type->tp_alloc != NULL);
621 self = (stringio *)type->tp_alloc(type, 0);
622 if (self == NULL)
623 return NULL;
624
625 /* tp_alloc initializes all the fields to zero. So we don't have to
626 initialize them here. */
627
628 self->buf = (Py_UCS4 *)PyMem_Malloc(0);
629 if (self->buf == NULL) {
630 Py_DECREF(self);
631 return PyErr_NoMemory();
632 }
633
634 return (PyObject *)self;
635}
636
637/*[clinic input]
638_io.StringIO.__init__
639 initial_value as value: object(c_default="NULL") = ''
640 newline as newline_obj: object(c_default="NULL") = '\n'
641
642Text I/O implementation using an in-memory buffer.
643
644The initial_value argument sets the value of object. The newline
645argument is like the one of TextIOWrapper's constructor.
646[clinic start generated code]*/
647
648static int
649_io_StringIO___init___impl(stringio *self, PyObject *value,
650 PyObject *newline_obj)
651/*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/
652{
653 const char *newline = "\n";
654 Py_ssize_t value_len;
655
656 /* Parse the newline argument. We only want to allow unicode objects or
657 None. */
658 if (newline_obj == Py_None) {
659 newline = NULL;
660 }
661 else if (newline_obj) {
662 if (!PyUnicode_Check(newline_obj)) {
663 PyErr_Format(PyExc_TypeError,
664 "newline must be str or None, not %.200s",
665 Py_TYPE(newline_obj)->tp_name);
666 return -1;
667 }
668 newline = PyUnicode_AsUTF8(newline_obj);
669 if (newline == NULL)
670 return -1;
671 }
672
673 if (newline && newline[0] != '\0'
674 && !(newline[0] == '\n' && newline[1] == '\0')
675 && !(newline[0] == '\r' && newline[1] == '\0')
676 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
677 PyErr_Format(PyExc_ValueError,
678 "illegal newline value: %R", newline_obj);
679 return -1;
680 }
681 if (value && value != Py_None && !PyUnicode_Check(value)) {
682 PyErr_Format(PyExc_TypeError,
683 "initial_value must be str or None, not %.200s",
684 Py_TYPE(value)->tp_name);
685 return -1;
686 }
687
688 self->ok = 0;
689
690 _PyAccu_Destroy(&self->accu);
691 Py_CLEAR(self->readnl);
692 Py_CLEAR(self->writenl);
693 Py_CLEAR(self->decoder);
694
695 assert((newline != NULL && newline_obj != Py_None) ||
696 (newline == NULL && newline_obj == Py_None));
697
698 if (newline) {
699 self->readnl = PyUnicode_FromString(newline);
700 if (self->readnl == NULL)
701 return -1;
702 }
703 self->readuniversal = (newline == NULL || newline[0] == '\0');
704 self->readtranslate = (newline == NULL);
705 /* If newline == "", we don't translate anything.
706 If newline == "\n" or newline == None, we translate to "\n", which is
707 a no-op.
708 (for newline == None, TextIOWrapper translates to os.linesep, but it
709 is pointless for StringIO)
710 */
711 if (newline != NULL && newline[0] == '\r') {
712 self->writenl = self->readnl;
713 Py_INCREF(self->writenl);
714 }
715
716 if (self->readuniversal) {
717 self->decoder = PyObject_CallFunctionObjArgs(
718 (PyObject *)&PyIncrementalNewlineDecoder_Type,
719 Py_None, self->readtranslate ? Py_True : Py_False, NULL);
720 if (self->decoder == NULL)
721 return -1;
722 }
723
724 /* Now everything is set up, resize buffer to size of initial value,
725 and copy it */
726 self->string_size = 0;
727 if (value && value != Py_None)
728 value_len = PyUnicode_GetLength(value);
729 else
730 value_len = 0;
731 if (value_len > 0) {
732 /* This is a heuristic, for newline translation might change
733 the string length. */
734 if (resize_buffer(self, 0) < 0)
735 return -1;
736 self->state = STATE_REALIZED;
737 self->pos = 0;
738 if (write_str(self, value) < 0)
739 return -1;
740 }
741 else {
742 /* Empty stringio object, we can start by accumulating */
743 if (resize_buffer(self, 0) < 0)
744 return -1;
745 if (_PyAccu_Init(&self->accu))
746 return -1;
747 self->state = STATE_ACCUMULATING;
748 }
749 self->pos = 0;
750
751 self->closed = 0;
752 self->ok = 1;
753 return 0;
754}
755
756/* Properties and pseudo-properties */
757
758/*[clinic input]
759_io.StringIO.readable
760
761Returns True if the IO object can be read.
762[clinic start generated code]*/
763
764static PyObject *
765_io_StringIO_readable_impl(stringio *self)
766/*[clinic end generated code: output=b19d44dd8b1ceb99 input=39ce068b224c21ad]*/
767{
768 CHECK_INITIALIZED(self);
769 CHECK_CLOSED(self);
770 Py_RETURN_TRUE;
771}
772
773/*[clinic input]
774_io.StringIO.writable
775
776Returns True if the IO object can be written.
777[clinic start generated code]*/
778
779static PyObject *
780_io_StringIO_writable_impl(stringio *self)
781/*[clinic end generated code: output=13e4dd77187074ca input=7a691353aac38835]*/
782{
783 CHECK_INITIALIZED(self);
784 CHECK_CLOSED(self);
785 Py_RETURN_TRUE;
786}
787
788/*[clinic input]
789_io.StringIO.seekable
790
791Returns True if the IO object can be seeked.
792[clinic start generated code]*/
793
794static PyObject *
795_io_StringIO_seekable_impl(stringio *self)
796/*[clinic end generated code: output=4d20b4641c756879 input=4c606d05b32952e6]*/
797{
798 CHECK_INITIALIZED(self);
799 CHECK_CLOSED(self);
800 Py_RETURN_TRUE;
801}
802
803/* Pickling support.
804
805 The implementation of __getstate__ is similar to the one for BytesIO,
806 except that we also save the newline parameter. For __setstate__ and unlike
807 BytesIO, we call __init__ to restore the object's state. Doing so allows us
808 to avoid decoding the complex newline state while keeping the object
809 representation compact.
810
   See comment in bytesio.c regarding why only pickle protocol 2 and onward
   is supported.
813*/
814
815static PyObject *
816stringio_getstate(stringio *self, PyObject *Py_UNUSED(ignored))
817{
818 PyObject *initvalue = _io_StringIO_getvalue_impl(self);
819 PyObject *dict;
820 PyObject *state;
821
822 if (initvalue == NULL)
823 return NULL;
824 if (self->dict == NULL) {
825 Py_INCREF(Py_None);
826 dict = Py_None;
827 }
828 else {
829 dict = PyDict_Copy(self->dict);
830 if (dict == NULL) {
831 Py_DECREF(initvalue);
832 return NULL;
833 }
834 }
835
836 state = Py_BuildValue("(OOnN)", initvalue,
837 self->readnl ? self->readnl : Py_None,
838 self->pos, dict);
839 Py_DECREF(initvalue);
840 return state;
841}
842
843static PyObject *
844stringio_setstate(stringio *self, PyObject *state)
845{
846 PyObject *initarg;
847 PyObject *position_obj;
848 PyObject *dict;
849 Py_ssize_t pos;
850
851 assert(state != NULL);
852 CHECK_CLOSED(self);
853
854 /* We allow the state tuple to be longer than 4, because we may need
855 someday to extend the object's state without breaking
856 backward-compatibility. */
857 if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) {
858 PyErr_Format(PyExc_TypeError,
859 "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
860 Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
861 return NULL;
862 }
863
864 /* Initialize the object's state. */
865 initarg = PyTuple_GetSlice(state, 0, 2);
866 if (initarg == NULL)
867 return NULL;
868 if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) {
869 Py_DECREF(initarg);
870 return NULL;
871 }
872 Py_DECREF(initarg);
873
874 /* Restore the buffer state. Even if __init__ did initialize the buffer,
875 we have to initialize it again since __init__ may translate the
876 newlines in the initial_value string. We clearly do not want that
877 because the string value in the state tuple has already been translated
878 once by __init__. So we do not take any chance and replace object's
879 buffer completely. */
880 {
881 PyObject *item;
882 Py_UCS4 *buf;
883 Py_ssize_t bufsize;
884
885 item = PyTuple_GET_ITEM(state, 0);
886 buf = PyUnicode_AsUCS4Copy(item);
887 if (buf == NULL)
888 return NULL;
889 bufsize = PyUnicode_GET_LENGTH(item);
890
891 if (resize_buffer(self, bufsize) < 0) {
892 PyMem_Free(buf);
893 return NULL;
894 }
895 memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
896 PyMem_Free(buf);
897 self->string_size = bufsize;
898 }
899
900 /* Set carefully the position value. Alternatively, we could use the seek
901 method instead of modifying self->pos directly to better protect the
902 object internal state against erroneous (or malicious) inputs. */
903 position_obj = PyTuple_GET_ITEM(state, 2);
904 if (!PyLong_Check(position_obj)) {
905 PyErr_Format(PyExc_TypeError,
906 "third item of state must be an integer, got %.200s",
907 Py_TYPE(position_obj)->tp_name);
908 return NULL;
909 }
910 pos = PyLong_AsSsize_t(position_obj);
911 if (pos == -1 && PyErr_Occurred())
912 return NULL;
913 if (pos < 0) {
914 PyErr_SetString(PyExc_ValueError,
915 "position value cannot be negative");
916 return NULL;
917 }
918 self->pos = pos;
919
920 /* Set the dictionary of the instance variables. */
921 dict = PyTuple_GET_ITEM(state, 3);
922 if (dict != Py_None) {
923 if (!PyDict_Check(dict)) {
924 PyErr_Format(PyExc_TypeError,
925 "fourth item of state should be a dict, got a %.200s",
926 Py_TYPE(dict)->tp_name);
927 return NULL;
928 }
929 if (self->dict) {
930 /* Alternatively, we could replace the internal dictionary
931 completely. However, it seems more practical to just update it. */
932 if (PyDict_Update(self->dict, dict) < 0)
933 return NULL;
934 }
935 else {
936 Py_INCREF(dict);
937 self->dict = dict;
938 }
939 }
940
941 Py_RETURN_NONE;
942}
943
944
945static PyObject *
946stringio_closed(stringio *self, void *context)
947{
948 CHECK_INITIALIZED(self);
949 return PyBool_FromLong(self->closed);
950}
951
952static PyObject *
953stringio_line_buffering(stringio *self, void *context)
954{
955 CHECK_INITIALIZED(self);
956 CHECK_CLOSED(self);
957 Py_RETURN_FALSE;
958}
959
960static PyObject *
961stringio_newlines(stringio *self, void *context)
962{
963 CHECK_INITIALIZED(self);
964 CHECK_CLOSED(self);
965 if (self->decoder == NULL)
966 Py_RETURN_NONE;
967 return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
968}
969
970#include "clinic/stringio.c.h"
971
972static struct PyMethodDef stringio_methods[] = {
973 _IO_STRINGIO_CLOSE_METHODDEF
974 _IO_STRINGIO_GETVALUE_METHODDEF
975 _IO_STRINGIO_READ_METHODDEF
976 _IO_STRINGIO_READLINE_METHODDEF
977 _IO_STRINGIO_TELL_METHODDEF
978 _IO_STRINGIO_TRUNCATE_METHODDEF
979 _IO_STRINGIO_SEEK_METHODDEF
980 _IO_STRINGIO_WRITE_METHODDEF
981
982 _IO_STRINGIO_SEEKABLE_METHODDEF
983 _IO_STRINGIO_READABLE_METHODDEF
984 _IO_STRINGIO_WRITABLE_METHODDEF
985
986 {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
987 {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
988 {NULL, NULL} /* sentinel */
989};
990
991static PyGetSetDef stringio_getset[] = {
992 {"closed", (getter)stringio_closed, NULL, NULL},
993 {"newlines", (getter)stringio_newlines, NULL, NULL},
994 /* (following comments straight off of the original Python wrapper:)
995 XXX Cruft to support the TextIOWrapper API. This would only
996 be meaningful if StringIO supported the buffer attribute.
997 Hopefully, a better solution, than adding these pseudo-attributes,
998 will be found.
999 */
1000 {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
1001 {NULL}
1002};
1003
1004PyTypeObject PyStringIO_Type = {
1005 PyVarObject_HEAD_INIT(NULL, 0)
1006 "_io.StringIO", /*tp_name*/
1007 sizeof(stringio), /*tp_basicsize*/
1008 0, /*tp_itemsize*/
1009 (destructor)stringio_dealloc, /*tp_dealloc*/
1010 0, /*tp_vectorcall_offset*/
1011 0, /*tp_getattr*/
1012 0, /*tp_setattr*/
1013 0, /*tp_as_async*/
1014 0, /*tp_repr*/
1015 0, /*tp_as_number*/
1016 0, /*tp_as_sequence*/
1017 0, /*tp_as_mapping*/
1018 0, /*tp_hash*/
1019 0, /*tp_call*/
1020 0, /*tp_str*/
1021 0, /*tp_getattro*/
1022 0, /*tp_setattro*/
1023 0, /*tp_as_buffer*/
1024 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
1025 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1026 _io_StringIO___init____doc__, /*tp_doc*/
1027 (traverseproc)stringio_traverse, /*tp_traverse*/
1028 (inquiry)stringio_clear, /*tp_clear*/
1029 0, /*tp_richcompare*/
1030 offsetof(stringio, weakreflist), /*tp_weaklistoffset*/
1031 0, /*tp_iter*/
1032 (iternextfunc)stringio_iternext, /*tp_iternext*/
1033 stringio_methods, /*tp_methods*/
1034 0, /*tp_members*/
1035 stringio_getset, /*tp_getset*/
1036 0, /*tp_base*/
1037 0, /*tp_dict*/
1038 0, /*tp_descr_get*/
1039 0, /*tp_descr_set*/
1040 offsetof(stringio, dict), /*tp_dictoffset*/
1041 _io_StringIO___init__, /*tp_init*/
1042 0, /*tp_alloc*/
1043 stringio_new, /*tp_new*/
1044};
1045