1 | #define PY_SSIZE_T_CLEAN |
2 | #include "Python.h" |
3 | #include <stddef.h> // offsetof() |
4 | #include "pycore_accu.h" |
5 | #include "pycore_object.h" |
6 | #include "_iomodule.h" |
7 | |
8 | /* Implementation note: the buffer is always at least one character longer |
9 | than the enclosed string, for proper functioning of _PyIO_find_line_ending. |
10 | */ |
11 | |
/* Values stored in stringio.state; they record where the stream contents
   currently live. */
#define STATE_REALIZED 1      /* contents are held in the internal buffer */
#define STATE_ACCUMULATING 2  /* contents are held in the embedded _PyAccu */
14 | |
15 | /*[clinic input] |
16 | module _io |
17 | class _io.StringIO "stringio *" "&PyStringIO_Type" |
18 | [clinic start generated code]*/ |
19 | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c17bc0f42165cd7d]*/ |
20 | |
/* Instance layout of _io.StringIO objects. */
typedef struct {
    PyObject_HEAD
    Py_UCS4 *buf;            /* character storage, one Py_UCS4 per character */
    Py_ssize_t pos;          /* current stream position, in characters */
    Py_ssize_t string_size;  /* length of the stream contents, in characters */
    size_t buf_size;         /* number of Py_UCS4 slots allocated for buf */

    /* The stringio object can be in two states: accumulating or realized.
       In accumulating state, the internal buffer contains nothing and
       the contents are given by the embedded _PyAccu structure.
       In realized state, the internal buffer is meaningful and the
       _PyAccu is destroyed.
    */
    int state;               /* STATE_REALIZED or STATE_ACCUMULATING */
    _PyAccu accu;

    char ok;    /* initialized? */
    char closed;             /* set by close(); checked by CHECK_CLOSED */
    char readuniversal;      /* set when newline is None or "" */
    char readtranslate;      /* set when newline is None */
    PyObject *decoder;       /* newline decoder used by write_str(), or NULL */
    PyObject *readnl;        /* the newline argument as a str; NULL for None */
    PyObject *writenl;       /* string substituted for "\n" when writing,
                                or NULL when no substitution is done */

    PyObject *dict;          /* instance dictionary (tp_dictoffset) */
    PyObject *weakreflist;   /* weak reference list (tp_weaklistoffset) */
} stringio;
48 | |
/* Forward declaration: stringio_setstate() calls the __init__ entry point
   before its definition is visible (presumably it is defined in the
   Argument Clinic header included further down -- confirm). */
static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
50 | |
/* Make the enclosing function raise ValueError and return NULL when `self`
   has not been successfully initialized.
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and cannot capture a following `else`; it must be
   terminated with a semicolon at the point of use. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
57 | |
/* Make the enclosing function raise ValueError and return NULL when `self`
   has been closed.
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and cannot capture a following `else`; it must be
   terminated with a semicolon at the point of use. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)
64 | |
/* Switch `self` to the realized state, returning NULL from the enclosing
   function if realize() fails (realize() has then set the exception).
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and cannot capture a following `else`; it must be
   terminated with a semicolon at the point of use. */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
69 | |
70 | |
71 | /* Internal routine for changing the size, in terms of characters, of the |
72 | buffer of StringIO objects. The caller should ensure that the 'size' |
73 | argument is non-negative. Returns 0 on success, -1 otherwise. */ |
74 | static int |
75 | resize_buffer(stringio *self, size_t size) |
76 | { |
77 | /* Here, unsigned types are used to avoid dealing with signed integer |
78 | overflow, which is undefined in C. */ |
79 | size_t alloc = self->buf_size; |
80 | Py_UCS4 *new_buf = NULL; |
81 | |
82 | assert(self->buf != NULL); |
83 | |
84 | /* Reserve one more char for line ending detection. */ |
85 | size = size + 1; |
86 | /* For simplicity, stay in the range of the signed type. Anyway, Python |
87 | doesn't allow strings to be longer than this. */ |
88 | if (size > PY_SSIZE_T_MAX) |
89 | goto overflow; |
90 | |
91 | if (size < alloc / 2) { |
92 | /* Major downsize; resize down to exact size. */ |
93 | alloc = size + 1; |
94 | } |
95 | else if (size < alloc) { |
96 | /* Within allocated size; quick exit */ |
97 | return 0; |
98 | } |
99 | else if (size <= alloc * 1.125) { |
100 | /* Moderate upsize; overallocate similar to list_resize() */ |
101 | alloc = size + (size >> 3) + (size < 9 ? 3 : 6); |
102 | } |
103 | else { |
104 | /* Major upsize; resize up to exact size */ |
105 | alloc = size + 1; |
106 | } |
107 | |
108 | if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4)) |
109 | goto overflow; |
110 | new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4)); |
111 | if (new_buf == NULL) { |
112 | PyErr_NoMemory(); |
113 | return -1; |
114 | } |
115 | self->buf_size = alloc; |
116 | self->buf = new_buf; |
117 | |
118 | return 0; |
119 | |
120 | overflow: |
121 | PyErr_SetString(PyExc_OverflowError, |
122 | "new buffer size too large" ); |
123 | return -1; |
124 | } |
125 | |
126 | static PyObject * |
127 | make_intermediate(stringio *self) |
128 | { |
129 | PyObject *intermediate = _PyAccu_Finish(&self->accu); |
130 | self->state = STATE_REALIZED; |
131 | if (intermediate == NULL) |
132 | return NULL; |
133 | if (_PyAccu_Init(&self->accu) || |
134 | _PyAccu_Accumulate(&self->accu, intermediate)) { |
135 | Py_DECREF(intermediate); |
136 | return NULL; |
137 | } |
138 | self->state = STATE_ACCUMULATING; |
139 | return intermediate; |
140 | } |
141 | |
142 | static int |
143 | realize(stringio *self) |
144 | { |
145 | Py_ssize_t len; |
146 | PyObject *intermediate; |
147 | |
148 | if (self->state == STATE_REALIZED) |
149 | return 0; |
150 | assert(self->state == STATE_ACCUMULATING); |
151 | self->state = STATE_REALIZED; |
152 | |
153 | intermediate = _PyAccu_Finish(&self->accu); |
154 | if (intermediate == NULL) |
155 | return -1; |
156 | |
157 | /* Append the intermediate string to the internal buffer. |
158 | The length should be equal to the current cursor position. |
159 | */ |
160 | len = PyUnicode_GET_LENGTH(intermediate); |
161 | if (resize_buffer(self, len) < 0) { |
162 | Py_DECREF(intermediate); |
163 | return -1; |
164 | } |
165 | if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) { |
166 | Py_DECREF(intermediate); |
167 | return -1; |
168 | } |
169 | |
170 | Py_DECREF(intermediate); |
171 | return 0; |
172 | } |
173 | |
174 | /* Internal routine for writing a whole PyUnicode object to the buffer of a |
175 | StringIO object. Returns 0 on success, or -1 on error. */ |
176 | static Py_ssize_t |
177 | write_str(stringio *self, PyObject *obj) |
178 | { |
179 | Py_ssize_t len; |
180 | PyObject *decoded = NULL; |
181 | |
182 | assert(self->buf != NULL); |
183 | assert(self->pos >= 0); |
184 | |
185 | if (self->decoder != NULL) { |
186 | decoded = _PyIncrementalNewlineDecoder_decode( |
187 | self->decoder, obj, 1 /* always final */); |
188 | } |
189 | else { |
190 | decoded = obj; |
191 | Py_INCREF(decoded); |
192 | } |
193 | if (self->writenl) { |
194 | PyObject *translated = PyUnicode_Replace( |
195 | decoded, _PyIO_str_nl, self->writenl, -1); |
196 | Py_DECREF(decoded); |
197 | decoded = translated; |
198 | } |
199 | if (decoded == NULL) |
200 | return -1; |
201 | |
202 | assert(PyUnicode_Check(decoded)); |
203 | if (PyUnicode_READY(decoded)) { |
204 | Py_DECREF(decoded); |
205 | return -1; |
206 | } |
207 | len = PyUnicode_GET_LENGTH(decoded); |
208 | assert(len >= 0); |
209 | |
210 | /* This overflow check is not strictly necessary. However, it avoids us to |
211 | deal with funky things like comparing an unsigned and a signed |
212 | integer. */ |
213 | if (self->pos > PY_SSIZE_T_MAX - len) { |
214 | PyErr_SetString(PyExc_OverflowError, |
215 | "new position too large" ); |
216 | goto fail; |
217 | } |
218 | |
219 | if (self->state == STATE_ACCUMULATING) { |
220 | if (self->string_size == self->pos) { |
221 | if (_PyAccu_Accumulate(&self->accu, decoded)) |
222 | goto fail; |
223 | goto success; |
224 | } |
225 | if (realize(self)) |
226 | goto fail; |
227 | } |
228 | |
229 | if (self->pos + len > self->string_size) { |
230 | if (resize_buffer(self, self->pos + len) < 0) |
231 | goto fail; |
232 | } |
233 | |
234 | if (self->pos > self->string_size) { |
235 | /* In case of overseek, pad with null bytes the buffer region between |
236 | the end of stream and the current position. |
237 | |
238 | 0 lo string_size hi |
239 | | |<---used--->|<----------available----------->| |
240 | | | <--to pad-->|<---to write---> | |
241 | 0 buf position |
242 | |
243 | */ |
244 | memset(self->buf + self->string_size, '\0', |
245 | (self->pos - self->string_size) * sizeof(Py_UCS4)); |
246 | } |
247 | |
248 | /* Copy the data to the internal buffer, overwriting some of the |
249 | existing data if self->pos < self->string_size. */ |
250 | if (!PyUnicode_AsUCS4(decoded, |
251 | self->buf + self->pos, |
252 | self->buf_size - self->pos, |
253 | 0)) |
254 | goto fail; |
255 | |
256 | success: |
257 | /* Set the new length of the internal string if it has changed. */ |
258 | self->pos += len; |
259 | if (self->string_size < self->pos) |
260 | self->string_size = self->pos; |
261 | |
262 | Py_DECREF(decoded); |
263 | return 0; |
264 | |
265 | fail: |
266 | Py_XDECREF(decoded); |
267 | return -1; |
268 | } |
269 | |
270 | /*[clinic input] |
271 | _io.StringIO.getvalue |
272 | |
273 | Retrieve the entire contents of the object. |
274 | [clinic start generated code]*/ |
275 | |
276 | static PyObject * |
277 | _io_StringIO_getvalue_impl(stringio *self) |
278 | /*[clinic end generated code: output=27b6a7bfeaebce01 input=d23cb81d6791cf88]*/ |
279 | { |
280 | CHECK_INITIALIZED(self); |
281 | CHECK_CLOSED(self); |
282 | if (self->state == STATE_ACCUMULATING) |
283 | return make_intermediate(self); |
284 | return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf, |
285 | self->string_size); |
286 | } |
287 | |
288 | /*[clinic input] |
289 | _io.StringIO.tell |
290 | |
291 | Tell the current file position. |
292 | [clinic start generated code]*/ |
293 | |
static PyObject *
_io_StringIO_tell_impl(stringio *self)
/*[clinic end generated code: output=2e87ac67b116c77b input=ec866ebaff02f405]*/
{
    /* Both checks raise ValueError and return NULL on failure. */
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    /* The position is a character offset.  seek() stores it without
       comparing it to the contents' length, so it may lie past the end. */
    return PyLong_FromSsize_t(self->pos);
}
302 | |
303 | /*[clinic input] |
304 | _io.StringIO.read |
305 | size: Py_ssize_t(accept={int, NoneType}) = -1 |
306 | / |
307 | |
308 | Read at most size characters, returned as a string. |
309 | |
310 | If the argument is negative or omitted, read until EOF |
311 | is reached. Return an empty string at EOF. |
312 | [clinic start generated code]*/ |
313 | |
314 | static PyObject * |
315 | _io_StringIO_read_impl(stringio *self, Py_ssize_t size) |
316 | /*[clinic end generated code: output=ae8cf6002f71626c input=0921093383dfb92d]*/ |
317 | { |
318 | Py_ssize_t n; |
319 | Py_UCS4 *output; |
320 | |
321 | CHECK_INITIALIZED(self); |
322 | CHECK_CLOSED(self); |
323 | |
324 | /* adjust invalid sizes */ |
325 | n = self->string_size - self->pos; |
326 | if (size < 0 || size > n) { |
327 | size = n; |
328 | if (size < 0) |
329 | size = 0; |
330 | } |
331 | |
332 | /* Optimization for seek(0); read() */ |
333 | if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) { |
334 | PyObject *result = make_intermediate(self); |
335 | self->pos = self->string_size; |
336 | return result; |
337 | } |
338 | |
339 | ENSURE_REALIZED(self); |
340 | output = self->buf + self->pos; |
341 | self->pos += size; |
342 | return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size); |
343 | } |
344 | |
345 | /* Internal helper, used by stringio_readline and stringio_iternext */ |
346 | static PyObject * |
347 | _stringio_readline(stringio *self, Py_ssize_t limit) |
348 | { |
349 | Py_UCS4 *start, *end, old_char; |
350 | Py_ssize_t len, consumed; |
351 | |
352 | /* In case of overseek, return the empty string */ |
353 | if (self->pos >= self->string_size) |
354 | return PyUnicode_New(0, 0); |
355 | |
356 | start = self->buf + self->pos; |
357 | if (limit < 0 || limit > self->string_size - self->pos) |
358 | limit = self->string_size - self->pos; |
359 | |
360 | end = start + limit; |
361 | old_char = *end; |
362 | *end = '\0'; |
363 | len = _PyIO_find_line_ending( |
364 | self->readtranslate, self->readuniversal, self->readnl, |
365 | PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed); |
366 | *end = old_char; |
367 | /* If we haven't found any line ending, we just return everything |
368 | (`consumed` is ignored). */ |
369 | if (len < 0) |
370 | len = limit; |
371 | self->pos += len; |
372 | return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len); |
373 | } |
374 | |
375 | /*[clinic input] |
376 | _io.StringIO.readline |
377 | size: Py_ssize_t(accept={int, NoneType}) = -1 |
378 | / |
379 | |
380 | Read until newline or EOF. |
381 | |
382 | Returns an empty string if EOF is hit immediately. |
383 | [clinic start generated code]*/ |
384 | |
static PyObject *
_io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
/*[clinic end generated code: output=cabd6452f1b7e85d input=a5bd70bf682aa276]*/
{
    /* Each of these returns NULL with an exception set on failure. */
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    /* _stringio_readline() reads directly from self->buf, so the contents
       must be moved out of the accumulator first. */
    ENSURE_REALIZED(self);

    /* `size` is the character limit; a negative value means "no limit". */
    return _stringio_readline(self, size);
}
395 | |
396 | static PyObject * |
397 | stringio_iternext(stringio *self) |
398 | { |
399 | PyObject *line; |
400 | |
401 | CHECK_INITIALIZED(self); |
402 | CHECK_CLOSED(self); |
403 | ENSURE_REALIZED(self); |
404 | |
405 | if (Py_IS_TYPE(self, &PyStringIO_Type)) { |
406 | /* Skip method call overhead for speed */ |
407 | line = _stringio_readline(self, -1); |
408 | } |
409 | else { |
410 | /* XXX is subclassing StringIO really supported? */ |
411 | line = PyObject_CallMethodNoArgs((PyObject *)self, |
412 | _PyIO_str_readline); |
413 | if (line && !PyUnicode_Check(line)) { |
414 | PyErr_Format(PyExc_OSError, |
415 | "readline() should have returned a str object, " |
416 | "not '%.200s'" , Py_TYPE(line)->tp_name); |
417 | Py_DECREF(line); |
418 | return NULL; |
419 | } |
420 | } |
421 | |
422 | if (line == NULL) |
423 | return NULL; |
424 | |
425 | if (PyUnicode_GET_LENGTH(line) == 0) { |
426 | /* Reached EOF */ |
427 | Py_DECREF(line); |
428 | return NULL; |
429 | } |
430 | |
431 | return line; |
432 | } |
433 | |
434 | /*[clinic input] |
435 | _io.StringIO.truncate |
436 | pos as size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None |
437 | / |
438 | |
439 | Truncate size to pos. |
440 | |
441 | The pos argument defaults to the current file position, as |
442 | returned by tell(). The current file position is unchanged. |
443 | Returns the new absolute position. |
444 | [clinic start generated code]*/ |
445 | |
446 | static PyObject * |
447 | _io_StringIO_truncate_impl(stringio *self, Py_ssize_t size) |
448 | /*[clinic end generated code: output=eb3aef8e06701365 input=5505cff90ca48b96]*/ |
449 | { |
450 | CHECK_INITIALIZED(self); |
451 | CHECK_CLOSED(self); |
452 | |
453 | if (size < 0) { |
454 | PyErr_Format(PyExc_ValueError, |
455 | "Negative size value %zd" , size); |
456 | return NULL; |
457 | } |
458 | |
459 | if (size < self->string_size) { |
460 | ENSURE_REALIZED(self); |
461 | if (resize_buffer(self, size) < 0) |
462 | return NULL; |
463 | self->string_size = size; |
464 | } |
465 | |
466 | return PyLong_FromSsize_t(size); |
467 | } |
468 | |
469 | /*[clinic input] |
470 | _io.StringIO.seek |
471 | pos: Py_ssize_t |
472 | whence: int = 0 |
473 | / |
474 | |
475 | Change stream position. |
476 | |
477 | Seek to character offset pos relative to position indicated by whence: |
478 | 0 Start of stream (the default). pos should be >= 0; |
479 | 1 Current position - pos must be 0; |
480 | 2 End of stream - pos must be 0. |
481 | Returns the new absolute position. |
482 | [clinic start generated code]*/ |
483 | |
484 | static PyObject * |
485 | _io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence) |
486 | /*[clinic end generated code: output=e9e0ac9a8ae71c25 input=e3855b24e7cae06a]*/ |
487 | { |
488 | CHECK_INITIALIZED(self); |
489 | CHECK_CLOSED(self); |
490 | |
491 | if (whence != 0 && whence != 1 && whence != 2) { |
492 | PyErr_Format(PyExc_ValueError, |
493 | "Invalid whence (%i, should be 0, 1 or 2)" , whence); |
494 | return NULL; |
495 | } |
496 | else if (pos < 0 && whence == 0) { |
497 | PyErr_Format(PyExc_ValueError, |
498 | "Negative seek position %zd" , pos); |
499 | return NULL; |
500 | } |
501 | else if (whence != 0 && pos != 0) { |
502 | PyErr_SetString(PyExc_OSError, |
503 | "Can't do nonzero cur-relative seeks" ); |
504 | return NULL; |
505 | } |
506 | |
507 | /* whence = 0: offset relative to beginning of the string. |
508 | whence = 1: no change to current position. |
509 | whence = 2: change position to end of file. */ |
510 | if (whence == 1) { |
511 | pos = self->pos; |
512 | } |
513 | else if (whence == 2) { |
514 | pos = self->string_size; |
515 | } |
516 | |
517 | self->pos = pos; |
518 | |
519 | return PyLong_FromSsize_t(self->pos); |
520 | } |
521 | |
522 | /*[clinic input] |
523 | _io.StringIO.write |
524 | s as obj: object |
525 | / |
526 | |
527 | Write string to file. |
528 | |
529 | Returns the number of characters written, which is always equal to |
530 | the length of the string. |
531 | [clinic start generated code]*/ |
532 | |
533 | static PyObject * |
534 | _io_StringIO_write(stringio *self, PyObject *obj) |
535 | /*[clinic end generated code: output=0deaba91a15b94da input=cf96f3b16586e669]*/ |
536 | { |
537 | Py_ssize_t size; |
538 | |
539 | CHECK_INITIALIZED(self); |
540 | if (!PyUnicode_Check(obj)) { |
541 | PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'" , |
542 | Py_TYPE(obj)->tp_name); |
543 | return NULL; |
544 | } |
545 | if (PyUnicode_READY(obj)) |
546 | return NULL; |
547 | CHECK_CLOSED(self); |
548 | size = PyUnicode_GET_LENGTH(obj); |
549 | |
550 | if (size > 0 && write_str(self, obj) < 0) |
551 | return NULL; |
552 | |
553 | return PyLong_FromSsize_t(size); |
554 | } |
555 | |
556 | /*[clinic input] |
557 | _io.StringIO.close |
558 | |
559 | Close the IO object. |
560 | |
561 | Attempting any further operation after the object is closed |
562 | will raise a ValueError. |
563 | |
564 | This method has no effect if the file is already closed. |
565 | [clinic start generated code]*/ |
566 | |
static PyObject *
_io_StringIO_close_impl(stringio *self)
/*[clinic end generated code: output=04399355cbe518f1 input=cbc10b45f35d6d46]*/
{
    /* The flag is set first, so the object counts as closed even if
       shrinking the buffer below fails. */
    self->closed = 1;
    /* Free up some memory */
    if (resize_buffer(self, 0) < 0)
        return NULL;
    /* Release the accumulator and the newline-handling objects; every
       operation that would need them checks `closed` beforehand. */
    _PyAccu_Destroy(&self->accu);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->writenl);
    Py_CLEAR(self->decoder);
    Py_RETURN_NONE;
}
581 | |
/* tp_traverse: report the objects owned by `self` to the garbage
   collector.  Only the instance dictionary is visited here. */
static int
stringio_traverse(stringio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dict);
    return 0;
}
588 | |
/* tp_clear: drop the reference to the instance dictionary, the only member
   visited by stringio_traverse(). */
static int
stringio_clear(stringio *self)
{
    Py_CLEAR(self->dict);
    return 0;
}
595 | |
596 | static void |
597 | stringio_dealloc(stringio *self) |
598 | { |
599 | _PyObject_GC_UNTRACK(self); |
600 | self->ok = 0; |
601 | if (self->buf) { |
602 | PyMem_Free(self->buf); |
603 | self->buf = NULL; |
604 | } |
605 | _PyAccu_Destroy(&self->accu); |
606 | Py_CLEAR(self->readnl); |
607 | Py_CLEAR(self->writenl); |
608 | Py_CLEAR(self->decoder); |
609 | Py_CLEAR(self->dict); |
610 | if (self->weakreflist != NULL) |
611 | PyObject_ClearWeakRefs((PyObject *) self); |
612 | Py_TYPE(self)->tp_free(self); |
613 | } |
614 | |
615 | static PyObject * |
616 | stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
617 | { |
618 | stringio *self; |
619 | |
620 | assert(type != NULL && type->tp_alloc != NULL); |
621 | self = (stringio *)type->tp_alloc(type, 0); |
622 | if (self == NULL) |
623 | return NULL; |
624 | |
625 | /* tp_alloc initializes all the fields to zero. So we don't have to |
626 | initialize them here. */ |
627 | |
628 | self->buf = (Py_UCS4 *)PyMem_Malloc(0); |
629 | if (self->buf == NULL) { |
630 | Py_DECREF(self); |
631 | return PyErr_NoMemory(); |
632 | } |
633 | |
634 | return (PyObject *)self; |
635 | } |
636 | |
637 | /*[clinic input] |
638 | _io.StringIO.__init__ |
639 | initial_value as value: object(c_default="NULL") = '' |
640 | newline as newline_obj: object(c_default="NULL") = '\n' |
641 | |
642 | Text I/O implementation using an in-memory buffer. |
643 | |
644 | The initial_value argument sets the value of object. The newline |
645 | argument is like the one of TextIOWrapper's constructor. |
646 | [clinic start generated code]*/ |
647 | |
648 | static int |
649 | _io_StringIO___init___impl(stringio *self, PyObject *value, |
650 | PyObject *newline_obj) |
651 | /*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/ |
652 | { |
653 | const char *newline = "\n" ; |
654 | Py_ssize_t value_len; |
655 | |
656 | /* Parse the newline argument. We only want to allow unicode objects or |
657 | None. */ |
658 | if (newline_obj == Py_None) { |
659 | newline = NULL; |
660 | } |
661 | else if (newline_obj) { |
662 | if (!PyUnicode_Check(newline_obj)) { |
663 | PyErr_Format(PyExc_TypeError, |
664 | "newline must be str or None, not %.200s" , |
665 | Py_TYPE(newline_obj)->tp_name); |
666 | return -1; |
667 | } |
668 | newline = PyUnicode_AsUTF8(newline_obj); |
669 | if (newline == NULL) |
670 | return -1; |
671 | } |
672 | |
673 | if (newline && newline[0] != '\0' |
674 | && !(newline[0] == '\n' && newline[1] == '\0') |
675 | && !(newline[0] == '\r' && newline[1] == '\0') |
676 | && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) { |
677 | PyErr_Format(PyExc_ValueError, |
678 | "illegal newline value: %R" , newline_obj); |
679 | return -1; |
680 | } |
681 | if (value && value != Py_None && !PyUnicode_Check(value)) { |
682 | PyErr_Format(PyExc_TypeError, |
683 | "initial_value must be str or None, not %.200s" , |
684 | Py_TYPE(value)->tp_name); |
685 | return -1; |
686 | } |
687 | |
688 | self->ok = 0; |
689 | |
690 | _PyAccu_Destroy(&self->accu); |
691 | Py_CLEAR(self->readnl); |
692 | Py_CLEAR(self->writenl); |
693 | Py_CLEAR(self->decoder); |
694 | |
695 | assert((newline != NULL && newline_obj != Py_None) || |
696 | (newline == NULL && newline_obj == Py_None)); |
697 | |
698 | if (newline) { |
699 | self->readnl = PyUnicode_FromString(newline); |
700 | if (self->readnl == NULL) |
701 | return -1; |
702 | } |
703 | self->readuniversal = (newline == NULL || newline[0] == '\0'); |
704 | self->readtranslate = (newline == NULL); |
705 | /* If newline == "", we don't translate anything. |
706 | If newline == "\n" or newline == None, we translate to "\n", which is |
707 | a no-op. |
708 | (for newline == None, TextIOWrapper translates to os.linesep, but it |
709 | is pointless for StringIO) |
710 | */ |
711 | if (newline != NULL && newline[0] == '\r') { |
712 | self->writenl = self->readnl; |
713 | Py_INCREF(self->writenl); |
714 | } |
715 | |
716 | if (self->readuniversal) { |
717 | self->decoder = PyObject_CallFunctionObjArgs( |
718 | (PyObject *)&PyIncrementalNewlineDecoder_Type, |
719 | Py_None, self->readtranslate ? Py_True : Py_False, NULL); |
720 | if (self->decoder == NULL) |
721 | return -1; |
722 | } |
723 | |
724 | /* Now everything is set up, resize buffer to size of initial value, |
725 | and copy it */ |
726 | self->string_size = 0; |
727 | if (value && value != Py_None) |
728 | value_len = PyUnicode_GetLength(value); |
729 | else |
730 | value_len = 0; |
731 | if (value_len > 0) { |
732 | /* This is a heuristic, for newline translation might change |
733 | the string length. */ |
734 | if (resize_buffer(self, 0) < 0) |
735 | return -1; |
736 | self->state = STATE_REALIZED; |
737 | self->pos = 0; |
738 | if (write_str(self, value) < 0) |
739 | return -1; |
740 | } |
741 | else { |
742 | /* Empty stringio object, we can start by accumulating */ |
743 | if (resize_buffer(self, 0) < 0) |
744 | return -1; |
745 | if (_PyAccu_Init(&self->accu)) |
746 | return -1; |
747 | self->state = STATE_ACCUMULATING; |
748 | } |
749 | self->pos = 0; |
750 | |
751 | self->closed = 0; |
752 | self->ok = 1; |
753 | return 0; |
754 | } |
755 | |
756 | /* Properties and pseudo-properties */ |
757 | |
758 | /*[clinic input] |
759 | _io.StringIO.readable |
760 | |
761 | Returns True if the IO object can be read. |
762 | [clinic start generated code]*/ |
763 | |
static PyObject *
_io_StringIO_readable_impl(stringio *self)
/*[clinic end generated code: output=b19d44dd8b1ceb99 input=39ce068b224c21ad]*/
{
    /* Raises ValueError on an uninitialized or closed object; otherwise
       the answer is always True. */
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    Py_RETURN_TRUE;
}
772 | |
773 | /*[clinic input] |
774 | _io.StringIO.writable |
775 | |
776 | Returns True if the IO object can be written. |
777 | [clinic start generated code]*/ |
778 | |
static PyObject *
_io_StringIO_writable_impl(stringio *self)
/*[clinic end generated code: output=13e4dd77187074ca input=7a691353aac38835]*/
{
    /* Raises ValueError on an uninitialized or closed object; otherwise
       the answer is always True. */
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    Py_RETURN_TRUE;
}
787 | |
788 | /*[clinic input] |
789 | _io.StringIO.seekable |
790 | |
791 | Returns True if the IO object can be seeked. |
792 | [clinic start generated code]*/ |
793 | |
static PyObject *
_io_StringIO_seekable_impl(stringio *self)
/*[clinic end generated code: output=4d20b4641c756879 input=4c606d05b32952e6]*/
{
    /* Raises ValueError on an uninitialized or closed object; otherwise
       the answer is always True. */
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    Py_RETURN_TRUE;
}
802 | |
803 | /* Pickling support. |
804 | |
805 | The implementation of __getstate__ is similar to the one for BytesIO, |
806 | except that we also save the newline parameter. For __setstate__ and unlike |
807 | BytesIO, we call __init__ to restore the object's state. Doing so allows us |
808 | to avoid decoding the complex newline state while keeping the object |
809 | representation compact. |
810 | |
   See comment in bytesio.c regarding why only pickle protocol 2 and onward
   is supported.
813 | */ |
814 | |
815 | static PyObject * |
816 | stringio_getstate(stringio *self, PyObject *Py_UNUSED(ignored)) |
817 | { |
818 | PyObject *initvalue = _io_StringIO_getvalue_impl(self); |
819 | PyObject *dict; |
820 | PyObject *state; |
821 | |
822 | if (initvalue == NULL) |
823 | return NULL; |
824 | if (self->dict == NULL) { |
825 | Py_INCREF(Py_None); |
826 | dict = Py_None; |
827 | } |
828 | else { |
829 | dict = PyDict_Copy(self->dict); |
830 | if (dict == NULL) { |
831 | Py_DECREF(initvalue); |
832 | return NULL; |
833 | } |
834 | } |
835 | |
836 | state = Py_BuildValue("(OOnN)" , initvalue, |
837 | self->readnl ? self->readnl : Py_None, |
838 | self->pos, dict); |
839 | Py_DECREF(initvalue); |
840 | return state; |
841 | } |
842 | |
843 | static PyObject * |
844 | stringio_setstate(stringio *self, PyObject *state) |
845 | { |
846 | PyObject *initarg; |
847 | PyObject *position_obj; |
848 | PyObject *dict; |
849 | Py_ssize_t pos; |
850 | |
851 | assert(state != NULL); |
852 | CHECK_CLOSED(self); |
853 | |
854 | /* We allow the state tuple to be longer than 4, because we may need |
855 | someday to extend the object's state without breaking |
856 | backward-compatibility. */ |
857 | if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) { |
858 | PyErr_Format(PyExc_TypeError, |
859 | "%.200s.__setstate__ argument should be 4-tuple, got %.200s" , |
860 | Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name); |
861 | return NULL; |
862 | } |
863 | |
864 | /* Initialize the object's state. */ |
865 | initarg = PyTuple_GetSlice(state, 0, 2); |
866 | if (initarg == NULL) |
867 | return NULL; |
868 | if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) { |
869 | Py_DECREF(initarg); |
870 | return NULL; |
871 | } |
872 | Py_DECREF(initarg); |
873 | |
874 | /* Restore the buffer state. Even if __init__ did initialize the buffer, |
875 | we have to initialize it again since __init__ may translate the |
876 | newlines in the initial_value string. We clearly do not want that |
877 | because the string value in the state tuple has already been translated |
878 | once by __init__. So we do not take any chance and replace object's |
879 | buffer completely. */ |
880 | { |
881 | PyObject *item; |
882 | Py_UCS4 *buf; |
883 | Py_ssize_t bufsize; |
884 | |
885 | item = PyTuple_GET_ITEM(state, 0); |
886 | buf = PyUnicode_AsUCS4Copy(item); |
887 | if (buf == NULL) |
888 | return NULL; |
889 | bufsize = PyUnicode_GET_LENGTH(item); |
890 | |
891 | if (resize_buffer(self, bufsize) < 0) { |
892 | PyMem_Free(buf); |
893 | return NULL; |
894 | } |
895 | memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4)); |
896 | PyMem_Free(buf); |
897 | self->string_size = bufsize; |
898 | } |
899 | |
900 | /* Set carefully the position value. Alternatively, we could use the seek |
901 | method instead of modifying self->pos directly to better protect the |
902 | object internal state against erroneous (or malicious) inputs. */ |
903 | position_obj = PyTuple_GET_ITEM(state, 2); |
904 | if (!PyLong_Check(position_obj)) { |
905 | PyErr_Format(PyExc_TypeError, |
906 | "third item of state must be an integer, got %.200s" , |
907 | Py_TYPE(position_obj)->tp_name); |
908 | return NULL; |
909 | } |
910 | pos = PyLong_AsSsize_t(position_obj); |
911 | if (pos == -1 && PyErr_Occurred()) |
912 | return NULL; |
913 | if (pos < 0) { |
914 | PyErr_SetString(PyExc_ValueError, |
915 | "position value cannot be negative" ); |
916 | return NULL; |
917 | } |
918 | self->pos = pos; |
919 | |
920 | /* Set the dictionary of the instance variables. */ |
921 | dict = PyTuple_GET_ITEM(state, 3); |
922 | if (dict != Py_None) { |
923 | if (!PyDict_Check(dict)) { |
924 | PyErr_Format(PyExc_TypeError, |
925 | "fourth item of state should be a dict, got a %.200s" , |
926 | Py_TYPE(dict)->tp_name); |
927 | return NULL; |
928 | } |
929 | if (self->dict) { |
930 | /* Alternatively, we could replace the internal dictionary |
931 | completely. However, it seems more practical to just update it. */ |
932 | if (PyDict_Update(self->dict, dict) < 0) |
933 | return NULL; |
934 | } |
935 | else { |
936 | Py_INCREF(dict); |
937 | self->dict = dict; |
938 | } |
939 | } |
940 | |
941 | Py_RETURN_NONE; |
942 | } |
943 | |
944 | |
/* Getter for the `closed` attribute: True once close() has been called.
   Raises ValueError on an uninitialized object. */
static PyObject *
stringio_closed(stringio *self, void *context)
{
    CHECK_INITIALIZED(self);
    return PyBool_FromLong(self->closed);
}
951 | |
/* Getter for the `line_buffering` pseudo-attribute: always False.
   Raises ValueError on an uninitialized or closed object. */
static PyObject *
stringio_line_buffering(stringio *self, void *context)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    Py_RETURN_FALSE;
}
959 | |
960 | static PyObject * |
961 | stringio_newlines(stringio *self, void *context) |
962 | { |
963 | CHECK_INITIALIZED(self); |
964 | CHECK_CLOSED(self); |
965 | if (self->decoder == NULL) |
966 | Py_RETURN_NONE; |
967 | return PyObject_GetAttr(self->decoder, _PyIO_str_newlines); |
968 | } |
969 | |
970 | #include "clinic/stringio.c.h" |
971 | |
/* Method table of _io.StringIO.  The _IO_STRINGIO_*_METHODDEF entries are
   macros (presumably provided by the Argument Clinic header included above
   -- confirm); the pickling methods are listed explicitly. */
static struct PyMethodDef stringio_methods[] = {
    _IO_STRINGIO_CLOSE_METHODDEF
    _IO_STRINGIO_GETVALUE_METHODDEF
    _IO_STRINGIO_READ_METHODDEF
    _IO_STRINGIO_READLINE_METHODDEF
    _IO_STRINGIO_TELL_METHODDEF
    _IO_STRINGIO_TRUNCATE_METHODDEF
    _IO_STRINGIO_SEEK_METHODDEF
    _IO_STRINGIO_WRITE_METHODDEF

    _IO_STRINGIO_SEEKABLE_METHODDEF
    _IO_STRINGIO_READABLE_METHODDEF
    _IO_STRINGIO_WRITABLE_METHODDEF

    /* Pickling support */
    {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
    {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
    {NULL, NULL}        /* sentinel */
};
990 | |
/* Attributes of _io.StringIO.  All of them are read-only: no setter is
   registered. */
static PyGetSetDef stringio_getset[] = {
    {"closed",         (getter)stringio_closed,         NULL, NULL},
    {"newlines",       (getter)stringio_newlines,       NULL, NULL},
    /*  (The following comment is taken from the original Python wrapper:)
        XXX Cruft to support the TextIOWrapper API. This would only
        be meaningful if StringIO supported the buffer attribute.
        Hopefully, a better solution than adding these pseudo-attributes
        will be found.
    */
    {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
    {NULL}  /* sentinel */
};
1003 | |
1004 | PyTypeObject PyStringIO_Type = { |
1005 | PyVarObject_HEAD_INIT(NULL, 0) |
1006 | "_io.StringIO" , /*tp_name*/ |
1007 | sizeof(stringio), /*tp_basicsize*/ |
1008 | 0, /*tp_itemsize*/ |
1009 | (destructor)stringio_dealloc, /*tp_dealloc*/ |
1010 | 0, /*tp_vectorcall_offset*/ |
1011 | 0, /*tp_getattr*/ |
1012 | 0, /*tp_setattr*/ |
1013 | 0, /*tp_as_async*/ |
1014 | 0, /*tp_repr*/ |
1015 | 0, /*tp_as_number*/ |
1016 | 0, /*tp_as_sequence*/ |
1017 | 0, /*tp_as_mapping*/ |
1018 | 0, /*tp_hash*/ |
1019 | 0, /*tp_call*/ |
1020 | 0, /*tp_str*/ |
1021 | 0, /*tp_getattro*/ |
1022 | 0, /*tp_setattro*/ |
1023 | 0, /*tp_as_buffer*/ |
1024 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1025 | | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ |
1026 | _io_StringIO___init____doc__, /*tp_doc*/ |
1027 | (traverseproc)stringio_traverse, /*tp_traverse*/ |
1028 | (inquiry)stringio_clear, /*tp_clear*/ |
1029 | 0, /*tp_richcompare*/ |
1030 | offsetof(stringio, weakreflist), /*tp_weaklistoffset*/ |
1031 | 0, /*tp_iter*/ |
1032 | (iternextfunc)stringio_iternext, /*tp_iternext*/ |
1033 | stringio_methods, /*tp_methods*/ |
1034 | 0, /*tp_members*/ |
1035 | stringio_getset, /*tp_getset*/ |
1036 | 0, /*tp_base*/ |
1037 | 0, /*tp_dict*/ |
1038 | 0, /*tp_descr_get*/ |
1039 | 0, /*tp_descr_set*/ |
1040 | offsetof(stringio, dict), /*tp_dictoffset*/ |
1041 | _io_StringIO___init__, /*tp_init*/ |
1042 | 0, /*tp_alloc*/ |
1043 | stringio_new, /*tp_new*/ |
1044 | }; |
1045 | |