1 | /* File object implementation (what's left of it -- see io.py) */ |
2 | |
3 | #define PY_SSIZE_T_CLEAN |
4 | #include "Python.h" |
5 | #include "pycore_runtime.h" // _PyRuntime |
6 | |
/* Character-input and stream-locking wrappers used by
   Py_UniversalNewlineFgets().  When getc_unlocked() is available (and the
   build is not under MemorySanitizer) GETC maps to getc_unlocked() and
   FLOCKFILE/FUNLOCKFILE map to flockfile()/funlockfile(); otherwise GETC is
   plain getc() and the two lock macros expand to nothing. */
#if defined(HAVE_GETC_UNLOCKED) && !defined(_Py_MEMORY_SANITIZER)
/* clang MemorySanitizer doesn't yet understand getc_unlocked. */
#define GETC(f) getc_unlocked(f)
#define FLOCKFILE(f) flockfile(f)
#define FUNLOCKFILE(f) funlockfile(f)
#else
#define GETC(f) getc(f)
#define FLOCKFILE(f)
#define FUNLOCKFILE(f)
#endif
17 | |
/* Newline flags.  The non-zero values are distinct bits, so several of
   them can be OR-ed together into a single int. */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR 1            /* \r newline seen */
#define NEWLINE_LF 2            /* \n newline seen */
#define NEWLINE_CRLF 4          /* \r\n newline seen */
23 | |
24 | #ifdef __cplusplus |
25 | extern "C" { |
26 | #endif |
27 | |
/* File-scope identifier for the name "open"; the functions below pass it
   as &PyId_open when calling the open() function of the _io module. */
_Py_IDENTIFIER(open);
29 | |
30 | /* External C interface */ |
31 | |
32 | PyObject * |
33 | PyFile_FromFd(int fd, const char *name, const char *mode, int buffering, const char *encoding, |
34 | const char *errors, const char *newline, int closefd) |
35 | { |
36 | PyObject *io, *stream; |
37 | |
38 | /* import _io in case we are being used to open io.py */ |
39 | io = PyImport_ImportModule("_io" ); |
40 | if (io == NULL) |
41 | return NULL; |
42 | stream = _PyObject_CallMethodId(io, &PyId_open, "isisssO" , fd, mode, |
43 | buffering, encoding, errors, |
44 | newline, closefd ? Py_True : Py_False); |
45 | Py_DECREF(io); |
46 | if (stream == NULL) |
47 | return NULL; |
48 | /* ignore name attribute because the name attribute of _BufferedIOMixin |
49 | and TextIOWrapper is read only */ |
50 | return stream; |
51 | } |
52 | |
53 | PyObject * |
54 | PyFile_GetLine(PyObject *f, int n) |
55 | { |
56 | _Py_IDENTIFIER(readline); |
57 | PyObject *result; |
58 | |
59 | if (f == NULL) { |
60 | PyErr_BadInternalCall(); |
61 | return NULL; |
62 | } |
63 | |
64 | if (n <= 0) { |
65 | result = _PyObject_CallMethodIdNoArgs(f, &PyId_readline); |
66 | } |
67 | else { |
68 | result = _PyObject_CallMethodId(f, &PyId_readline, "i" , n); |
69 | } |
70 | if (result != NULL && !PyBytes_Check(result) && |
71 | !PyUnicode_Check(result)) { |
72 | Py_DECREF(result); |
73 | result = NULL; |
74 | PyErr_SetString(PyExc_TypeError, |
75 | "object.readline() returned non-string" ); |
76 | } |
77 | |
78 | if (n < 0 && result != NULL && PyBytes_Check(result)) { |
79 | const char *s = PyBytes_AS_STRING(result); |
80 | Py_ssize_t len = PyBytes_GET_SIZE(result); |
81 | if (len == 0) { |
82 | Py_DECREF(result); |
83 | result = NULL; |
84 | PyErr_SetString(PyExc_EOFError, |
85 | "EOF when reading a line" ); |
86 | } |
87 | else if (s[len-1] == '\n') { |
88 | if (Py_REFCNT(result) == 1) |
89 | _PyBytes_Resize(&result, len-1); |
90 | else { |
91 | PyObject *v; |
92 | v = PyBytes_FromStringAndSize(s, len-1); |
93 | Py_DECREF(result); |
94 | result = v; |
95 | } |
96 | } |
97 | } |
98 | if (n < 0 && result != NULL && PyUnicode_Check(result)) { |
99 | Py_ssize_t len = PyUnicode_GET_LENGTH(result); |
100 | if (len == 0) { |
101 | Py_DECREF(result); |
102 | result = NULL; |
103 | PyErr_SetString(PyExc_EOFError, |
104 | "EOF when reading a line" ); |
105 | } |
106 | else if (PyUnicode_READ_CHAR(result, len-1) == '\n') { |
107 | PyObject *v; |
108 | v = PyUnicode_Substring(result, 0, len-1); |
109 | Py_DECREF(result); |
110 | result = v; |
111 | } |
112 | } |
113 | return result; |
114 | } |
115 | |
116 | /* Interfaces to write objects/strings to file-like objects */ |
117 | |
118 | int |
119 | PyFile_WriteObject(PyObject *v, PyObject *f, int flags) |
120 | { |
121 | PyObject *writer, *value, *result; |
122 | _Py_IDENTIFIER(write); |
123 | |
124 | if (f == NULL) { |
125 | PyErr_SetString(PyExc_TypeError, "writeobject with NULL file" ); |
126 | return -1; |
127 | } |
128 | writer = _PyObject_GetAttrId(f, &PyId_write); |
129 | if (writer == NULL) |
130 | return -1; |
131 | if (flags & Py_PRINT_RAW) { |
132 | value = PyObject_Str(v); |
133 | } |
134 | else |
135 | value = PyObject_Repr(v); |
136 | if (value == NULL) { |
137 | Py_DECREF(writer); |
138 | return -1; |
139 | } |
140 | result = PyObject_CallOneArg(writer, value); |
141 | Py_DECREF(value); |
142 | Py_DECREF(writer); |
143 | if (result == NULL) |
144 | return -1; |
145 | Py_DECREF(result); |
146 | return 0; |
147 | } |
148 | |
149 | int |
150 | PyFile_WriteString(const char *s, PyObject *f) |
151 | { |
152 | if (f == NULL) { |
153 | /* Should be caused by a pre-existing error */ |
154 | if (!PyErr_Occurred()) |
155 | PyErr_SetString(PyExc_SystemError, |
156 | "null file for PyFile_WriteString" ); |
157 | return -1; |
158 | } |
159 | else if (!PyErr_Occurred()) { |
160 | PyObject *v = PyUnicode_FromString(s); |
161 | int err; |
162 | if (v == NULL) |
163 | return -1; |
164 | err = PyFile_WriteObject(v, f, Py_PRINT_RAW); |
165 | Py_DECREF(v); |
166 | return err; |
167 | } |
168 | else |
169 | return -1; |
170 | } |
171 | |
172 | /* Try to get a file-descriptor from a Python object. If the object |
173 | is an integer, its value is returned. If not, the |
174 | object's fileno() method is called if it exists; the method must return |
175 | an integer, which is returned as the file descriptor value. |
176 | -1 is returned on failure. |
177 | */ |
178 | |
179 | int |
180 | PyObject_AsFileDescriptor(PyObject *o) |
181 | { |
182 | int fd; |
183 | PyObject *meth; |
184 | _Py_IDENTIFIER(fileno); |
185 | |
186 | if (PyLong_Check(o)) { |
187 | fd = _PyLong_AsInt(o); |
188 | } |
189 | else if (_PyObject_LookupAttrId(o, &PyId_fileno, &meth) < 0) { |
190 | return -1; |
191 | } |
192 | else if (meth != NULL) { |
193 | PyObject *fno = _PyObject_CallNoArg(meth); |
194 | Py_DECREF(meth); |
195 | if (fno == NULL) |
196 | return -1; |
197 | |
198 | if (PyLong_Check(fno)) { |
199 | fd = _PyLong_AsInt(fno); |
200 | Py_DECREF(fno); |
201 | } |
202 | else { |
203 | PyErr_SetString(PyExc_TypeError, |
204 | "fileno() returned a non-integer" ); |
205 | Py_DECREF(fno); |
206 | return -1; |
207 | } |
208 | } |
209 | else { |
210 | PyErr_SetString(PyExc_TypeError, |
211 | "argument must be an int, or have a fileno() method." ); |
212 | return -1; |
213 | } |
214 | |
215 | if (fd == -1 && PyErr_Occurred()) |
216 | return -1; |
217 | if (fd < 0) { |
218 | PyErr_Format(PyExc_ValueError, |
219 | "file descriptor cannot be a negative integer (%i)" , |
220 | fd); |
221 | return -1; |
222 | } |
223 | return fd; |
224 | } |
225 | |
226 | int |
227 | _PyLong_FileDescriptor_Converter(PyObject *o, void *ptr) |
228 | { |
229 | int fd = PyObject_AsFileDescriptor(o); |
230 | if (fd == -1) { |
231 | return 0; |
232 | } |
233 | *(int *)ptr = fd; |
234 | return 1; |
235 | } |
236 | |
237 | /* |
238 | ** Py_UniversalNewlineFgets is an fgets variation that understands |
239 | ** all of \r, \n and \r\n conventions. |
240 | ** The stream should be opened in binary mode. |
241 | ** If fobj is NULL the routine always does newline conversion, and |
242 | ** it may peek one char ahead to gobble the second char in \r\n. |
243 | ** If fobj is non-NULL it must be a PyFileObject. In this case there |
244 | ** is no readahead but in stead a flag is used to skip a following |
245 | ** \n on the next read. Also, if the file is open in binary mode |
246 | ** the whole conversion is skipped. Finally, the routine keeps track of |
247 | ** the different types of newlines seen. |
248 | ** Note that we need no error handling: fgets() treats error and eof |
249 | ** identically. |
250 | */ |
251 | char * |
252 | Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) |
253 | { |
254 | char *p = buf; |
255 | int c; |
256 | int newlinetypes = 0; |
257 | int skipnextlf = 0; |
258 | |
259 | if (fobj) { |
260 | errno = ENXIO; /* What can you do... */ |
261 | return NULL; |
262 | } |
263 | FLOCKFILE(stream); |
264 | c = 'x'; /* Shut up gcc warning */ |
265 | while (--n > 0 && (c = GETC(stream)) != EOF ) { |
266 | if (skipnextlf ) { |
267 | skipnextlf = 0; |
268 | if (c == '\n') { |
269 | /* Seeing a \n here with skipnextlf true |
270 | ** means we saw a \r before. |
271 | */ |
272 | newlinetypes |= NEWLINE_CRLF; |
273 | c = GETC(stream); |
274 | if (c == EOF) break; |
275 | } else { |
276 | /* |
277 | ** Note that c == EOF also brings us here, |
278 | ** so we're okay if the last char in the file |
279 | ** is a CR. |
280 | */ |
281 | newlinetypes |= NEWLINE_CR; |
282 | } |
283 | } |
284 | if (c == '\r') { |
285 | /* A \r is translated into a \n, and we skip |
286 | ** an adjacent \n, if any. We don't set the |
287 | ** newlinetypes flag until we've seen the next char. |
288 | */ |
289 | skipnextlf = 1; |
290 | c = '\n'; |
291 | } else if ( c == '\n') { |
292 | newlinetypes |= NEWLINE_LF; |
293 | } |
294 | *p++ = c; |
295 | if (c == '\n') break; |
296 | } |
297 | /* if ( c == EOF && skipnextlf ) |
298 | newlinetypes |= NEWLINE_CR; */ |
299 | FUNLOCKFILE(stream); |
300 | *p = '\0'; |
301 | if ( skipnextlf ) { |
302 | /* If we have no file object we cannot save the |
303 | ** skipnextlf flag. We have to readahead, which |
304 | ** will cause a pause if we're reading from an |
305 | ** interactive stream, but that is very unlikely |
306 | ** unless we're doing something silly like |
307 | ** exec(open("/dev/tty").read()). |
308 | */ |
309 | c = GETC(stream); |
310 | if ( c != '\n' ) |
311 | ungetc(c, stream); |
312 | } |
313 | if (p == buf) |
314 | return NULL; |
315 | return buf; |
316 | } |
317 | |
/* **************************** std printer ****************************
 * The stdprinter is used during the bootstrapping phase as a preliminary
 * file-like object for sys.stderr.
 */
322 | |
/* Instance layout of the std printer: the standard object header followed
   by the file descriptor that the write(), fileno() and isatty() methods
   operate on. */
typedef struct {
    PyObject_HEAD
    int fd;     /* descriptor stored by PyFile_NewStdPrinter() */
} PyStdPrinter_Object;
327 | |
328 | PyObject * |
329 | PyFile_NewStdPrinter(int fd) |
330 | { |
331 | PyStdPrinter_Object *self; |
332 | |
333 | if (fd != fileno(stdout) && fd != fileno(stderr)) { |
334 | /* not enough infrastructure for PyErr_BadInternalCall() */ |
335 | return NULL; |
336 | } |
337 | |
338 | self = PyObject_New(PyStdPrinter_Object, |
339 | &PyStdPrinter_Type); |
340 | if (self != NULL) { |
341 | self->fd = fd; |
342 | } |
343 | return (PyObject*)self; |
344 | } |
345 | |
346 | static PyObject * |
347 | stdprinter_write(PyStdPrinter_Object *self, PyObject *args) |
348 | { |
349 | PyObject *unicode; |
350 | PyObject *bytes = NULL; |
351 | const char *str; |
352 | Py_ssize_t n; |
353 | int err; |
354 | |
355 | /* The function can clear the current exception */ |
356 | assert(!PyErr_Occurred()); |
357 | |
358 | if (self->fd < 0) { |
359 | /* fd might be invalid on Windows |
360 | * I can't raise an exception here. It may lead to an |
361 | * unlimited recursion in the case stderr is invalid. |
362 | */ |
363 | Py_RETURN_NONE; |
364 | } |
365 | |
366 | if (!PyArg_ParseTuple(args, "U" , &unicode)) { |
367 | return NULL; |
368 | } |
369 | |
370 | /* Encode Unicode to UTF-8/backslashreplace */ |
371 | str = PyUnicode_AsUTF8AndSize(unicode, &n); |
372 | if (str == NULL) { |
373 | PyErr_Clear(); |
374 | bytes = _PyUnicode_AsUTF8String(unicode, "backslashreplace" ); |
375 | if (bytes == NULL) |
376 | return NULL; |
377 | str = PyBytes_AS_STRING(bytes); |
378 | n = PyBytes_GET_SIZE(bytes); |
379 | } |
380 | |
381 | n = _Py_write(self->fd, str, n); |
382 | /* save errno, it can be modified indirectly by Py_XDECREF() */ |
383 | err = errno; |
384 | |
385 | Py_XDECREF(bytes); |
386 | |
387 | if (n == -1) { |
388 | if (err == EAGAIN) { |
389 | PyErr_Clear(); |
390 | Py_RETURN_NONE; |
391 | } |
392 | return NULL; |
393 | } |
394 | |
395 | return PyLong_FromSsize_t(n); |
396 | } |
397 | |
398 | static PyObject * |
399 | stdprinter_fileno(PyStdPrinter_Object *self, PyObject *Py_UNUSED(ignored)) |
400 | { |
401 | return PyLong_FromLong((long) self->fd); |
402 | } |
403 | |
404 | static PyObject * |
405 | stdprinter_repr(PyStdPrinter_Object *self) |
406 | { |
407 | return PyUnicode_FromFormat("<stdprinter(fd=%d) object at %p>" , |
408 | self->fd, self); |
409 | } |
410 | |
411 | static PyObject * |
412 | stdprinter_noop(PyStdPrinter_Object *self, PyObject *Py_UNUSED(ignored)) |
413 | { |
414 | Py_RETURN_NONE; |
415 | } |
416 | |
417 | static PyObject * |
418 | stdprinter_isatty(PyStdPrinter_Object *self, PyObject *Py_UNUSED(ignored)) |
419 | { |
420 | long res; |
421 | if (self->fd < 0) { |
422 | Py_RETURN_FALSE; |
423 | } |
424 | |
425 | Py_BEGIN_ALLOW_THREADS |
426 | res = isatty(self->fd); |
427 | Py_END_ALLOW_THREADS |
428 | |
429 | return PyBool_FromLong(res); |
430 | } |
431 | |
/* Method table of the std printer type.  close() and flush() both map to
   the do-nothing stdprinter_noop(); every entry has an empty docstring. */
static PyMethodDef stdprinter_methods[] = {
    {"close", (PyCFunction)stdprinter_noop, METH_NOARGS, ""},
    {"flush", (PyCFunction)stdprinter_noop, METH_NOARGS, ""},
    {"fileno", (PyCFunction)stdprinter_fileno, METH_NOARGS, ""},
    {"isatty", (PyCFunction)stdprinter_isatty, METH_NOARGS, ""},
    {"write", (PyCFunction)stdprinter_write, METH_VARARGS, ""},
    {NULL, NULL} /* sentinel: terminates the table */
};
440 | |
441 | static PyObject * |
442 | get_closed(PyStdPrinter_Object *self, void *closure) |
443 | { |
444 | Py_RETURN_FALSE; |
445 | } |
446 | |
447 | static PyObject * |
448 | get_mode(PyStdPrinter_Object *self, void *closure) |
449 | { |
450 | return PyUnicode_FromString("w" ); |
451 | } |
452 | |
453 | static PyObject * |
454 | get_encoding(PyStdPrinter_Object *self, void *closure) |
455 | { |
456 | Py_RETURN_NONE; |
457 | } |
458 | |
/* Attribute table of the std printer type.  Every entry has a getter and a
   NULL setter. */
static PyGetSetDef stdprinter_getsetlist[] = {
    {"closed", (getter)get_closed, NULL, "True if the file is closed"},
    {"encoding", (getter)get_encoding, NULL, "Encoding of the file"},
    {"mode", (getter)get_mode, NULL, "String giving the file mode"},
    {0},    /* sentinel: terminates the table */
};
465 | |
/* Static type object for the std printer.  It wires up the repr, the
   method table and the getset table defined above; all other behaviour
   slots are left 0.  tp_new is 0 and Py_TPFLAGS_DISALLOW_INSTANTIATION is
   set; in this file instances are created by PyFile_NewStdPrinter().
   NOTE(review): tp_name is "stderrprinter" while the repr text says
   "stdprinter" -- confirm whether the difference is intentional. */
PyTypeObject PyStdPrinter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "stderrprinter",                            /* tp_name */
    sizeof(PyStdPrinter_Object),                /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    (reprfunc)stdprinter_repr,                  /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, /* tp_flags */
    0,                                          /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    stdprinter_methods,                         /* tp_methods */
    0,                                          /* tp_members */
    stdprinter_getsetlist,                      /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    0,                                          /* tp_new */
    PyObject_Del,                               /* tp_free */
};
508 | |
509 | |
510 | /* ************************** open_code hook *************************** |
511 | * The open_code hook allows embedders to override the method used to |
512 | * open files that are going to be used by the runtime to execute code |
513 | */ |
514 | |
515 | int |
516 | PyFile_SetOpenCodeHook(Py_OpenCodeHookFunction hook, void *userData) { |
517 | if (Py_IsInitialized() && |
518 | PySys_Audit("setopencodehook" , NULL) < 0) { |
519 | return -1; |
520 | } |
521 | |
522 | if (_PyRuntime.open_code_hook) { |
523 | if (Py_IsInitialized()) { |
524 | PyErr_SetString(PyExc_SystemError, |
525 | "failed to change existing open_code hook" ); |
526 | } |
527 | return -1; |
528 | } |
529 | |
530 | _PyRuntime.open_code_hook = hook; |
531 | _PyRuntime.open_code_userdata = userData; |
532 | return 0; |
533 | } |
534 | |
535 | PyObject * |
536 | PyFile_OpenCodeObject(PyObject *path) |
537 | { |
538 | PyObject *iomod, *f = NULL; |
539 | |
540 | if (!PyUnicode_Check(path)) { |
541 | PyErr_Format(PyExc_TypeError, "'path' must be 'str', not '%.200s'" , |
542 | Py_TYPE(path)->tp_name); |
543 | return NULL; |
544 | } |
545 | |
546 | Py_OpenCodeHookFunction hook = _PyRuntime.open_code_hook; |
547 | if (hook) { |
548 | f = hook(path, _PyRuntime.open_code_userdata); |
549 | } else { |
550 | iomod = PyImport_ImportModule("_io" ); |
551 | if (iomod) { |
552 | f = _PyObject_CallMethodId(iomod, &PyId_open, "Os" , |
553 | path, "rb" ); |
554 | Py_DECREF(iomod); |
555 | } |
556 | } |
557 | |
558 | return f; |
559 | } |
560 | |
561 | PyObject * |
562 | PyFile_OpenCode(const char *utf8path) |
563 | { |
564 | PyObject *pathobj = PyUnicode_FromString(utf8path); |
565 | PyObject *f; |
566 | if (!pathobj) { |
567 | return NULL; |
568 | } |
569 | f = PyFile_OpenCodeObject(pathobj); |
570 | Py_DECREF(pathobj); |
571 | return f; |
572 | } |
573 | |
574 | |
575 | #ifdef __cplusplus |
576 | } |
577 | #endif |
578 | |