1 | /* stringlib: bytes joining implementation */ |
2 | |
3 | #if STRINGLIB_IS_UNICODE |
4 | #error join.h only compatible with byte-wise strings |
5 | #endif |
6 | |
7 | Py_LOCAL_INLINE(PyObject *) |
8 | STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) |
9 | { |
10 | const char *sepstr = STRINGLIB_STR(sep); |
11 | Py_ssize_t seplen = STRINGLIB_LEN(sep); |
12 | PyObject *res = NULL; |
13 | char *p; |
14 | Py_ssize_t seqlen = 0; |
15 | Py_ssize_t sz = 0; |
16 | Py_ssize_t i, nbufs; |
17 | PyObject *seq, *item; |
18 | Py_buffer *buffers = NULL; |
19 | #define NB_STATIC_BUFFERS 10 |
20 | Py_buffer static_buffers[NB_STATIC_BUFFERS]; |
21 | #define GIL_THRESHOLD 1048576 |
22 | int drop_gil = 1; |
23 | PyThreadState *save = NULL; |
24 | |
25 | seq = PySequence_Fast(iterable, "can only join an iterable" ); |
26 | if (seq == NULL) { |
27 | return NULL; |
28 | } |
29 | |
30 | seqlen = PySequence_Fast_GET_SIZE(seq); |
31 | if (seqlen == 0) { |
32 | Py_DECREF(seq); |
33 | return STRINGLIB_NEW(NULL, 0); |
34 | } |
35 | #ifndef STRINGLIB_MUTABLE |
36 | if (seqlen == 1) { |
37 | item = PySequence_Fast_GET_ITEM(seq, 0); |
38 | if (STRINGLIB_CHECK_EXACT(item)) { |
39 | Py_INCREF(item); |
40 | Py_DECREF(seq); |
41 | return item; |
42 | } |
43 | } |
44 | #endif |
45 | if (seqlen > NB_STATIC_BUFFERS) { |
46 | buffers = PyMem_NEW(Py_buffer, seqlen); |
47 | if (buffers == NULL) { |
48 | Py_DECREF(seq); |
49 | PyErr_NoMemory(); |
50 | return NULL; |
51 | } |
52 | } |
53 | else { |
54 | buffers = static_buffers; |
55 | } |
56 | |
57 | /* Here is the general case. Do a pre-pass to figure out the total |
58 | * amount of space we'll need (sz), and see whether all arguments are |
59 | * bytes-like. |
60 | */ |
61 | for (i = 0, nbufs = 0; i < seqlen; i++) { |
62 | Py_ssize_t itemlen; |
63 | item = PySequence_Fast_GET_ITEM(seq, i); |
64 | if (PyBytes_CheckExact(item)) { |
65 | /* Fast path. */ |
66 | Py_INCREF(item); |
67 | buffers[i].obj = item; |
68 | buffers[i].buf = PyBytes_AS_STRING(item); |
69 | buffers[i].len = PyBytes_GET_SIZE(item); |
70 | } |
71 | else { |
72 | if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) { |
73 | PyErr_Format(PyExc_TypeError, |
74 | "sequence item %zd: expected a bytes-like object, " |
75 | "%.80s found" , |
76 | i, Py_TYPE(item)->tp_name); |
77 | goto error; |
78 | } |
79 | /* If the backing objects are mutable, then dropping the GIL |
80 | * opens up race conditions where another thread tries to modify |
81 | * the object which we hold a buffer on it. Such code has data |
82 | * races anyway, but this is a conservative approach that avoids |
83 | * changing the behaviour of that data race. |
84 | */ |
85 | drop_gil = 0; |
86 | } |
87 | nbufs = i + 1; /* for error cleanup */ |
88 | itemlen = buffers[i].len; |
89 | if (itemlen > PY_SSIZE_T_MAX - sz) { |
90 | PyErr_SetString(PyExc_OverflowError, |
91 | "join() result is too long" ); |
92 | goto error; |
93 | } |
94 | sz += itemlen; |
95 | if (i != 0) { |
96 | if (seplen > PY_SSIZE_T_MAX - sz) { |
97 | PyErr_SetString(PyExc_OverflowError, |
98 | "join() result is too long" ); |
99 | goto error; |
100 | } |
101 | sz += seplen; |
102 | } |
103 | if (seqlen != PySequence_Fast_GET_SIZE(seq)) { |
104 | PyErr_SetString(PyExc_RuntimeError, |
105 | "sequence changed size during iteration" ); |
106 | goto error; |
107 | } |
108 | } |
109 | |
110 | /* Allocate result space. */ |
111 | res = STRINGLIB_NEW(NULL, sz); |
112 | if (res == NULL) |
113 | goto error; |
114 | |
115 | /* Catenate everything. */ |
116 | p = STRINGLIB_STR(res); |
117 | if (sz < GIL_THRESHOLD) { |
118 | drop_gil = 0; /* Benefits are likely outweighed by the overheads */ |
119 | } |
120 | if (drop_gil) { |
121 | save = PyEval_SaveThread(); |
122 | } |
123 | if (!seplen) { |
124 | /* fast path */ |
125 | for (i = 0; i < nbufs; i++) { |
126 | Py_ssize_t n = buffers[i].len; |
127 | char *q = buffers[i].buf; |
128 | memcpy(p, q, n); |
129 | p += n; |
130 | } |
131 | } |
132 | else { |
133 | for (i = 0; i < nbufs; i++) { |
134 | Py_ssize_t n; |
135 | char *q; |
136 | if (i) { |
137 | memcpy(p, sepstr, seplen); |
138 | p += seplen; |
139 | } |
140 | n = buffers[i].len; |
141 | q = buffers[i].buf; |
142 | memcpy(p, q, n); |
143 | p += n; |
144 | } |
145 | } |
146 | if (drop_gil) { |
147 | PyEval_RestoreThread(save); |
148 | } |
149 | goto done; |
150 | |
151 | error: |
152 | res = NULL; |
153 | done: |
154 | Py_DECREF(seq); |
155 | for (i = 0; i < nbufs; i++) |
156 | PyBuffer_Release(&buffers[i]); |
157 | if (buffers != static_buffers) |
158 | PyMem_Free(buffers); |
159 | return res; |
160 | } |
161 | |
162 | #undef NB_STATIC_BUFFERS |
163 | #undef GIL_THRESHOLD |
164 | |