1/*
2 _BlocksOutputBuffer is used to maintain an output buffer
3 that has unpredictable size. Suitable for compression/decompression
4 API (bz2/lzma/zlib) that has stream->next_out and stream->avail_out:
5
6 stream->next_out: point to the next output position.
7 stream->avail_out: the number of available bytes left in the buffer.
8
   It maintains a list of bytes objects, so there is no overhead of resizing
   the buffer.
11
12 Usage:
13
14 1, Initialize the struct instance like this:
15 _BlocksOutputBuffer buffer = {.list = NULL};
16 Set .list to NULL for _BlocksOutputBuffer_OnError()
17
   2, Initialize the buffer using one of these functions:
19 _BlocksOutputBuffer_InitAndGrow()
20 _BlocksOutputBuffer_InitWithSize()
21
22 3, If (avail_out == 0), grow the buffer:
23 _BlocksOutputBuffer_Grow()
24
25 4, Get the current outputted data size:
26 _BlocksOutputBuffer_GetDataSize()
27
28 5, Finish the buffer, and return a bytes object:
29 _BlocksOutputBuffer_Finish()
30
31 6, Clean up the buffer when an error occurred:
32 _BlocksOutputBuffer_OnError()
33*/
34
35#ifndef Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H
36#define Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H
37#ifdef __cplusplus
38extern "C" {
39#endif
40
41#include "Python.h"
42
43typedef struct {
44 // List of bytes objects
45 PyObject *list;
46 // Number of whole allocated size
47 Py_ssize_t allocated;
48 // Max length of the buffer, negative number means unlimited length.
49 Py_ssize_t max_length;
50} _BlocksOutputBuffer;
51
52static const char unable_allocate_msg[] = "Unable to allocate output buffer.";
53
54/* In 32-bit build, the max block size should <= INT32_MAX. */
55#define OUTPUT_BUFFER_MAX_BLOCK_SIZE (256*1024*1024)
56
57/* Block size sequence */
58#define KB (1024)
59#define MB (1024*1024)
60static const Py_ssize_t BUFFER_BLOCK_SIZE[] =
61 { 32*KB, 64*KB, 256*KB, 1*MB, 4*MB, 8*MB, 16*MB, 16*MB,
62 32*MB, 32*MB, 32*MB, 32*MB, 64*MB, 64*MB, 128*MB, 128*MB,
63 OUTPUT_BUFFER_MAX_BLOCK_SIZE };
64#undef KB
65#undef MB
66
/* According to the block sizes defined by BUFFER_BLOCK_SIZE, the total
   allocated size grows in the following steps:
69 1 32 KB +32 KB
70 2 96 KB +64 KB
71 3 352 KB +256 KB
72 4 1.34 MB +1 MB
73 5 5.34 MB +4 MB
74 6 13.34 MB +8 MB
75 7 29.34 MB +16 MB
76 8 45.34 MB +16 MB
77 9 77.34 MB +32 MB
78 10 109.34 MB +32 MB
79 11 141.34 MB +32 MB
80 12 173.34 MB +32 MB
81 13 237.34 MB +64 MB
82 14 301.34 MB +64 MB
83 15 429.34 MB +128 MB
84 16 557.34 MB +128 MB
85 17 813.34 MB +256 MB
86 18 1069.34 MB +256 MB
87 19 1325.34 MB +256 MB
88 20 1581.34 MB +256 MB
89 21 1837.34 MB +256 MB
90 22 2093.34 MB +256 MB
91 ...
92*/
93
94/* Initialize the buffer, and grow the buffer.
95
96 max_length: Max length of the buffer, -1 for unlimited length.
97
98 On success, return allocated size (>=0)
99 On failure, return -1
100*/
101static inline Py_ssize_t
102_BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer,
103 const Py_ssize_t max_length,
104 void **next_out)
105{
106 PyObject *b;
107 Py_ssize_t block_size;
108
109 // ensure .list was set to NULL
110 assert(buffer->list == NULL);
111
112 // get block size
113 if (0 <= max_length && max_length < BUFFER_BLOCK_SIZE[0]) {
114 block_size = max_length;
115 } else {
116 block_size = BUFFER_BLOCK_SIZE[0];
117 }
118
119 // the first block
120 b = PyBytes_FromStringAndSize(NULL, block_size);
121 if (b == NULL) {
122 return -1;
123 }
124
125 // create the list
126 buffer->list = PyList_New(1);
127 if (buffer->list == NULL) {
128 Py_DECREF(b);
129 return -1;
130 }
131 PyList_SET_ITEM(buffer->list, 0, b);
132
133 // set variables
134 buffer->allocated = block_size;
135 buffer->max_length = max_length;
136
137 *next_out = PyBytes_AS_STRING(b);
138 return block_size;
139}
140
141/* Initialize the buffer, with an initial size.
142
143 Check block size limit in the outer wrapper function. For example, some libs
144 accept UINT32_MAX as the maximum block size, then init_size should <= it.
145
146 On success, return allocated size (>=0)
147 On failure, return -1
148*/
149static inline Py_ssize_t
150_BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer,
151 const Py_ssize_t init_size,
152 void **next_out)
153{
154 PyObject *b;
155
156 // ensure .list was set to NULL
157 assert(buffer->list == NULL);
158
159 // the first block
160 b = PyBytes_FromStringAndSize(NULL, init_size);
161 if (b == NULL) {
162 PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
163 return -1;
164 }
165
166 // create the list
167 buffer->list = PyList_New(1);
168 if (buffer->list == NULL) {
169 Py_DECREF(b);
170 return -1;
171 }
172 PyList_SET_ITEM(buffer->list, 0, b);
173
174 // set variables
175 buffer->allocated = init_size;
176 buffer->max_length = -1;
177
178 *next_out = PyBytes_AS_STRING(b);
179 return init_size;
180}
181
182/* Grow the buffer. The avail_out must be 0, please check it before calling.
183
184 On success, return allocated size (>=0)
185 On failure, return -1
186*/
187static inline Py_ssize_t
188_BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer,
189 void **next_out,
190 const Py_ssize_t avail_out)
191{
192 PyObject *b;
193 const Py_ssize_t list_len = Py_SIZE(buffer->list);
194 Py_ssize_t block_size;
195
196 // ensure no gaps in the data
197 if (avail_out != 0) {
198 PyErr_SetString(PyExc_SystemError,
199 "avail_out is non-zero in _BlocksOutputBuffer_Grow().");
200 return -1;
201 }
202
203 // get block size
204 if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) {
205 block_size = BUFFER_BLOCK_SIZE[list_len];
206 } else {
207 block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1];
208 }
209
210 // check max_length
211 if (buffer->max_length >= 0) {
212 // if (rest == 0), should not grow the buffer.
213 Py_ssize_t rest = buffer->max_length - buffer->allocated;
214 assert(rest > 0);
215
216 // block_size of the last block
217 if (block_size > rest) {
218 block_size = rest;
219 }
220 }
221
222 // check buffer->allocated overflow
223 if (block_size > PY_SSIZE_T_MAX - buffer->allocated) {
224 PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
225 return -1;
226 }
227
228 // create the block
229 b = PyBytes_FromStringAndSize(NULL, block_size);
230 if (b == NULL) {
231 PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
232 return -1;
233 }
234 if (PyList_Append(buffer->list, b) < 0) {
235 Py_DECREF(b);
236 return -1;
237 }
238 Py_DECREF(b);
239
240 // set variables
241 buffer->allocated += block_size;
242
243 *next_out = PyBytes_AS_STRING(b);
244 return block_size;
245}
246
247/* Return the current outputted data size. */
248static inline Py_ssize_t
249_BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer,
250 const Py_ssize_t avail_out)
251{
252 return buffer->allocated - avail_out;
253}
254
255/* Finish the buffer.
256
257 Return a bytes object on success
258 Return NULL on failure
259*/
260static inline PyObject *
261_BlocksOutputBuffer_Finish(_BlocksOutputBuffer *buffer,
262 const Py_ssize_t avail_out)
263{
264 PyObject *result, *block;
265 const Py_ssize_t list_len = Py_SIZE(buffer->list);
266
267 // fast path for single block
268 if ((list_len == 1 && avail_out == 0) ||
269 (list_len == 2 && Py_SIZE(PyList_GET_ITEM(buffer->list, 1)) == avail_out))
270 {
271 block = PyList_GET_ITEM(buffer->list, 0);
272 Py_INCREF(block);
273
274 Py_CLEAR(buffer->list);
275 return block;
276 }
277
278 // final bytes object
279 result = PyBytes_FromStringAndSize(NULL, buffer->allocated - avail_out);
280 if (result == NULL) {
281 PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
282 return NULL;
283 }
284
285 // memory copy
286 if (list_len > 0) {
287 char *posi = PyBytes_AS_STRING(result);
288
289 // blocks except the last one
290 Py_ssize_t i = 0;
291 for (; i < list_len-1; i++) {
292 block = PyList_GET_ITEM(buffer->list, i);
293 memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block));
294 posi += Py_SIZE(block);
295 }
296 // the last block
297 block = PyList_GET_ITEM(buffer->list, i);
298 memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block) - avail_out);
299 } else {
300 assert(Py_SIZE(result) == 0);
301 }
302
303 Py_CLEAR(buffer->list);
304 return result;
305}
306
307/* Clean up the buffer when an error occurred. */
308static inline void
309_BlocksOutputBuffer_OnError(_BlocksOutputBuffer *buffer)
310{
311 Py_CLEAR(buffer->list);
312}
313
314#ifdef __cplusplus
315}
316#endif
317#endif /* Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H */