_csv.c source code [python/Modules/_csv.c]

/* csv module */
2
/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
11	#define MODULE_VERSION "1.0"
12
13	#include "Python.h"
14	#include "structmember.h" // PyMemberDef
15	#include <stdbool.h>
16
17
18	typedef struct {
19	PyObject error_obj; /* CSV exception /
20	PyObject dialects; /* Dialect registry /
21	PyTypeObject *dialect_type;
22	PyTypeObject *reader_type;
23	PyTypeObject *writer_type;
24	long field_limit; / max parsed field size /
25	} _csvstate;
26
27	static struct PyModuleDef _csvmodule;
28
29	static inline _csvstate*
30	get_csv_state(PyObject *module)
31	{
32	void *state = PyModule_GetState(module);
33	assert(state != NULL);
34	return (_csvstate *)state;
35	}
36
37	static int
38	_csv_clear(PyObject *module)
39	{
40	_csvstate *module_state = PyModule_GetState(module);
41	Py_CLEAR(module_state->error_obj);
42	Py_CLEAR(module_state->dialects);
43	Py_CLEAR(module_state->dialect_type);
44	Py_CLEAR(module_state->reader_type);
45	Py_CLEAR(module_state->writer_type);
46	return `0`;
47	}
48
49	static int
50	_csv_traverse(PyObject module, visitproc visit, void* *arg)
51	{
52	_csvstate *module_state = PyModule_GetState(module);
53	Py_VISIT(module_state->error_obj);
54	Py_VISIT(module_state->dialects);
55	Py_VISIT(module_state->dialect_type);
56	Py_VISIT(module_state->reader_type);
57	Py_VISIT(module_state->writer_type);
58	return `0`;
59	}
60
61	static void
62	_csv_free(void *module)
63	{
64	_csv_clear((PyObject *)module);
65	}
66
/* States of the character-at-a-time CSV parser (see parse_process_char). */
typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
    IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL, AFTER_ESCAPED_CRNL
} ParserState;
72
/* Quoting modes accepted for a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;
76
77	typedef struct {
78	QuoteStyle style;
79	const char *name;
80	} StyleDesc;
81
82	static const StyleDesc quote_styles[] = {
83	{ QUOTE_MINIMAL, "QUOTE_MINIMAL" },
84	{ QUOTE_ALL, "QUOTE_ALL" },
85	{ QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
86	{ QUOTE_NONE, "QUOTE_NONE" },
87	{ `0` }
88	};
89
90	typedef struct {
91	PyObject_HEAD
92
93	char doublequote; / is " represented by ""? /
94	char skipinitialspace; / ignore spaces following delimiter? /
95	char strict; / raise exception on bad CSV /
96	int quoting; / style of quoting to write /
97	Py_UCS4 delimiter; / field separator /
98	Py_UCS4 quotechar; / quote character /
99	Py_UCS4 escapechar; / escape character /
100	PyObject lineterminator; /* string to write between records /
101
102	} DialectObj;
103
104	typedef struct {
105	PyObject_HEAD
106
107	PyObject input_iter; /* iterate over this for input lines /
108
109	DialectObj dialect; /* parsing dialect /
110
111	PyObject fields; /* field list for current record /
112	ParserState state; / current CSV parse state /
113	Py_UCS4 field; /* temporary buffer /
114	Py_ssize_t field_size; / size of allocated buffer /
115	Py_ssize_t field_len; / length of current field /
116	int numeric_field; / treat field as numeric /
117	unsigned long line_num; / Source-file line number /
118	} ReaderObj;
119
120	typedef struct {
121	PyObject_HEAD
122
123	PyObject write; /* write output lines to this file /
124
125	DialectObj dialect; /* parsing dialect /
126
127	Py_UCS4 rec; /* buffer for parser.join /
128	Py_ssize_t rec_size; / size of allocated record /
129	Py_ssize_t rec_len; / length of record /
130	int num_fields; / number of fields in record /
131
132	PyObject error_obj; /* cached error object /
133	} WriterObj;
134
/*
 * DIALECT class
 */
138
139	static PyObject *
140	get_dialect_from_registry(PyObject name_obj, _csvstate module_state)
141	{
142	PyObject *dialect_obj;
143
144	dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
145	if (dialect_obj == NULL) {
146	if (!PyErr_Occurred())
147	PyErr_Format(module_state->error_obj, "unknown dialect");
148	}
149	else
150	Py_INCREF(dialect_obj);
151
152	return dialect_obj;
153	}
154
155	static PyObject *
156	get_nullchar_as_None(Py_UCS4 c)
157	{
158	if (c == `'\0'`) {
159	Py_RETURN_NONE;
160	}
161	else
162	return PyUnicode_FromOrdinal(c);
163	}
164
165	static PyObject *
166	Dialect_get_lineterminator(DialectObj self, void* *Py_UNUSED(ignored))
167	{
168	Py_XINCREF(self->lineterminator);
169	return self->lineterminator;
170	}
171
172	static PyObject *
173	Dialect_get_delimiter(DialectObj self, void* *Py_UNUSED(ignored))
174	{
175	return get_nullchar_as_None(self->delimiter);
176	}
177
178	static PyObject *
179	Dialect_get_escapechar(DialectObj self, void* *Py_UNUSED(ignored))
180	{
181	return get_nullchar_as_None(self->escapechar);
182	}
183
184	static PyObject *
185	Dialect_get_quotechar(DialectObj self, void* *Py_UNUSED(ignored))
186	{
187	return get_nullchar_as_None(self->quotechar);
188	}
189
190	static PyObject *
191	Dialect_get_quoting(DialectObj self, void* *Py_UNUSED(ignored))
192	{
193	return PyLong_FromLong(self->quoting);
194	}
195
196	static int
197	_set_bool(const char name, char* target, PyObject src, bool dflt)
198	{
199	if (src == NULL)
200	*target = dflt;
201	else {
202	int b = PyObject_IsTrue(src);
203	if (b < `0`)
204	return -`1`;
205	target = (char*)b;
206	}
207	return `0`;
208	}
209
210	static int
211	_set_int(const char name, int* target, PyObject src, int dflt)
212	{
213	if (src == NULL)
214	*target = dflt;
215	else {
216	int value;
217	if (!PyLong_CheckExact(src)) {
218	PyErr_Format(PyExc_TypeError,
219	"\"%s\" must be an integer", name);
220	return -`1`;
221	}
222	value = _PyLong_AsInt(src);
223	if (value == -`1` && PyErr_Occurred()) {
224	return -`1`;
225	}
226	*target = value;
227	}
228	return `0`;
229	}
230
231	static int
232	_set_char_or_none(const char name, Py_UCS4 target, PyObject *src, Py_UCS4 dflt)
233	{
234	if (src == NULL) {
235	*target = dflt;
236	}
237	else {
238	*target = `'\0'`;
239	if (src != Py_None) {
240	if (!PyUnicode_Check(src)) {
241	PyErr_Format(PyExc_TypeError,
242	"\"%s\" must be string or None, not %.200s", name,
243	Py_TYPE(src)->tp_name);
244	return -`1`;
245	}
246	Py_ssize_t len = PyUnicode_GetLength(src);
247	if (len < `0`) {
248	return -`1`;
249	}
250	if (len > `1`) {
251	PyErr_Format(PyExc_TypeError,
252	"\"%s\" must be a 1-character string",
253	name);
254	return -`1`;
255	}
256	/ PyUnicode_READY() is called in PyUnicode_GetLength() /
257	else {
258	*target = PyUnicode_READ_CHAR(src, `0`);
259	}
260	}
261	}
262	return `0`;
263	}
264
265	static int
266	_set_char(const char name, Py_UCS4 target, PyObject *src, Py_UCS4 dflt)
267	{
268	if (src == NULL) {
269	*target = dflt;
270	}
271	else {
272	*target = `'\0'`;
273	if (!PyUnicode_Check(src)) {
274	PyErr_Format(PyExc_TypeError,
275	"\"%s\" must be string, not %.200s", name,
276	Py_TYPE(src)->tp_name);
277	return -`1`;
278	}
279	Py_ssize_t len = PyUnicode_GetLength(src);
280	if (len < `0`) {
281	return -`1`;
282	}
283	if (len > `1`) {
284	PyErr_Format(PyExc_TypeError,
285	"\"%s\" must be a 1-character string",
286	name);
287	return -`1`;
288	}
289	/ PyUnicode_READY() is called in PyUnicode_GetLength() /
290	else {
291	*target = PyUnicode_READ_CHAR(src, `0`);
292	}
293	}
294	return `0`;
295	}
296
297	static int
298	_set_str(const char name, PyObject target, PyObject src, const char *dflt)
299	{
300	if (src == NULL)
301	*target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
302	else {
303	if (src == Py_None)
304	*target = NULL;
305	else if (!PyUnicode_Check(src)) {
306	PyErr_Format(PyExc_TypeError,
307	"\"%s\" must be a string", name);
308	return -`1`;
309	}
310	else {
311	if (PyUnicode_READY(src) == -`1`)
312	return -`1`;
313	Py_INCREF(src);
314	Py_XSETREF(*target, src);
315	}
316	}
317	return `0`;
318	}
319
320	static int
321	dialect_check_quoting(int quoting)
322	{
323	const StyleDesc *qs;
324
325	for (qs = quote_styles; qs->name; qs++) {
326	if ((int)qs->style == quoting)
327	return `0`;
328	}
329	PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
330	return -`1`;
331	}
332
333	#define D_OFF(x) offsetof(DialectObj, x)
334
335	static struct PyMemberDef Dialect_memberlist[] = {
336	{ "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
337	{ "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
338	{ "strict", T_BOOL, D_OFF(strict), READONLY },
339	{ NULL }
340	};
341
342	static PyGetSetDef Dialect_getsetlist[] = {
343	{ "delimiter", (getter)Dialect_get_delimiter},
344	{ "escapechar", (getter)Dialect_get_escapechar},
345	{ "lineterminator", (getter)Dialect_get_lineterminator},
346	{ "quotechar", (getter)Dialect_get_quotechar},
347	{ "quoting", (getter)Dialect_get_quoting},
348	{NULL},
349	};
350
351	static void
352	Dialect_dealloc(DialectObj *self)
353	{
354	PyTypeObject *tp = Py_TYPE(self);
355	PyObject_GC_UnTrack(self);
356	tp->tp_clear((PyObject *)self);
357	PyObject_GC_Del(self);
358	Py_DECREF(tp);
359	}
360
/* Keyword names accepted by dialect_new(), in the order its argument
   parser fills the output variables.  NULL-terminated. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
373
374	static _csvstate *
375	_csv_state_from_type(PyTypeObject type, const* char *name)
376	{
377	PyObject *module = _PyType_GetModuleByDef(type, &_csvmodule);
378	if (module == NULL) {
379	return NULL;
380	}
381	_csvstate *module_state = PyModule_GetState(module);
382	if (module_state == NULL) {
383	PyErr_Format(PyExc_SystemError,
384	"%s: No _csv module state found", name);
385	return NULL;
386	}
387	return module_state;
388	}
389
390	static PyObject *
391	dialect_new(PyTypeObject type, PyObject args, PyObject *kwargs)
392	{
393	DialectObj *self;
394	PyObject *ret = NULL;
395	PyObject *dialect = NULL;
396	PyObject *delimiter = NULL;
397	PyObject *doublequote = NULL;
398	PyObject *escapechar = NULL;
399	PyObject *lineterminator = NULL;
400	PyObject *quotechar = NULL;
401	PyObject *quoting = NULL;
402	PyObject *skipinitialspace = NULL;
403	PyObject *strict = NULL;
404
405	if (!PyArg_ParseTupleAndKeywords(args, kwargs,
406	"\|OOOOOOOOO", dialect_kws,
407	&dialect,
408	&delimiter,
409	&doublequote,
410	&escapechar,
411	&lineterminator,
412	&quotechar,
413	&quoting,
414	&skipinitialspace,
415	&strict))
416	return NULL;
417
418	_csvstate *module_state = _csv_state_from_type(type, "dialect_new");
419	if (module_state == NULL) {
420	return NULL;
421	}
422
423	if (dialect != NULL) {
424	if (PyUnicode_Check(dialect)) {
425	dialect = get_dialect_from_registry(dialect, module_state);
426	if (dialect == NULL)
427	return NULL;
428	}
429	else
430	Py_INCREF(dialect);
431	/ Can we reuse this instance? /
432	if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
433	delimiter == NULL &&
434	doublequote == NULL &&
435	escapechar == NULL &&
436	lineterminator == NULL &&
437	quotechar == NULL &&
438	quoting == NULL &&
439	skipinitialspace == NULL &&
440	strict == NULL)
441	return dialect;
442	}
443
444	self = (DialectObj *)type->tp_alloc(type, `0`);
445	if (self == NULL) {
446	Py_CLEAR(dialect);
447	return NULL;
448	}
449	self->lineterminator = NULL;
450
451	Py_XINCREF(delimiter);
452	Py_XINCREF(doublequote);
453	Py_XINCREF(escapechar);
454	Py_XINCREF(lineterminator);
455	Py_XINCREF(quotechar);
456	Py_XINCREF(quoting);
457	Py_XINCREF(skipinitialspace);
458	Py_XINCREF(strict);
459	if (dialect != NULL) {
460	#define DIALECT_GETATTR(v, n) \
461	do { \
462	if (v == NULL) { \
463	v = PyObject_GetAttrString(dialect, n); \
464	if (v == NULL) \
465	PyErr_Clear(); \
466	} \
467	} while (0)
468	DIALECT_GETATTR(delimiter, "delimiter");
469	DIALECT_GETATTR(doublequote, "doublequote");
470	DIALECT_GETATTR(escapechar, "escapechar");
471	DIALECT_GETATTR(lineterminator, "lineterminator");
472	DIALECT_GETATTR(quotechar, "quotechar");
473	DIALECT_GETATTR(quoting, "quoting");
474	DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
475	DIALECT_GETATTR(strict, "strict");
476	}
477
478	/ check types and convert to C values /
479	#define DIASET(meth, name, target, src, dflt) \
480	if (meth(name, target, src, dflt)) \
481	goto err
482	DIASET(_set_char, "delimiter", &self->delimiter, delimiter, `','`);
483	DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
484	DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, `0`);
485	DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
486	DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, `'"'`);
487	DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
488	DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
489	DIASET(_set_bool, "strict", &self->strict, strict, false);
490
491	/ validate options /
492	if (dialect_check_quoting(self->quoting))
493	goto err;
494	if (self->delimiter == `0`) {
495	PyErr_SetString(PyExc_TypeError,
496	"\"delimiter\" must be a 1-character string");
497	goto err;
498	}
499	if (quotechar == Py_None && quoting == NULL)
500	self->quoting = QUOTE_NONE;
501	if (self->quoting != QUOTE_NONE && self->quotechar == `0`) {
502	PyErr_SetString(PyExc_TypeError,
503	"quotechar must be set if quoting enabled");
504	goto err;
505	}
506	if (self->lineterminator == `0`) {
507	PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
508	goto err;
509	}
510
511	ret = (PyObject *)self;
512	Py_INCREF(self);
513	err:
514	Py_CLEAR(self);
515	Py_CLEAR(dialect);
516	Py_CLEAR(delimiter);
517	Py_CLEAR(doublequote);
518	Py_CLEAR(escapechar);
519	Py_CLEAR(lineterminator);
520	Py_CLEAR(quotechar);
521	Py_CLEAR(quoting);
522	Py_CLEAR(skipinitialspace);
523	Py_CLEAR(strict);
524	return ret;
525	}
526
527	/ Since dialect is now a heap type, it inherits pickling method for*
528	* protocol 0 and 1 from object, therefore it needs to be overridden */
529
530	PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
531
532	static PyObject *
533	Dialect_reduce(PyObject self, PyObject args) {
534	PyErr_Format(PyExc_TypeError,
535	"cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
536	return NULL;
537	}
538
539	static struct PyMethodDef dialect_methods[] = {
540	{"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
541	{"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
542	{NULL, NULL}
543	};
544
545	PyDoc_STRVAR(Dialect_Type_doc,
546	"CSV dialect\n"
547	"\n"
548	"The Dialect type records CSV parsing and generation options.\n");
549
550	static int
551	Dialect_clear(DialectObj *self)
552	{
553	Py_CLEAR(self->lineterminator);
554	return `0`;
555	}
556
557	static int
558	Dialect_traverse(DialectObj self, visitproc visit, void* *arg)
559	{
560	Py_VISIT(self->lineterminator);
561	Py_VISIT(Py_TYPE(self));
562	return `0`;
563	}
564
565	static PyType_Slot Dialect_Type_slots[] = {
566	{Py_tp_doc, (char*)Dialect_Type_doc},
567	{Py_tp_members, Dialect_memberlist},
568	{Py_tp_getset, Dialect_getsetlist},
569	{Py_tp_new, dialect_new},
570	{Py_tp_methods, dialect_methods},
571	{Py_tp_dealloc, Dialect_dealloc},
572	{Py_tp_clear, Dialect_clear},
573	{Py_tp_traverse, Dialect_traverse},
574	{`0`, NULL}
575	};
576
577	PyType_Spec Dialect_Type_spec = {
578	.name = "_csv.Dialect",
579	.basicsize = sizeof(DialectObj),
580	.flags = (Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_BASETYPE \| Py_TPFLAGS_HAVE_GC \|
581	Py_TPFLAGS_IMMUTABLETYPE),
582	.slots = Dialect_Type_slots,
583	};
584
585
586	/*
587	* Return an instance of the dialect type, given a Python instance or kwarg
588	* description of the dialect
589	*/
590	static PyObject *
591	_call_dialect(_csvstate module_state, PyObject dialect_inst, PyObject *kwargs)
592	{
593	PyObject type = (PyObject )module_state->dialect_type;
594	if (dialect_inst) {
595	return PyObject_VectorcallDict(type, &dialect_inst, `1`, kwargs);
596	}
597	else {
598	return PyObject_VectorcallDict(type, NULL, `0`, kwargs);
599	}
600	}
601
/*
 * READER
 */
605	static int
606	parse_save_field(ReaderObj *self)
607	{
608	PyObject *field;
609
610	field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
611	(void *) self->field, self->field_len);
612	if (field == NULL)
613	return -`1`;
614	self->field_len = `0`;
615	if (self->numeric_field) {
616	PyObject *tmp;
617
618	self->numeric_field = `0`;
619	tmp = PyNumber_Float(field);
620	Py_DECREF(field);
621	if (tmp == NULL)
622	return -`1`;
623	field = tmp;
624	}
625	if (PyList_Append(self->fields, field) < `0`) {
626	Py_DECREF(field);
627	return -`1`;
628	}
629	Py_DECREF(field);
630	return `0`;
631	}
632
633	static int
634	parse_grow_buff(ReaderObj *self)
635	{
636	assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
637
638	Py_ssize_t field_size_new = self->field_size ? `2` * self->field_size : `4096`;
639	Py_UCS4 *field_new = self->field;
640	PyMem_Resize(field_new, Py_UCS4, field_size_new);
641	if (field_new == NULL) {
642	PyErr_NoMemory();
643	return `0`;
644	}
645	self->field = field_new;
646	self->field_size = field_size_new;
647	return `1`;
648	}
649
650	static int
651	parse_add_char(ReaderObj self, _csvstate module_state, Py_UCS4 c)
652	{
653	if (self->field_len >= module_state->field_limit) {
654	PyErr_Format(module_state->error_obj,
655	"field larger than field limit (%ld)",
656	module_state->field_limit);
657	return -`1`;
658	}
659	if (self->field_len == self->field_size && !parse_grow_buff(self))
660	return -`1`;
661	self->field[self->field_len++] = c;
662	return `0`;
663	}
664
665	static int
666	parse_process_char(ReaderObj self, _csvstate module_state, Py_UCS4 c)
667	{
668	DialectObj *dialect = self->dialect;
669
670	switch (self->state) {
671	case START_RECORD:
672	/ start of record /
673	if (c == `'\0'`)
674	/ empty line - return [] /
675	break;
676	else if (c == `'\n'` \|\| c == `'\r'`) {
677	self->state = EAT_CRNL;
678	break;
679	}
680	/ normal character - handle as START_FIELD /
681	self->state = START_FIELD;
682	/ fallthru /
683	case START_FIELD:
684	/ expecting field /
685	if (c == `'\n'` \|\| c == `'\r'` \|\| c == `'\0'`) {
686	/ save empty field - return [fields] /
687	if (parse_save_field(self) < `0`)
688	return -`1`;
689	self->state = (c == `'\0'` ? START_RECORD : EAT_CRNL);
690	}
691	else if (c == dialect->quotechar &&
692	dialect->quoting != QUOTE_NONE) {
693	/ start quoted field /
694	self->state = IN_QUOTED_FIELD;
695	}
696	else if (c == dialect->escapechar) {
697	/ possible escaped character /
698	self->state = ESCAPED_CHAR;
699	}
700	else if (c == `' '` && dialect->skipinitialspace)
701	/ ignore space at start of field /
702	;
703	else if (c == dialect->delimiter) {
704	/ save empty field /
705	if (parse_save_field(self) < `0`)
706	return -`1`;
707	}
708	else {
709	/ begin new unquoted field /
710	if (dialect->quoting == QUOTE_NONNUMERIC)
711	self->numeric_field = `1`;
712	if (parse_add_char(self, module_state, c) < `0`)
713	return -`1`;
714	self->state = IN_FIELD;
715	}
716	break;
717
718	case ESCAPED_CHAR:
719	if (c == `'\n'` \|\| c==`'\r'`) {
720	if (parse_add_char(self, module_state, c) < `0`)
721	return -`1`;
722	self->state = AFTER_ESCAPED_CRNL;
723	break;
724	}
725	if (c == `'\0'`)
726	c = `'\n'`;
727	if (parse_add_char(self, module_state, c) < `0`)
728	return -`1`;
729	self->state = IN_FIELD;
730	break;
731
732	case AFTER_ESCAPED_CRNL:
733	if (c == `'\0'`)
734	break;
735	/fallthru/
736
737	case IN_FIELD:
738	/ in unquoted field /
739	if (c == `'\n'` \|\| c == `'\r'` \|\| c == `'\0'`) {
740	/ end of line - return [fields] /
741	if (parse_save_field(self) < `0`)
742	return -`1`;
743	self->state = (c == `'\0'` ? START_RECORD : EAT_CRNL);
744	}
745	else if (c == dialect->escapechar) {
746	/ possible escaped character /
747	self->state = ESCAPED_CHAR;
748	}
749	else if (c == dialect->delimiter) {
750	/ save field - wait for new field /
751	if (parse_save_field(self) < `0`)
752	return -`1`;
753	self->state = START_FIELD;
754	}
755	else {
756	/ normal character - save in field /
757	if (parse_add_char(self, module_state, c) < `0`)
758	return -`1`;
759	}
760	break;
761
762	case IN_QUOTED_FIELD:
763	/ in quoted field /
764	if (c == `'\0'`)
765	;
766	else if (c == dialect->escapechar) {
767	/ Possible escape character /
768	self->state = ESCAPE_IN_QUOTED_FIELD;
769	}
770	else if (c == dialect->quotechar &&
771	dialect->quoting != QUOTE_NONE) {
772	if (dialect->doublequote) {
773	/ doublequote; " represented by "" /
774	self->state = QUOTE_IN_QUOTED_FIELD;
775	}
776	else {
777	/ end of quote part of field /
778	self->state = IN_FIELD;
779	}
780	}
781	else {
782	/ normal character - save in field /
783	if (parse_add_char(self, module_state, c) < `0`)
784	return -`1`;
785	}
786	break;
787
788	case ESCAPE_IN_QUOTED_FIELD:
789	if (c == `'\0'`)
790	c = `'\n'`;
791	if (parse_add_char(self, module_state, c) < `0`)
792	return -`1`;
793	self->state = IN_QUOTED_FIELD;
794	break;
795
796	case QUOTE_IN_QUOTED_FIELD:
797	/ doublequote - seen a quote in a quoted field /
798	if (dialect->quoting != QUOTE_NONE &&
799	c == dialect->quotechar) {
800	/ save "" as " /
801	if (parse_add_char(self, module_state, c) < `0`)
802	return -`1`;
803	self->state = IN_QUOTED_FIELD;
804	}
805	else if (c == dialect->delimiter) {
806	/ save field - wait for new field /
807	if (parse_save_field(self) < `0`)
808	return -`1`;
809	self->state = START_FIELD;
810	}
811	else if (c == `'\n'` \|\| c == `'\r'` \|\| c == `'\0'`) {
812	/ end of line - return [fields] /
813	if (parse_save_field(self) < `0`)
814	return -`1`;
815	self->state = (c == `'\0'` ? START_RECORD : EAT_CRNL);
816	}
817	else if (!dialect->strict) {
818	if (parse_add_char(self, module_state, c) < `0`)
819	return -`1`;
820	self->state = IN_FIELD;
821	}
822	else {
823	/ illegal /
824	PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
825	dialect->delimiter,
826	dialect->quotechar);
827	return -`1`;
828	}
829	break;
830
831	case EAT_CRNL:
832	if (c == `'\n'` \|\| c == `'\r'`)
833	;
834	else if (c == `'\0'`)
835	self->state = START_RECORD;
836	else {
837	PyErr_Format(module_state->error_obj,
838	"new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
839	return -`1`;
840	}
841	break;
842
843	}
844	return `0`;
845	}
846
847	static int
848	parse_reset(ReaderObj *self)
849	{
850	Py_XSETREF(self->fields, PyList_New(`0`));
851	if (self->fields == NULL)
852	return -`1`;
853	self->field_len = `0`;
854	self->state = START_RECORD;
855	self->numeric_field = `0`;
856	return `0`;
857	}
858
859	static PyObject *
860	Reader_iternext(ReaderObj *self)
861	{
862	PyObject *fields = NULL;
863	Py_UCS4 c;
864	Py_ssize_t pos, linelen;
865	unsigned int kind;
866	const void *data;
867	PyObject *lineobj;
868
869	_csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
870	"Reader.__next__");
871	if (module_state == NULL) {
872	return NULL;
873	}
874
875	if (parse_reset(self) < `0`)
876	return NULL;
877	do {
878	lineobj = PyIter_Next(self->input_iter);
879	if (lineobj == NULL) {
880	/ End of input OR exception /
881	if (!PyErr_Occurred() && (self->field_len != `0` \|\|
882	self->state == IN_QUOTED_FIELD)) {
883	if (self->dialect->strict)
884	PyErr_SetString(module_state->error_obj,
885	"unexpected end of data");
886	else if (parse_save_field(self) >= `0`)
887	break;
888	}
889	return NULL;
890	}
891	if (!PyUnicode_Check(lineobj)) {
892	PyErr_Format(module_state->error_obj,
893	"iterator should return strings, "
894	"not %.200s "
895	"(the file should be opened in text mode)",
896	Py_TYPE(lineobj)->tp_name
897	);
898	Py_DECREF(lineobj);
899	return NULL;
900	}
901	if (PyUnicode_READY(lineobj) == -`1`) {
902	Py_DECREF(lineobj);
903	return NULL;
904	}
905	++self->line_num;
906	kind = PyUnicode_KIND(lineobj);
907	data = PyUnicode_DATA(lineobj);
908	pos = `0`;
909	linelen = PyUnicode_GET_LENGTH(lineobj);
910	while (linelen--) {
911	c = PyUnicode_READ(kind, data, pos);
912	if (c == `'\0'`) {
913	Py_DECREF(lineobj);
914	PyErr_Format(module_state->error_obj,
915	"line contains NUL");
916	goto err;
917	}
918	if (parse_process_char(self, module_state, c) < `0`) {
919	Py_DECREF(lineobj);
920	goto err;
921	}
922	pos++;
923	}
924	Py_DECREF(lineobj);
925	if (parse_process_char(self, module_state, `0`) < `0`)
926	goto err;
927	} while (self->state != START_RECORD);
928
929	fields = self->fields;
930	self->fields = NULL;
931	err:
932	return fields;
933	}
934
935	static void
936	Reader_dealloc(ReaderObj *self)
937	{
938	PyTypeObject *tp = Py_TYPE(self);
939	PyObject_GC_UnTrack(self);
940	tp->tp_clear((PyObject *)self);
941	if (self->field != NULL) {
942	PyMem_Free(self->field);
943	self->field = NULL;
944	}
945	PyObject_GC_Del(self);
946	Py_DECREF(tp);
947	}
948
949	static int
950	Reader_traverse(ReaderObj self, visitproc visit, void* *arg)
951	{
952	Py_VISIT(self->dialect);
953	Py_VISIT(self->input_iter);
954	Py_VISIT(self->fields);
955	Py_VISIT(Py_TYPE(self));
956	return `0`;
957	}
958
959	static int
960	Reader_clear(ReaderObj *self)
961	{
962	Py_CLEAR(self->dialect);
963	Py_CLEAR(self->input_iter);
964	Py_CLEAR(self->fields);
965	return `0`;
966	}
967
968	PyDoc_STRVAR(Reader_Type_doc,
969	"CSV reader\n"
970	"\n"
971	"Reader objects are responsible for reading and parsing tabular data\n"
972	"in CSV format.\n"
973	);
974
975	static struct PyMethodDef Reader_methods[] = {
976	{ NULL, NULL }
977	};
978	#define R_OFF(x) offsetof(ReaderObj, x)
979
980	static struct PyMemberDef Reader_memberlist[] = {
981	{ "dialect", T_OBJECT, R_OFF(dialect), READONLY },
982	{ "line_num", T_ULONG, R_OFF(line_num), READONLY },
983	{ NULL }
984	};
985
986
987	static PyType_Slot Reader_Type_slots[] = {
988	{Py_tp_doc, (char*)Reader_Type_doc},
989	{Py_tp_traverse, Reader_traverse},
990	{Py_tp_iter, PyObject_SelfIter},
991	{Py_tp_iternext, Reader_iternext},
992	{Py_tp_methods, Reader_methods},
993	{Py_tp_members, Reader_memberlist},
994	{Py_tp_clear, Reader_clear},
995	{Py_tp_dealloc, Reader_dealloc},
996	{`0`, NULL}
997	};
998
999	PyType_Spec Reader_Type_spec = {
1000	.name = "_csv.reader",
1001	.basicsize = sizeof(ReaderObj),
1002	.flags = (Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_BASETYPE \| Py_TPFLAGS_HAVE_GC \|
1003	Py_TPFLAGS_IMMUTABLETYPE),
1004	.slots = Reader_Type_slots
1005	};
1006
1007
1008	static PyObject *
1009	csv_reader(PyObject module, PyObject args, PyObject *keyword_args)
1010	{
1011	PyObject * iterator, * dialect = NULL;
1012	_csvstate *module_state = get_csv_state(module);
1013	ReaderObj * self = PyObject_GC_New(
1014	ReaderObj,
1015	module_state->reader_type);
1016
1017	if (!self)
1018	return NULL;
1019
1020	self->dialect = NULL;
1021	self->fields = NULL;
1022	self->input_iter = NULL;
1023	self->field = NULL;
1024	self->field_size = `0`;
1025	self->line_num = `0`;
1026
1027	if (parse_reset(self) < `0`) {
1028	Py_DECREF(self);
1029	return NULL;
1030	}
1031
1032	if (!PyArg_UnpackTuple(args, "", `1`, `2`, &iterator, &dialect)) {
1033	Py_DECREF(self);
1034	return NULL;
1035	}
1036	self->input_iter = PyObject_GetIter(iterator);
1037	if (self->input_iter == NULL) {
1038	Py_DECREF(self);
1039	return NULL;
1040	}
1041	self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1042	keyword_args);
1043	if (self->dialect == NULL) {
1044	Py_DECREF(self);
1045	return NULL;
1046	}
1047
1048	PyObject_GC_Track(self);
1049	return (PyObject *)self;
1050	}
1051
/*
 * WRITER
 */
/* ---------------------------------------------------------------- */
1056	static void
1057	join_reset(WriterObj *self)
1058	{
1059	self->rec_len = `0`;
1060	self->num_fields = `0`;
1061	}
1062
/* Granularity (in code points) by which the writer's record buffer is
   sized; see join_check_rec_size(). */
#define MEM_INCR 32768
1064
1065	/ Calculate new record length or append field to record. Return new*
1066	* record length.
1067	*/
1068	static Py_ssize_t
1069	join_append_data(WriterObj self, unsigned* int field_kind, const void *field_data,
1070	Py_ssize_t field_len, int *quoted,
1071	int copy_phase)
1072	{
1073	DialectObj *dialect = self->dialect;
1074	int i;
1075	Py_ssize_t rec_len;
1076
1077	#define INCLEN \
1078	do {\
1079	if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1080	goto overflow; \
1081	} \
1082	rec_len++; \
1083	} while(0)
1084
1085	#define ADDCH(c) \
1086	do {\
1087	if (copy_phase) \
1088	self->rec[rec_len] = c;\
1089	INCLEN;\
1090	} while(0)
1091
1092	rec_len = self->rec_len;
1093
1094	/ If this is not the first field we need a field separator /
1095	if (self->num_fields > `0`)
1096	ADDCH(dialect->delimiter);
1097
1098	/ Handle preceding quote /
1099	if (copy_phase && *quoted)
1100	ADDCH(dialect->quotechar);
1101
1102	/ Copy/count field data /
1103	/ If field is null just pass over /
1104	for (i = `0`; field_data && (i < field_len); i++) {
1105	Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1106	int want_escape = `0`;
1107
1108	if (c == dialect->delimiter \|\|
1109	c == dialect->escapechar \|\|
1110	c == dialect->quotechar \|\|
1111	PyUnicode_FindChar(
1112	dialect->lineterminator, c, `0`,
1113	PyUnicode_GET_LENGTH(dialect->lineterminator), `1`) >= `0`) {
1114	if (dialect->quoting == QUOTE_NONE)
1115	want_escape = `1`;
1116	else {
1117	if (c == dialect->quotechar) {
1118	if (dialect->doublequote)
1119	ADDCH(dialect->quotechar);
1120	else
1121	want_escape = `1`;
1122	}
1123	else if (c == dialect->escapechar) {
1124	want_escape = `1`;
1125	}
1126	if (!want_escape)
1127	*quoted = `1`;
1128	}
1129	if (want_escape) {
1130	if (!dialect->escapechar) {
1131	PyErr_Format(self->error_obj,
1132	"need to escape, but no escapechar set");
1133	return -`1`;
1134	}
1135	ADDCH(dialect->escapechar);
1136	}
1137	}
1138	/ Copy field character into record buffer.*
1139	*/
1140	ADDCH(c);
1141	}
1142
1143	if (*quoted) {
1144	if (copy_phase)
1145	ADDCH(dialect->quotechar);
1146	else {
1147	INCLEN; / starting quote /
1148	INCLEN; / ending quote /
1149	}
1150	}
1151	return rec_len;
1152
1153	overflow:
1154	PyErr_NoMemory();
1155	return -`1`;
1156	#undef ADDCH
1157	#undef INCLEN
1158	}
1159
1160	static int
1161	join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1162	{
1163	assert(rec_len >= `0`);
1164
1165	if (rec_len > self->rec_size) {
1166	size_t rec_size_new = (size_t)(rec_len / MEM_INCR + `1`) * MEM_INCR;
1167	Py_UCS4 *rec_new = self->rec;
1168	PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1169	if (rec_new == NULL) {
1170	PyErr_NoMemory();
1171	return `0`;
1172	}
1173	self->rec = rec_new;
1174	self->rec_size = (Py_ssize_t)rec_size_new;
1175	}
1176	return `1`;
1177	}
1178
1179	static int
1180	join_append(WriterObj self, PyObject field, int quoted)
1181	{
1182	unsigned int field_kind = -`1`;
1183	const void *field_data = NULL;
1184	Py_ssize_t field_len = `0`;
1185	Py_ssize_t rec_len;
1186
1187	if (field != NULL) {
1188	if (PyUnicode_READY(field) == -`1`)
1189	return `0`;
1190	field_kind = PyUnicode_KIND(field);
1191	field_data = PyUnicode_DATA(field);
1192	field_len = PyUnicode_GET_LENGTH(field);
1193	}
1194	rec_len = join_append_data(self, field_kind, field_data, field_len,
1195	&quoted, `0`);
1196	if (rec_len < `0`)
1197	return `0`;
1198
1199	/ grow record buffer if necessary /
1200	if (!join_check_rec_size(self, rec_len))
1201	return `0`;
1202
1203	self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1204	&quoted, `1`);
1205	self->num_fields++;
1206
1207	return `1`;
1208	}
1209
1210	static int
1211	join_append_lineterminator(WriterObj *self)
1212	{
1213	Py_ssize_t terminator_len, i;
1214	unsigned int term_kind;
1215	const void *term_data;
1216
1217	terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1218	if (terminator_len == -`1`)
1219	return `0`;
1220
1221	/ grow record buffer if necessary /
1222	if (!join_check_rec_size(self, self->rec_len + terminator_len))
1223	return `0`;
1224
1225	term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1226	term_data = PyUnicode_DATA(self->dialect->lineterminator);
1227	for (i = `0`; i < terminator_len; i++)
1228	self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1229	self->rec_len += terminator_len;
1230
1231	return `1`;
1232	}
1233
1234	PyDoc_STRVAR(csv_writerow_doc,
1235	"writerow(iterable)\n"
1236	"\n"
1237	"Construct and write a CSV record from an iterable of fields. Non-string\n"
1238	"elements will be converted to string.");
1239
1240	static PyObject *
1241	csv_writerow(WriterObj self, PyObject seq)
1242	{
1243	DialectObj *dialect = self->dialect;
1244	PyObject iter, field, line, result;
1245
1246	iter = PyObject_GetIter(seq);
1247	if (iter == NULL) {
1248	if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1249	PyErr_Format(self->error_obj,
1250	"iterable expected, not %.200s",
1251	Py_TYPE(seq)->tp_name);
1252	}
1253	return NULL;
1254	}
1255
1256	/ Join all fields in internal buffer.*
1257	*/
1258	join_reset(self);
1259	while ((field = PyIter_Next(iter))) {
1260	int append_ok;
1261	int quoted;
1262
1263	switch (dialect->quoting) {
1264	case QUOTE_NONNUMERIC:
1265	quoted = !PyNumber_Check(field);
1266	break;
1267	case QUOTE_ALL:
1268	quoted = `1`;
1269	break;
1270	default:
1271	quoted = `0`;
1272	break;
1273	}
1274
1275	if (PyUnicode_Check(field)) {
1276	append_ok = join_append(self, field, quoted);
1277	Py_DECREF(field);
1278	}
1279	else if (field == Py_None) {
1280	append_ok = join_append(self, NULL, quoted);
1281	Py_DECREF(field);
1282	}
1283	else {
1284	PyObject *str;
1285
1286	str = PyObject_Str(field);
1287	Py_DECREF(field);
1288	if (str == NULL) {
1289	Py_DECREF(iter);
1290	return NULL;
1291	}
1292	append_ok = join_append(self, str, quoted);
1293	Py_DECREF(str);
1294	}
1295	if (!append_ok) {
1296	Py_DECREF(iter);
1297	return NULL;
1298	}
1299	}
1300	Py_DECREF(iter);
1301	if (PyErr_Occurred())
1302	return NULL;
1303
1304	if (self->num_fields > `0` && self->rec_len == `0`) {
1305	if (dialect->quoting == QUOTE_NONE) {
1306	PyErr_Format(self->error_obj,
1307	"single empty field record must be quoted");
1308	return NULL;
1309	}
1310	self->num_fields--;
1311	if (!join_append(self, NULL, `1`))
1312	return NULL;
1313	}
1314
1315	/ Add line terminator.*
1316	*/
1317	if (!join_append_lineterminator(self)) {
1318	return NULL;
1319	}
1320
1321	line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1322	(void *) self->rec, self->rec_len);
1323	if (line == NULL) {
1324	return NULL;
1325	}
1326	result = PyObject_CallOneArg(self->write, line);
1327	Py_DECREF(line);
1328	return result;
1329	}
1330
1331	PyDoc_STRVAR(csv_writerows_doc,
1332	"writerows(iterable of iterables)\n"
1333	"\n"
1334	"Construct and write a series of iterables to a csv file. Non-string\n"
1335	"elements will be converted to string.");
1336
1337	static PyObject *
1338	csv_writerows(WriterObj self, PyObject seqseq)
1339	{
1340	PyObject row_iter, row_obj, *result;
1341
1342	row_iter = PyObject_GetIter(seqseq);
1343	if (row_iter == NULL) {
1344	return NULL;
1345	}
1346	while ((row_obj = PyIter_Next(row_iter))) {
1347	result = csv_writerow(self, row_obj);
1348	Py_DECREF(row_obj);
1349	if (!result) {
1350	Py_DECREF(row_iter);
1351	return NULL;
1352	}
1353	else
1354	Py_DECREF(result);
1355	}
1356	Py_DECREF(row_iter);
1357	if (PyErr_Occurred())
1358	return NULL;
1359	Py_RETURN_NONE;
1360	}
1361
1362	static struct PyMethodDef Writer_methods[] = {
1363	{ "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1364	{ "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1365	{ NULL, NULL }
1366	};
1367
1368	#define W_OFF(x) offsetof(WriterObj, x)
1369
1370	static struct PyMemberDef Writer_memberlist[] = {
1371	{ "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1372	{ NULL }
1373	};
1374
1375	static int
1376	Writer_traverse(WriterObj self, visitproc visit, void* *arg)
1377	{
1378	Py_VISIT(self->dialect);
1379	Py_VISIT(self->write);
1380	Py_VISIT(self->error_obj);
1381	Py_VISIT(Py_TYPE(self));
1382	return `0`;
1383	}
1384
1385	static int
1386	Writer_clear(WriterObj *self)
1387	{
1388	Py_CLEAR(self->dialect);
1389	Py_CLEAR(self->write);
1390	Py_CLEAR(self->error_obj);
1391	return `0`;
1392	}
1393
1394	static void
1395	Writer_dealloc(WriterObj *self)
1396	{
1397	PyTypeObject *tp = Py_TYPE(self);
1398	PyObject_GC_UnTrack(self);
1399	tp->tp_clear((PyObject *)self);
1400	if (self->rec != NULL) {
1401	PyMem_Free(self->rec);
1402	}
1403	PyObject_GC_Del(self);
1404	Py_DECREF(tp);
1405	}
1406
1407	PyDoc_STRVAR(Writer_Type_doc,
1408	"CSV writer\n"
1409	"\n"
1410	"Writer objects are responsible for generating tabular data\n"
1411	"in CSV format from sequence input.\n"
1412	);
1413
1414	static PyType_Slot Writer_Type_slots[] = {
1415	{Py_tp_doc, (char*)Writer_Type_doc},
1416	{Py_tp_traverse, Writer_traverse},
1417	{Py_tp_clear, Writer_clear},
1418	{Py_tp_dealloc, Writer_dealloc},
1419	{Py_tp_methods, Writer_methods},
1420	{Py_tp_members, Writer_memberlist},
1421	{`0`, NULL}
1422	};
1423
1424	PyType_Spec Writer_Type_spec = {
1425	.name = "_csv.writer",
1426	.basicsize = sizeof(WriterObj),
1427	.flags = (Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_BASETYPE \| Py_TPFLAGS_HAVE_GC \|
1428	Py_TPFLAGS_IMMUTABLETYPE),
1429	.slots = Writer_Type_slots,
1430	};
1431
1432
1433	static PyObject *
1434	csv_writer(PyObject module, PyObject args, PyObject *keyword_args)
1435	{
1436	PyObject * output_file, * dialect = NULL;
1437	_csvstate *module_state = get_csv_state(module);
1438	WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
1439	_Py_IDENTIFIER(write);
1440
1441	if (!self)
1442	return NULL;
1443
1444	self->dialect = NULL;
1445	self->write = NULL;
1446
1447	self->rec = NULL;
1448	self->rec_size = `0`;
1449	self->rec_len = `0`;
1450	self->num_fields = `0`;
1451
1452	self->error_obj = Py_NewRef(module_state->error_obj);
1453
1454	if (!PyArg_UnpackTuple(args, "", `1`, `2`, &output_file, &dialect)) {
1455	Py_DECREF(self);
1456	return NULL;
1457	}
1458	if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < `0`) {
1459	Py_DECREF(self);
1460	return NULL;
1461	}
1462	if (self->write == NULL \|\| !PyCallable_Check(self->write)) {
1463	PyErr_SetString(PyExc_TypeError,
1464	"argument 1 must have a \"write\" method");
1465	Py_DECREF(self);
1466	return NULL;
1467	}
1468	self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1469	keyword_args);
1470	if (self->dialect == NULL) {
1471	Py_DECREF(self);
1472	return NULL;
1473	}
1474	PyObject_GC_Track(self);
1475	return (PyObject *)self;
1476	}
1477
1478	/*
1479	* DIALECT REGISTRY
1480	*/
1481	static PyObject *
1482	csv_list_dialects(PyObject module, PyObject args)
1483	{
1484	return PyDict_Keys(get_csv_state(module)->dialects);
1485	}
1486
1487	static PyObject *
1488	csv_register_dialect(PyObject module, PyObject args, PyObject *kwargs)
1489	{
1490	PyObject name_obj, dialect_obj = NULL;
1491	_csvstate *module_state = get_csv_state(module);
1492	PyObject *dialect;
1493
1494	if (!PyArg_UnpackTuple(args, "", `1`, `2`, &name_obj, &dialect_obj))
1495	return NULL;
1496	if (!PyUnicode_Check(name_obj)) {
1497	PyErr_SetString(PyExc_TypeError,
1498	"dialect name must be a string");
1499	return NULL;
1500	}
1501	if (PyUnicode_READY(name_obj) == -`1`)
1502	return NULL;
1503	dialect = _call_dialect(module_state, dialect_obj, kwargs);
1504	if (dialect == NULL)
1505	return NULL;
1506	if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < `0`) {
1507	Py_DECREF(dialect);
1508	return NULL;
1509	}
1510	Py_DECREF(dialect);
1511	Py_RETURN_NONE;
1512	}
1513
1514	static PyObject *
1515	csv_unregister_dialect(PyObject module, PyObject name_obj)
1516	{
1517	_csvstate *module_state = get_csv_state(module);
1518	if (PyDict_DelItem(module_state->dialects, name_obj) < `0`) {
1519	if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1520	PyErr_Format(module_state->error_obj, "unknown dialect");
1521	}
1522	return NULL;
1523	}
1524	Py_RETURN_NONE;
1525	}
1526
1527	static PyObject *
1528	csv_get_dialect(PyObject module, PyObject name_obj)
1529	{
1530	return get_dialect_from_registry(name_obj, get_csv_state(module));
1531	}
1532
1533	static PyObject *
1534	csv_field_size_limit(PyObject module, PyObject args)
1535	{
1536	PyObject *new_limit = NULL;
1537	_csvstate *module_state = get_csv_state(module);
1538	long old_limit = module_state->field_limit;
1539
1540	if (!PyArg_UnpackTuple(args, "field_size_limit", `0`, `1`, &new_limit))
1541	return NULL;
1542	if (new_limit != NULL) {
1543	if (!PyLong_CheckExact(new_limit)) {
1544	PyErr_Format(PyExc_TypeError,
1545	"limit must be an integer");
1546	return NULL;
1547	}
1548	module_state->field_limit = PyLong_AsLong(new_limit);
1549	if (module_state->field_limit == -`1` && PyErr_Occurred()) {
1550	module_state->field_limit = old_limit;
1551	return NULL;
1552	}
1553	}
1554	return PyLong_FromLong(old_limit);
1555	}
1556
1557	static PyType_Slot error_slots[] = {
1558	{`0`, NULL},
1559	};
1560
1561	PyType_Spec error_spec = {
1562	.name = "_csv.Error",
1563	.flags = Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_BASETYPE,
1564	.slots = error_slots,
1565	};
1566
1567	/*
1568	* MODULE
1569	*/
1570
1571	PyDoc_STRVAR(csv_module_doc,
1572	"CSV parsing and writing.\n"
1573	"\n"
1574	"This module provides classes that assist in the reading and writing\n"
1575	"of Comma Separated Value (CSV) files, and implements the interface\n"
1576	"described by PEP 305. Although many CSV files are simple to parse,\n"
1577	"the format is not formally defined by a stable specification and\n"
1578	"is subtle enough that parsing lines of a CSV file with something\n"
1579	"like line.split(\",\") is bound to fail. The module supports three\n"
1580	"basic APIs: reading, writing, and registration of dialects.\n"
1581	"\n"
1582	"\n"
1583	"DIALECT REGISTRATION:\n"
1584	"\n"
1585	"Readers and writers support a dialect argument, which is a convenient\n"
1586	"handle on a group of settings. When the dialect argument is a string,\n"
1587	"it identifies one of the dialects previously registered with the module.\n"
1588	"If it is a class or instance, the attributes of the argument are used as\n"
1589	"the settings for the reader or writer:\n"
1590	"\n"
1591	" class excel:\n"
1592	" delimiter = ','\n"
1593	" quotechar = '\"'\n"
1594	" escapechar = None\n"
1595	" doublequote = True\n"
1596	" skipinitialspace = False\n"
1597	" lineterminator = '\\r\\n'\n"
1598	" quoting = QUOTE_MINIMAL\n"
1599	"\n"
1600	"SETTINGS:\n"
1601	"\n"
1602	" * quotechar - specifies a one-character string to use as the\n"
1603	" quoting character. It defaults to '\"'.\n"
1604	" * delimiter - specifies a one-character string to use as the\n"
1605	" field separator. It defaults to ','.\n"
1606	" * skipinitialspace - specifies how to interpret whitespace which\n"
1607	" immediately follows a delimiter. It defaults to False, which\n"
1608	" means that whitespace immediately following a delimiter is part\n"
1609	" of the following field.\n"
1610	" * lineterminator - specifies the character sequence which should\n"
1611	" terminate rows.\n"
1612	" * quoting - controls when quotes should be generated by the writer.\n"
1613	" It can take on any of the following module constants:\n"
1614	"\n"
1615	" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1616	" field contains either the quotechar or the delimiter\n"
1617	" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1618	" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1619	" fields which do not parse as integers or floating point\n"
1620	" numbers.\n"
1621	" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1622	" * escapechar - specifies a one-character string used to escape\n"
1623	" the delimiter when quoting is set to QUOTE_NONE.\n"
1624	" * doublequote - controls the handling of quotes inside fields. When\n"
1625	" True, two consecutive quotes are interpreted as one during read,\n"
1626	" and when writing, each quote character embedded in the data is\n"
1627	" written as two quotes\n");
1628
1629	PyDoc_STRVAR(csv_reader_doc,
1630	" csv_reader = reader(iterable [, dialect='excel']\n"
1631	" [optional keyword args])\n"
1632	" for row in csv_reader:\n"
1633	" process(row)\n"
1634	"\n"
1635	"The \"iterable\" argument can be any object that returns a line\n"
1636	"of input for each iteration, such as a file object or a list. The\n"
1637	"optional \"dialect\" parameter is discussed below. The function\n"
1638	"also accepts optional keyword arguments which override settings\n"
1639	"provided by the dialect.\n"
1640	"\n"
1641	"The returned object is an iterator. Each iteration returns a row\n"
1642	"of the CSV file (which can span multiple input lines).\n");
1643
1644	PyDoc_STRVAR(csv_writer_doc,
1645	" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1646	" [optional keyword args])\n"
1647	" for row in sequence:\n"
1648	" csv_writer.writerow(row)\n"
1649	"\n"
1650	" [or]\n"
1651	"\n"
1652	" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1653	" [optional keyword args])\n"
1654	" csv_writer.writerows(rows)\n"
1655	"\n"
1656	"The \"fileobj\" argument can be any object that supports the file API.\n");
1657
1658	PyDoc_STRVAR(csv_list_dialects_doc,
1659	"Return a list of all know dialect names.\n"
1660	" names = csv.list_dialects()");
1661
1662	PyDoc_STRVAR(csv_get_dialect_doc,
1663	"Return the dialect instance associated with name.\n"
1664	" dialect = csv.get_dialect(name)");
1665
1666	PyDoc_STRVAR(csv_register_dialect_doc,
1667	"Create a mapping from a string name to a dialect class.\n"
1668	" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1669
1670	PyDoc_STRVAR(csv_unregister_dialect_doc,
1671	"Delete the name/dialect mapping associated with a string name.\n"
1672	" csv.unregister_dialect(name)");
1673
1674	PyDoc_STRVAR(csv_field_size_limit_doc,
1675	"Sets an upper limit on parsed fields.\n"
1676	" csv.field_size_limit([limit])\n"
1677	"\n"
1678	"Returns old limit. If limit is not given, no new limit is set and\n"
1679	"the old limit is returned");
1680
1681	static struct PyMethodDef csv_methods[] = {
1682	{ "reader", (PyCFunction)(void()(void*))csv_reader,
1683	METH_VARARGS \| METH_KEYWORDS, csv_reader_doc},
1684	{ "writer", (PyCFunction)(void()(void*))csv_writer,
1685	METH_VARARGS \| METH_KEYWORDS, csv_writer_doc},
1686	{ "list_dialects", (PyCFunction)csv_list_dialects,
1687	METH_NOARGS, csv_list_dialects_doc},
1688	{ "register_dialect", (PyCFunction)(void()(void*))csv_register_dialect,
1689	METH_VARARGS \| METH_KEYWORDS, csv_register_dialect_doc},
1690	{ "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1691	METH_O, csv_unregister_dialect_doc},
1692	{ "get_dialect", (PyCFunction)csv_get_dialect,
1693	METH_O, csv_get_dialect_doc},
1694	{ "field_size_limit", (PyCFunction)csv_field_size_limit,
1695	METH_VARARGS, csv_field_size_limit_doc},
1696	{ NULL, NULL }
1697	};
1698
1699	static int
1700	csv_exec(PyObject *module) {
1701	const StyleDesc *style;
1702	PyObject *temp;
1703	_csvstate *module_state = get_csv_state(module);
1704
1705	temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1706	module_state->dialect_type = (PyTypeObject *)temp;
1707	if (PyModule_AddObjectRef(module, "Dialect", temp) < `0`) {
1708	return -`1`;
1709	}
1710
1711	temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1712	module_state->reader_type = (PyTypeObject *)temp;
1713	if (PyModule_AddObjectRef(module, "Reader", temp) < `0`) {
1714	return -`1`;
1715	}
1716
1717	temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1718	module_state->writer_type = (PyTypeObject *)temp;
1719	if (PyModule_AddObjectRef(module, "Writer", temp) < `0`) {
1720	return -`1`;
1721	}
1722
1723	/ Add version to the module. /
1724	if (PyModule_AddStringConstant(module, "__version__",
1725	MODULE_VERSION) == -`1`) {
1726	return -`1`;
1727	}
1728
1729	/ Set the field limit /
1730	module_state->field_limit = `128` * `1024`;
1731
1732	/ Add _dialects dictionary /
1733	module_state->dialects = PyDict_New();
1734	if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < `0`) {
1735	return -`1`;
1736	}
1737
1738	/ Add quote styles into dictionary /
1739	for (style = quote_styles; style->name; style++) {
1740	if (PyModule_AddIntConstant(module, style->name,
1741	style->style) == -`1`)
1742	return -`1`;
1743	}
1744
1745	/ Add the CSV exception object to the module. /
1746	PyObject *bases = PyTuple_Pack(`1`, PyExc_Exception);
1747	if (bases == NULL) {
1748	return -`1`;
1749	}
1750	module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1751	bases);
1752	Py_DECREF(bases);
1753	if (module_state->error_obj == NULL) {
1754	return -`1`;
1755	}
1756	if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != `0`) {
1757	return -`1`;
1758	}
1759
1760	return `0`;
1761	}
1762
1763	static PyModuleDef_Slot csv_slots[] = {
1764	{Py_mod_exec, csv_exec},
1765	{`0`, NULL}
1766	};
1767
1768	static struct PyModuleDef _csvmodule = {
1769	PyModuleDef_HEAD_INIT,
1770	"_csv",
1771	csv_module_doc,
1772	sizeof(_csvstate),
1773	csv_methods,
1774	csv_slots,
1775	_csv_traverse,
1776	_csv_clear,
1777	_csv_free
1778	};
1779
1780	PyMODINIT_FUNC
1781	PyInit__csv(void)
1782	{
1783	return PyModuleDef_Init(&_csvmodule);
1784	}
1785

/* Browse the source code of python/Modules/_csv.c */