unicode_format.h source code [python/Objects/stringlib/unicode_format.h]

1	/*
2	unicode_format.h -- implementation of str.format().
3	*/
4
5	/**********************************************************************/
6	/******** Global data structures and forward declarations ******/
7	/**********************************************************************/
8
9	/*
10	A SubString consists of the characters between two string or
11	unicode pointers.
12	*/
13	typedef struct {
14	PyObject str; /* borrowed reference /
15	Py_ssize_t start, end;
16	} SubString;
17
18
typedef enum {
    ANS_INIT,
    ANS_AUTO,
    ANS_MANUAL
} AutoNumberState;   /* Keep track if we're auto-numbering fields */
24
25	/ Keeps track of our auto-numbering state, and which number field we're on /
26	typedef struct {
27	AutoNumberState an_state;
28	int an_field_number;
29	} AutoNumber;
30
31
32	/ forward declaration for recursion /
33	static PyObject *
34	build_string(SubString input, PyObject args, PyObject *kwargs,
35	int recursion_depth, AutoNumber *auto_number);
36
37
38
39	/**********************************************************************/
40	/*********************** Utility functions *********************/
41	/**********************************************************************/
42
43	static void
44	AutoNumber_Init(AutoNumber *auto_number)
45	{
46	auto_number->an_state = ANS_INIT;
47	auto_number->an_field_number = `0`;
48	}
49
50	/ fill in a SubString from a pointer and length /
51	Py_LOCAL_INLINE(void)
52	SubString_init(SubString str, PyObject s, Py_ssize_t start, Py_ssize_t end)
53	{
54	str->str = s;
55	str->start = start;
56	str->end = end;
57	}
58
59	/ return a new string. if str->str is NULL, return None /
60	Py_LOCAL_INLINE(PyObject *)
61	SubString_new_object(SubString *str)
62	{
63	if (str->str == NULL)
64	Py_RETURN_NONE;
65	return PyUnicode_Substring(str->str, str->start, str->end);
66	}
67
68	/ return a new string. if str->str is NULL, return a new empty string /
69	Py_LOCAL_INLINE(PyObject *)
70	SubString_new_object_or_empty(SubString *str)
71	{
72	if (str->str == NULL) {
73	return PyUnicode_New(`0`, `0`);
74	}
75	return SubString_new_object(str);
76	}
77
78	/ Return 1 if an error has been detected switching between automatic*
79	field numbering and manual field specification, else return 0. Set
80	ValueError on error. /*
81	static int
82	autonumber_state_error(AutoNumberState state, int field_name_is_empty)
83	{
84	if (state == ANS_MANUAL) {
85	if (field_name_is_empty) {
86	PyErr_SetString(PyExc_ValueError, "cannot switch from "
87	"manual field specification to "
88	"automatic field numbering");
89	return `1`;
90	}
91	}
92	else {
93	if (!field_name_is_empty) {
94	PyErr_SetString(PyExc_ValueError, "cannot switch from "
95	"automatic field numbering to "
96	"manual field specification");
97	return `1`;
98	}
99	}
100	return `0`;
101	}
102
103
104	/**********************************************************************/
105	/******** Format string parsing -- integers and identifiers ******/
106	/**********************************************************************/
107
108	static Py_ssize_t
109	get_integer(const SubString *str)
110	{
111	Py_ssize_t accumulator = `0`;
112	Py_ssize_t digitval;
113	Py_ssize_t i;
114
115	/ empty string is an error /
116	if (str->start >= str->end)
117	return -`1`;
118
119	for (i = str->start; i < str->end; i++) {
120	digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
121	if (digitval < `0`)
122	return -`1`;
123	/*
124	Detect possible overflow before it happens:
125
126	accumulator 10 + digitval > PY_SSIZE_T_MAX if and only if*
127	accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
128	*/
129	if (accumulator > (PY_SSIZE_T_MAX - digitval) / `10`) {
130	PyErr_Format(PyExc_ValueError,
131	"Too many decimal digits in format string");
132	return -`1`;
133	}
134	accumulator = accumulator * `10` + digitval;
135	}
136	return accumulator;
137	}
138
139	/**********************************************************************/
140	/***** Functions to get field objects and specification strings ***/
141	/**********************************************************************/
142
143	/ do the equivalent of obj.name /
144	static PyObject *
145	getattr(PyObject obj, SubString name)
146	{
147	PyObject *newobj;
148	PyObject *str = SubString_new_object(name);
149	if (str == NULL)
150	return NULL;
151	newobj = PyObject_GetAttr(obj, str);
152	Py_DECREF(str);
153	return newobj;
154	}
155
156	/ do the equivalent of obj[idx], where obj is a sequence /
157	static PyObject *
158	getitem_sequence(PyObject *obj, Py_ssize_t idx)
159	{
160	return PySequence_GetItem(obj, idx);
161	}
162
163	/ do the equivalent of obj[idx], where obj is not a sequence /
164	static PyObject *
165	getitem_idx(PyObject *obj, Py_ssize_t idx)
166	{
167	PyObject *newobj;
168	PyObject *idx_obj = PyLong_FromSsize_t(idx);
169	if (idx_obj == NULL)
170	return NULL;
171	newobj = PyObject_GetItem(obj, idx_obj);
172	Py_DECREF(idx_obj);
173	return newobj;
174	}
175
176	/ do the equivalent of obj[name] /
177	static PyObject *
178	getitem_str(PyObject obj, SubString name)
179	{
180	PyObject *newobj;
181	PyObject *str = SubString_new_object(name);
182	if (str == NULL)
183	return NULL;
184	newobj = PyObject_GetItem(obj, str);
185	Py_DECREF(str);
186	return newobj;
187	}
188
189	typedef struct {
190	/ the entire string we're parsing. we assume that someone else*
191	is managing its lifetime, and that it will exist for the
192	lifetime of the iterator. can be empty /*
193	SubString str;
194
195	/ index to where we are inside field_name /
196	Py_ssize_t index;
197	} FieldNameIterator;
198
199
200	static int
201	FieldNameIterator_init(FieldNameIterator self, PyObject s,
202	Py_ssize_t start, Py_ssize_t end)
203	{
204	SubString_init(&self->str, s, start, end);
205	self->index = start;
206	return `1`;
207	}
208
209	static int
210	_FieldNameIterator_attr(FieldNameIterator self, SubString name)
211	{
212	Py_UCS4 c;
213
214	name->str = self->str.str;
215	name->start = self->index;
216
217	/ return everything until '.' or '[' /
218	while (self->index < self->str.end) {
219	c = PyUnicode_READ_CHAR(self->str.str, self->index++);
220	switch (c) {
221	case `'['`:
222	case `'.'`:
223	/ backup so that we this character will be seen next time /
224	self->index--;
225	break;
226	default:
227	continue;
228	}
229	break;
230	}
231	/ end of string is okay /
232	name->end = self->index;
233	return `1`;
234	}
235
236	static int
237	_FieldNameIterator_item(FieldNameIterator self, SubString name)
238	{
239	int bracket_seen = `0`;
240	Py_UCS4 c;
241
242	name->str = self->str.str;
243	name->start = self->index;
244
245	/ return everything until ']' /
246	while (self->index < self->str.end) {
247	c = PyUnicode_READ_CHAR(self->str.str, self->index++);
248	switch (c) {
249	case `']'`:
250	bracket_seen = `1`;
251	break;
252	default:
253	continue;
254	}
255	break;
256	}
257	/ make sure we ended with a ']' /
258	if (!bracket_seen) {
259	PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
260	return `0`;
261	}
262
263	/ end of string is okay /
264	/ don't include the ']' /
265	name->end = self->index-`1`;
266	return `1`;
267	}
268
269	/ returns 0 on error, 1 on non-error termination, and 2 if it returns a value /
270	static int
271	FieldNameIterator_next(FieldNameIterator self, int* *is_attribute,
272	Py_ssize_t name_idx, SubString name)
273	{
274	/ check at end of input /
275	if (self->index >= self->str.end)
276	return `1`;
277
278	switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
279	case `'.'`:
280	*is_attribute = `1`;
281	if (_FieldNameIterator_attr(self, name) == `0`)
282	return `0`;
283	*name_idx = -`1`;
284	break;
285	case `'['`:
286	*is_attribute = `0`;
287	if (_FieldNameIterator_item(self, name) == `0`)
288	return `0`;
289	*name_idx = get_integer(name);
290	if (*name_idx == -`1` && PyErr_Occurred())
291	return `0`;
292	break;
293	default:
294	/ Invalid character follows ']' /
295	PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
296	"follow ']' in format field specifier");
297	return `0`;
298	}
299
300	/ empty string is an error /
301	if (name->start == name->end) {
302	PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
303	return `0`;
304	}
305
306	return `2`;
307	}
308
309
310	/ input: field_name*
311	output: 'first' points to the part before the first '[' or '.'
312	'first_idx' is -1 if 'first' is not an integer, otherwise
313	it's the value of first converted to an integer
314	'rest' is an iterator to return the rest
315	*/
316	static int
317	field_name_split(PyObject str, Py_ssize_t start, Py_ssize_t end, SubString first,
318	Py_ssize_t first_idx, FieldNameIterator rest,
319	AutoNumber *auto_number)
320	{
321	Py_UCS4 c;
322	Py_ssize_t i = start;
323	int field_name_is_empty;
324	int using_numeric_index;
325
326	/ find the part up until the first '.' or '[' /
327	while (i < end) {
328	switch (c = PyUnicode_READ_CHAR(str, i++)) {
329	case `'['`:
330	case `'.'`:
331	/ backup so that we this character is available to the*
332	"rest" iterator /*
333	i--;
334	break;
335	default:
336	continue;
337	}
338	break;
339	}
340
341	/ set up the return values /
342	SubString_init(first, str, start, i);
343	FieldNameIterator_init(rest, str, i, end);
344
345	/ see if "first" is an integer, in which case it's used as an index /
346	*first_idx = get_integer(first);
347	if (*first_idx == -`1` && PyErr_Occurred())
348	return `0`;
349
350	field_name_is_empty = first->start >= first->end;
351
352	/ If the field name is omitted or if we have a numeric index*
353	specified, then we're doing numeric indexing into args. /*
354	using_numeric_index = field_name_is_empty \|\| *first_idx != -`1`;
355
356	/ We always get here exactly one time for each field we're*
357	processing. And we get here in field order (counting by left
358	braces). So this is the perfect place to handle automatic field
359	numbering if the field name is omitted. /*
360
361	/ Check if we need to do the auto-numbering. It's not needed if*
362	we're called from string.Format routines, because it's handled
363	in that class by itself. /*
364	if (auto_number) {
365	/ Initialize our auto numbering state if this is the first*
366	time we're either auto-numbering or manually numbering. /*
367	if (auto_number->an_state == ANS_INIT && using_numeric_index)
368	auto_number->an_state = field_name_is_empty ?
369	ANS_AUTO : ANS_MANUAL;
370
371	/ Make sure our state is consistent with what we're doing*
372	this time through. Only check if we're using a numeric
373	index. /*
374	if (using_numeric_index)
375	if (autonumber_state_error(auto_number->an_state,
376	field_name_is_empty))
377	return `0`;
378	/ Zero length field means we want to do auto-numbering of the*
379	fields. /*
380	if (field_name_is_empty)
381	*first_idx = (auto_number->an_field_number)++;
382	}
383
384	return `1`;
385	}
386
387
388	/*
389	get_field_object returns the object inside {}, before the
390	format_spec. It handles getindex and getattr lookups and consumes
391	the entire input string.
392	*/
393	static PyObject *
394	get_field_object(SubString input, PyObject args, PyObject *kwargs,
395	AutoNumber *auto_number)
396	{
397	PyObject *obj = NULL;
398	int ok;
399	int is_attribute;
400	SubString name;
401	SubString first;
402	Py_ssize_t index;
403	FieldNameIterator rest;
404
405	if (!field_name_split(input->str, input->start, input->end, &first,
406	&index, &rest, auto_number)) {
407	goto error;
408	}
409
410	if (index == -`1`) {
411	/ look up in kwargs /
412	PyObject *key = SubString_new_object(&first);
413	if (key == NULL) {
414	goto error;
415	}
416	if (kwargs == NULL) {
417	PyErr_SetObject(PyExc_KeyError, key);
418	Py_DECREF(key);
419	goto error;
420	}
421	/ Use PyObject_GetItem instead of PyDict_GetItem because this*
422	code is no longer just used with kwargs. It might be passed
423	a non-dict when called through format_map. /*
424	obj = PyObject_GetItem(kwargs, key);
425	Py_DECREF(key);
426	if (obj == NULL) {
427	goto error;
428	}
429	}
430	else {
431	/ If args is NULL, we have a format string with a positional field*
432	with only kwargs to retrieve it from. This can only happen when
433	used with format_map(), where positional arguments are not
434	allowed. /*
435	if (args == NULL) {
436	PyErr_SetString(PyExc_ValueError, "Format string contains "
437	"positional fields");
438	goto error;
439	}
440
441	/ look up in args /
442	obj = PySequence_GetItem(args, index);
443	if (obj == NULL) {
444	PyErr_Format(PyExc_IndexError,
445	"Replacement index %zd out of range for positional "
446	"args tuple",
447	index);
448	goto error;
449	}
450	}
451
452	/ iterate over the rest of the field_name /
453	while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
454	&name)) == `2`) {
455	PyObject *tmp;
456
457	if (is_attribute)
458	/ getattr lookup "." /
459	tmp = getattr(obj, &name);
460	else
461	/ getitem lookup "[]" /
462	if (index == -`1`)
463	tmp = getitem_str(obj, &name);
464	else
465	if (PySequence_Check(obj))
466	tmp = getitem_sequence(obj, index);
467	else
468	/ not a sequence /
469	tmp = getitem_idx(obj, index);
470	if (tmp == NULL)
471	goto error;
472
473	/ assign to obj /
474	Py_DECREF(obj);
475	obj = tmp;
476	}
477	/ end of iterator, this is the non-error case /
478	if (ok == `1`)
479	return obj;
480	error:
481	Py_XDECREF(obj);
482	return NULL;
483	}
484
485	/**********************************************************************/
486	/************** Field rendering functions ***********************/
487	/**********************************************************************/
488
489	/*
490	render_field() is the main function in this section. It takes the
491	field object and field specification string generated by
492	get_field_and_spec, and renders the field into the output string.
493
494	render_field calls fieldobj.__format__(format_spec) method, and
495	appends to the output.
496	*/
497	static int
498	render_field(PyObject fieldobj, SubString format_spec, _PyUnicodeWriter *writer)
499	{
500	int ok = `0`;
501	PyObject *result = NULL;
502	PyObject *format_spec_object = NULL;
503	int (formatter) (_PyUnicodeWriter, PyObject , PyObject , Py_ssize_t, Py_ssize_t) = NULL;
504	int err;
505
506	/ If we know the type exactly, skip the lookup of __format__ and just*
507	call the formatter directly. /*
508	if (PyUnicode_CheckExact(fieldobj))
509	formatter = _PyUnicode_FormatAdvancedWriter;
510	else if (PyLong_CheckExact(fieldobj))
511	formatter = _PyLong_FormatAdvancedWriter;
512	else if (PyFloat_CheckExact(fieldobj))
513	formatter = _PyFloat_FormatAdvancedWriter;
514	else if (PyComplex_CheckExact(fieldobj))
515	formatter = _PyComplex_FormatAdvancedWriter;
516
517	if (formatter) {
518	/ we know exactly which formatter will be called when __format__ is*
519	looked up, so call it directly, instead. /*
520	err = formatter(writer, fieldobj, format_spec->str,
521	format_spec->start, format_spec->end);
522	return (err == `0`);
523	}
524	else {
525	/ We need to create an object out of the pointers we have, because*
526	__format__ takes a string/unicode object for format_spec. /*
527	if (format_spec->str)
528	format_spec_object = PyUnicode_Substring(format_spec->str,
529	format_spec->start,
530	format_spec->end);
531	else
532	format_spec_object = PyUnicode_New(`0`, `0`);
533	if (format_spec_object == NULL)
534	goto done;
535
536	result = PyObject_Format(fieldobj, format_spec_object);
537	}
538	if (result == NULL)
539	goto done;
540
541	if (_PyUnicodeWriter_WriteStr(writer, result) == -`1`)
542	goto done;
543	ok = `1`;
544
545	done:
546	Py_XDECREF(format_spec_object);
547	Py_XDECREF(result);
548	return ok;
549	}
550
551	static int
552	parse_field(SubString str, SubString field_name, SubString *format_spec,
553	int format_spec_needs_expanding, Py_UCS4 conversion)
554	{
555	/ Note this function works if the field name is zero length,*
556	which is good. Zero length field names are handled later, in
557	field_name_split. /*
558
559	Py_UCS4 c = `0`;
560
561	/ initialize these, as they may be empty /
562	*conversion = `'\0'`;
563	SubString_init(format_spec, NULL, `0`, `0`);
564
565	/ Search for the field name. it's terminated by the end of*
566	the string, or a ':' or '!' /*
567	field_name->str = str->str;
568	field_name->start = str->start;
569	while (str->start < str->end) {
570	switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
571	case `'{'`:
572	PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
573	return `0`;
574	case `'['`:
575	for (; str->start < str->end; str->start++)
576	if (PyUnicode_READ_CHAR(str->str, str->start) == `']'`)
577	break;
578	continue;
579	case `'}'`:
580	case `':'`:
581	case `'!'`:
582	break;
583	default:
584	continue;
585	}
586	break;
587	}
588
589	field_name->end = str->start - `1`;
590	if (c == `'!'` \|\| c == `':'`) {
591	Py_ssize_t count;
592	/ we have a format specifier and/or a conversion /
593	/ don't include the last character /
594
595	/ see if there's a conversion specifier /
596	if (c == `'!'`) {
597	/ there must be another character present /
598	if (str->start >= str->end) {
599	PyErr_SetString(PyExc_ValueError,
600	"end of string while looking for conversion "
601	"specifier");
602	return `0`;
603	}
604	*conversion = PyUnicode_READ_CHAR(str->str, str->start++);
605
606	if (str->start < str->end) {
607	c = PyUnicode_READ_CHAR(str->str, str->start++);
608	if (c == `'}'`)
609	return `1`;
610	if (c != `':'`) {
611	PyErr_SetString(PyExc_ValueError,
612	"expected ':' after conversion specifier");
613	return `0`;
614	}
615	}
616	}
617	format_spec->str = str->str;
618	format_spec->start = str->start;
619	count = `1`;
620	while (str->start < str->end) {
621	switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
622	case `'{'`:
623	*format_spec_needs_expanding = `1`;
624	count++;
625	break;
626	case `'}'`:
627	count--;
628	if (count == `0`) {
629	format_spec->end = str->start - `1`;
630	return `1`;
631	}
632	break;
633	default:
634	break;
635	}
636	}
637
638	PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
639	return `0`;
640	}
641	else if (c != `'}'`) {
642	PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
643	return `0`;
644	}
645
646	return `1`;
647	}
648
649	/**********************************************************************/
650	/**** Output string allocation and escape-to-markup processing ***/
651	/**********************************************************************/
652
653	/ MarkupIterator breaks the string into pieces of either literal*
654	text, or things inside {} that need to be marked up. it is
655	designed to make it easy to wrap a Python iterator around it, for
656	use with the Formatter class /*
657
658	typedef struct {
659	SubString str;
660	} MarkupIterator;
661
662	static int
663	MarkupIterator_init(MarkupIterator self, PyObject str,
664	Py_ssize_t start, Py_ssize_t end)
665	{
666	SubString_init(&self->str, str, start, end);
667	return `1`;
668	}
669
670	/ returns 0 on error, 1 on non-error termination, and 2 if it got a*
671	string (or something to be expanded) /*
672	static int
673	MarkupIterator_next(MarkupIterator self, SubString literal,
674	int field_present, SubString field_name,
675	SubString format_spec, Py_UCS4 conversion,
676	int *format_spec_needs_expanding)
677	{
678	int at_end;
679	Py_UCS4 c = `0`;
680	Py_ssize_t start;
681	Py_ssize_t len;
682	int markup_follows = `0`;
683
684	/ initialize all of the output variables /
685	SubString_init(literal, NULL, `0`, `0`);
686	SubString_init(field_name, NULL, `0`, `0`);
687	SubString_init(format_spec, NULL, `0`, `0`);
688	*conversion = `'\0'`;
689	*format_spec_needs_expanding = `0`;
690	*field_present = `0`;
691
692	/ No more input, end of iterator. This is the normal exit*
693	path. /*
694	if (self->str.start >= self->str.end)
695	return `1`;
696
697	start = self->str.start;
698
699	/ First read any literal text. Read until the end of string, an*
700	escaped '{' or '}', or an unescaped '{'. In order to never
701	allocate memory and so I can just pass pointers around, if
702	there's an escaped '{' or '}' then we'll return the literal
703	including the brace, but no format object. The next time
704	through, we'll return the rest of the literal, skipping past
705	the second consecutive brace. /*
706	while (self->str.start < self->str.end) {
707	switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
708	case `'{'`:
709	case `'}'`:
710	markup_follows = `1`;
711	break;
712	default:
713	continue;
714	}
715	break;
716	}
717
718	at_end = self->str.start >= self->str.end;
719	len = self->str.start - start;
720
721	if ((c == `'}'`) && (at_end \|\|
722	(c != PyUnicode_READ_CHAR(self->str.str,
723	self->str.start)))) {
724	PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
725	"in format string");
726	return `0`;
727	}
728	if (at_end && c == `'{'`) {
729	PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
730	"in format string");
731	return `0`;
732	}
733	if (!at_end) {
734	if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
735	/ escaped } or {, skip it in the input. there is no*
736	markup object following us, just this literal text /*
737	self->str.start++;
738	markup_follows = `0`;
739	}
740	else
741	len--;
742	}
743
744	/ record the literal text /
745	literal->str = self->str.str;
746	literal->start = start;
747	literal->end = start + len;
748
749	if (!markup_follows)
750	return `2`;
751
752	/ this is markup; parse the field /
753	*field_present = `1`;
754	if (!parse_field(&self->str, field_name, format_spec,
755	format_spec_needs_expanding, conversion))
756	return `0`;
757	return `2`;
758	}
759
760
761	/ do the !r or !s conversion on obj /
762	static PyObject *
763	do_conversion(PyObject *obj, Py_UCS4 conversion)
764	{
765	/ XXX in pre-3.0, do we need to convert this to unicode, since it*
766	might have returned a string? /*
767	switch (conversion) {
768	case `'r'`:
769	return PyObject_Repr(obj);
770	case `'s'`:
771	return PyObject_Str(obj);
772	case `'a'`:
773	return PyObject_ASCII(obj);
774	default:
775	if (conversion > `32` && conversion < `127`) {
776	/ It's the ASCII subrange; casting to char is safe*
777	(assuming the execution character set is an ASCII
778	superset). /*
779	PyErr_Format(PyExc_ValueError,
780	"Unknown conversion specifier %c",
781	(char)conversion);
782	} else
783	PyErr_Format(PyExc_ValueError,
784	"Unknown conversion specifier \\x%x",
785	(unsigned int)conversion);
786	return NULL;
787	}
788	}
789
790	/ given:*
791
792	{field_name!conversion:format_spec}
793
794	compute the result and write it to output.
795	format_spec_needs_expanding is an optimization. if it's false,
796	just output the string directly, otherwise recursively expand the
797	format_spec string.
798
799	field_name is allowed to be zero length, in which case we
800	are doing auto field numbering.
801	*/
802
803	static int
804	output_markup(SubString field_name, SubString format_spec,
805	int format_spec_needs_expanding, Py_UCS4 conversion,
806	_PyUnicodeWriter writer, PyObject args, PyObject *kwargs,
807	int recursion_depth, AutoNumber *auto_number)
808	{
809	PyObject *tmp = NULL;
810	PyObject *fieldobj = NULL;
811	SubString expanded_format_spec;
812	SubString *actual_format_spec;
813	int result = `0`;
814
815	/ convert field_name to an object /
816	fieldobj = get_field_object(field_name, args, kwargs, auto_number);
817	if (fieldobj == NULL)
818	goto done;
819
820	if (conversion != `'\0'`) {
821	tmp = do_conversion(fieldobj, conversion);
822	if (tmp == NULL \|\| PyUnicode_READY(tmp) == -`1`)
823	goto done;
824
825	/ do the assignment, transferring ownership: fieldobj = tmp /
826	Py_DECREF(fieldobj);
827	fieldobj = tmp;
828	tmp = NULL;
829	}
830
831	/ if needed, recursively compute the format_spec /
832	if (format_spec_needs_expanding) {
833	tmp = build_string(format_spec, args, kwargs, recursion_depth-`1`,
834	auto_number);
835	if (tmp == NULL \|\| PyUnicode_READY(tmp) == -`1`)
836	goto done;
837
838	/ note that in the case we're expanding the format string,*
839	tmp must be kept around until after the call to
840	render_field. /*
841	SubString_init(&expanded_format_spec, tmp, `0`, PyUnicode_GET_LENGTH(tmp));
842	actual_format_spec = &expanded_format_spec;
843	}
844	else
845	actual_format_spec = format_spec;
846
847	if (render_field(fieldobj, actual_format_spec, writer) == `0`)
848	goto done;
849
850	result = `1`;
851
852	done:
853	Py_XDECREF(fieldobj);
854	Py_XDECREF(tmp);
855
856	return result;
857	}
858
859	/*
860	do_markup is the top-level loop for the format() method. It
861	searches through the format string for escapes to markup codes, and
862	calls other functions to move non-markup text to the output,
863	and to perform the markup to the output.
864	*/
865	static int
866	do_markup(SubString input, PyObject args, PyObject *kwargs,
867	_PyUnicodeWriter writer, int* recursion_depth, AutoNumber *auto_number)
868	{
869	MarkupIterator iter;
870	int format_spec_needs_expanding;
871	int result;
872	int field_present;
873	SubString literal;
874	SubString field_name;
875	SubString format_spec;
876	Py_UCS4 conversion;
877
878	MarkupIterator_init(&iter, input->str, input->start, input->end);
879	while ((result = MarkupIterator_next(&iter, &literal, &field_present,
880	&field_name, &format_spec,
881	&conversion,
882	&format_spec_needs_expanding)) == `2`) {
883	if (literal.end != literal.start) {
884	if (!field_present && iter.str.start == iter.str.end)
885	writer->overallocate = `0`;
886	if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
887	literal.start, literal.end) < `0`)
888	return `0`;
889	}
890
891	if (field_present) {
892	if (iter.str.start == iter.str.end)
893	writer->overallocate = `0`;
894	if (!output_markup(&field_name, &format_spec,
895	format_spec_needs_expanding, conversion, writer,
896	args, kwargs, recursion_depth, auto_number))
897	return `0`;
898	}
899	}
900	return result;
901	}
902
903
904	/*
905	build_string allocates the output string and then
906	calls do_markup to do the heavy lifting.
907	*/
908	static PyObject *
909	build_string(SubString input, PyObject args, PyObject *kwargs,
910	int recursion_depth, AutoNumber *auto_number)
911	{
912	_PyUnicodeWriter writer;
913
914	/ check the recursion level /
915	if (recursion_depth <= `0`) {
916	PyErr_SetString(PyExc_ValueError,
917	"Max string recursion exceeded");
918	return NULL;
919	}
920
921	_PyUnicodeWriter_Init(&writer);
922	writer.overallocate = `1`;
923	writer.min_length = PyUnicode_GET_LENGTH(input->str) + `100`;
924
925	if (!do_markup(input, args, kwargs, &writer, recursion_depth,
926	auto_number)) {
927	_PyUnicodeWriter_Dealloc(&writer);
928	return NULL;
929	}
930
931	return _PyUnicodeWriter_Finish(&writer);
932	}
933
934	/**********************************************************************/
935	/******** main routine ********************************************/
936	/**********************************************************************/
937
938	/ this is the main entry point /
939	static PyObject *
940	do_string_format(PyObject self, PyObject args, PyObject *kwargs)
941	{
942	SubString input;
943
944	/ PEP 3101 says only 2 levels, so that*
945	"{0:{1}}".format('abc', 's') # works
946	"{0:{1:{2}}}".format('abc', 's', '') # fails
947	*/
948	int recursion_depth = `2`;
949
950	AutoNumber auto_number;
951
952	if (PyUnicode_READY(self) == -`1`)
953	return NULL;
954
955	AutoNumber_Init(&auto_number);
956	SubString_init(&input, self, `0`, PyUnicode_GET_LENGTH(self));
957	return build_string(&input, args, kwargs, recursion_depth, &auto_number);
958	}
959
960	static PyObject *
961	do_string_format_map(PyObject self, PyObject obj)
962	{
963	return do_string_format(self, NULL, obj);
964	}
965
966
967	/**********************************************************************/
968	/******** formatteriterator ***************************************/
969	/**********************************************************************/
970
971	/ This is used to implement string.Formatter.vparse(). It exists so*
972	Formatter can share code with the built in unicode.format() method.
973	It's really just a wrapper around MarkupIterator that is callable
974	from Python. /*
975
976	typedef struct {
977	PyObject_HEAD
978	PyObject *str;
979	MarkupIterator it_markup;
980	} formatteriterobject;
981
982	static void
983	formatteriter_dealloc(formatteriterobject *it)
984	{
985	Py_XDECREF(it->str);
986	PyObject_Free(it);
987	}
988
989	/ returns a tuple:*
990	(literal, field_name, format_spec, conversion)
991
992	literal is any literal text to output. might be zero length
993	field_name is the string before the ':'. might be None
994	format_spec is the string after the ':'. mibht be None
995	conversion is either None, or the string after the '!'
996	*/
997	static PyObject *
998	formatteriter_next(formatteriterobject *it)
999	{
1000	SubString literal;
1001	SubString field_name;
1002	SubString format_spec;
1003	Py_UCS4 conversion;
1004	int format_spec_needs_expanding;
1005	int field_present;
1006	int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1007	&field_name, &format_spec, &conversion,
1008	&format_spec_needs_expanding);
1009
1010	/ all of the SubString objects point into it->str, so no*
1011	memory management needs to be done on them /*
1012	assert(`0` <= result && result <= `2`);
1013	if (result == `0` \|\| result == `1`)
1014	/ if 0, error has already been set, if 1, iterator is empty /
1015	return NULL;
1016	else {
1017	PyObject *literal_str = NULL;
1018	PyObject *field_name_str = NULL;
1019	PyObject *format_spec_str = NULL;
1020	PyObject *conversion_str = NULL;
1021	PyObject *tuple = NULL;
1022
1023	literal_str = SubString_new_object(&literal);
1024	if (literal_str == NULL)
1025	goto done;
1026
1027	field_name_str = SubString_new_object(&field_name);
1028	if (field_name_str == NULL)
1029	goto done;
1030
1031	/ if field_name is non-zero length, return a string for*
1032	format_spec (even if zero length), else return None /*
1033	format_spec_str = (field_present ?
1034	SubString_new_object_or_empty :
1035	SubString_new_object)(&format_spec);
1036	if (format_spec_str == NULL)
1037	goto done;
1038
1039	/ if the conversion is not specified, return a None,*
1040	otherwise create a one length string with the conversion
1041	character /*
1042	if (conversion == `'\0'`) {
1043	conversion_str = Py_None;
1044	Py_INCREF(conversion_str);
1045	}
1046	else
1047	conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1048	&conversion, `1`);
1049	if (conversion_str == NULL)
1050	goto done;
1051
1052	tuple = PyTuple_Pack(`4`, literal_str, field_name_str, format_spec_str,
1053	conversion_str);
1054	done:
1055	Py_XDECREF(literal_str);
1056	Py_XDECREF(field_name_str);
1057	Py_XDECREF(format_spec_str);
1058	Py_XDECREF(conversion_str);
1059	return tuple;
1060	}
1061	}
1062
1063	static PyMethodDef formatteriter_methods[] = {
1064	{NULL, NULL} / sentinel /
1065	};
1066
1067	static PyTypeObject PyFormatterIter_Type = {
1068	PyVarObject_HEAD_INIT(&PyType_Type, `0`)
1069	"formatteriterator", / tp_name /
1070	sizeof(formatteriterobject), / tp_basicsize /
1071	`0`, / tp_itemsize /
1072	/ methods /
1073	(destructor)formatteriter_dealloc, / tp_dealloc /
1074	`0`, / tp_vectorcall_offset /
1075	`0`, / tp_getattr /
1076	`0`, / tp_setattr /
1077	`0`, / tp_as_async /
1078	`0`, / tp_repr /
1079	`0`, / tp_as_number /
1080	`0`, / tp_as_sequence /
1081	`0`, / tp_as_mapping /
1082	`0`, / tp_hash /
1083	`0`, / tp_call /
1084	`0`, / tp_str /
1085	PyObject_GenericGetAttr, / tp_getattro /
1086	`0`, / tp_setattro /
1087	`0`, / tp_as_buffer /
1088	Py_TPFLAGS_DEFAULT, / tp_flags /
1089	`0`, / tp_doc /
1090	`0`, / tp_traverse /
1091	`0`, / tp_clear /
1092	`0`, / tp_richcompare /
1093	`0`, / tp_weaklistoffset /
1094	PyObject_SelfIter, / tp_iter /
1095	(iternextfunc)formatteriter_next, / tp_iternext /
1096	formatteriter_methods, / tp_methods /
1097	`0`,
1098	};
1099
1100	/ unicode_formatter_parser is used to implement*
1101	string.Formatter.vformat. it parses a string and returns tuples
1102	describing the parsed elements. It's a wrapper around
1103	stringlib/string_format.h's MarkupIterator /*
1104	static PyObject *
1105	formatter_parser(PyObject ignored, PyObject self)
1106	{
1107	formatteriterobject *it;
1108
1109	if (!PyUnicode_Check(self)) {
1110	PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1111	return NULL;
1112	}
1113
1114	if (PyUnicode_READY(self) == -`1`)
1115	return NULL;
1116
1117	it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1118	if (it == NULL)
1119	return NULL;
1120
1121	/ take ownership, give the object to the iterator /
1122	Py_INCREF(self);
1123	it->str = self;
1124
1125	/ initialize the contained MarkupIterator /
1126	MarkupIterator_init(&it->it_markup, (PyObject*)self, `0`, PyUnicode_GET_LENGTH(self));
1127	return (PyObject *)it;
1128	}
1129
1130
1131	/**********************************************************************/
1132	/******** fieldnameiterator ***************************************/
1133	/**********************************************************************/
1134
1135
1136	/ This is used to implement string.Formatter.vparse(). It parses the*
1137	field name into attribute and item values. It's a Python-callable
1138	wrapper around FieldNameIterator /*
1139
1140	typedef struct {
1141	PyObject_HEAD
1142	PyObject *str;
1143	FieldNameIterator it_field;
1144	} fieldnameiterobject;
1145
1146	static void
1147	fieldnameiter_dealloc(fieldnameiterobject *it)
1148	{
1149	Py_XDECREF(it->str);
1150	PyObject_Free(it);
1151	}
1152
1153	/ returns a tuple:*
1154	(is_attr, value)
1155	is_attr is true if we used attribute syntax (e.g., '.foo')
1156	false if we used index syntax (e.g., '[foo]')
1157	value is an integer or string
1158	*/
1159	static PyObject *
1160	fieldnameiter_next(fieldnameiterobject *it)
1161	{
1162	int result;
1163	int is_attr;
1164	Py_ssize_t idx;
1165	SubString name;
1166
1167	result = FieldNameIterator_next(&it->it_field, &is_attr,
1168	&idx, &name);
1169	if (result == `0` \|\| result == `1`)
1170	/ if 0, error has already been set, if 1, iterator is empty /
1171	return NULL;
1172	else {
1173	PyObject* result = NULL;
1174	PyObject* is_attr_obj = NULL;
1175	PyObject* obj = NULL;
1176
1177	is_attr_obj = PyBool_FromLong(is_attr);
1178	if (is_attr_obj == NULL)
1179	goto done;
1180
1181	/ either an integer or a string /
1182	if (idx != -`1`)
1183	obj = PyLong_FromSsize_t(idx);
1184	else
1185	obj = SubString_new_object(&name);
1186	if (obj == NULL)
1187	goto done;
1188
1189	/ return a tuple of values /
1190	result = PyTuple_Pack(`2`, is_attr_obj, obj);
1191
1192	done:
1193	Py_XDECREF(is_attr_obj);
1194	Py_XDECREF(obj);
1195	return result;
1196	}
1197	}
1198
1199	static PyMethodDef fieldnameiter_methods[] = {
1200	{NULL, NULL} / sentinel /
1201	};
1202
1203	static PyTypeObject PyFieldNameIter_Type = {
1204	PyVarObject_HEAD_INIT(&PyType_Type, `0`)
1205	"fieldnameiterator", / tp_name /
1206	sizeof(fieldnameiterobject), / tp_basicsize /
1207	`0`, / tp_itemsize /
1208	/ methods /
1209	(destructor)fieldnameiter_dealloc, / tp_dealloc /
1210	`0`, / tp_vectorcall_offset /
1211	`0`, / tp_getattr /
1212	`0`, / tp_setattr /
1213	`0`, / tp_as_async /
1214	`0`, / tp_repr /
1215	`0`, / tp_as_number /
1216	`0`, / tp_as_sequence /
1217	`0`, / tp_as_mapping /
1218	`0`, / tp_hash /
1219	`0`, / tp_call /
1220	`0`, / tp_str /
1221	PyObject_GenericGetAttr, / tp_getattro /
1222	`0`, / tp_setattro /
1223	`0`, / tp_as_buffer /
1224	Py_TPFLAGS_DEFAULT, / tp_flags /
1225	`0`, / tp_doc /
1226	`0`, / tp_traverse /
1227	`0`, / tp_clear /
1228	`0`, / tp_richcompare /
1229	`0`, / tp_weaklistoffset /
1230	PyObject_SelfIter, / tp_iter /
1231	(iternextfunc)fieldnameiter_next, / tp_iternext /
1232	fieldnameiter_methods, / tp_methods /
1233	`0`};
1234
1235	/ unicode_formatter_field_name_split is used to implement*
1236	string.Formatter.vformat. it takes a PEP 3101 "field name", and
1237	returns a tuple of (first, rest): "first", the part before the
1238	first '.' or '['; and "rest", an iterator for the rest of the field
1239	name. it's a wrapper around stringlib/string_format.h's
1240	field_name_split. The iterator it returns is a
1241	FieldNameIterator /*
1242	static PyObject *
1243	formatter_field_name_split(PyObject ignored, PyObject self)
1244	{
1245	SubString first;
1246	Py_ssize_t first_idx;
1247	fieldnameiterobject *it;
1248
1249	PyObject *first_obj = NULL;
1250	PyObject *result = NULL;
1251
1252	if (!PyUnicode_Check(self)) {
1253	PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1254	return NULL;
1255	}
1256
1257	if (PyUnicode_READY(self) == -`1`)
1258	return NULL;
1259
1260	it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1261	if (it == NULL)
1262	return NULL;
1263
1264	/ take ownership, give the object to the iterator. this is*
1265	just to keep the field_name alive /*
1266	Py_INCREF(self);
1267	it->str = self;
1268
1269	/ Pass in auto_number = NULL. We'll return an empty string for*
1270	first_obj in that case. /*
1271	if (!field_name_split((PyObject*)self, `0`, PyUnicode_GET_LENGTH(self),
1272	&first, &first_idx, &it->it_field, NULL))
1273	goto done;
1274
1275	/ first becomes an integer, if possible; else a string /
1276	if (first_idx != -`1`)
1277	first_obj = PyLong_FromSsize_t(first_idx);
1278	else
1279	/ convert "first" into a string object /
1280	first_obj = SubString_new_object(&first);
1281	if (first_obj == NULL)
1282	goto done;
1283
1284	/ return a tuple of values /
1285	result = PyTuple_Pack(`2`, first_obj, it);
1286
1287	done:
1288	Py_XDECREF(it);
1289	Py_XDECREF(first_obj);
1290	return result;
1291	}
1292

Browse the source code of python/Objects/stringlib/unicode_format.h