quote.c source code [tensorflow/external/nasm/asm/quote.c]

1	/* ----------------------------------------------------------------------- *
2	*
3	* Copyright 1996-2016 The NASM Authors - All Rights Reserved
4	* See the file AUTHORS included with the NASM distribution for
5	* the specific copyright holders.
6	*
7	* Redistribution and use in source and binary forms, with or without
8	* modification, are permitted provided that the following
9	* conditions are met:
10	*
11	* * Redistributions of source code must retain the above copyright
12	* notice, this list of conditions and the following disclaimer.
13	* * Redistributions in binary form must reproduce the above
14	* copyright notice, this list of conditions and the following
15	* disclaimer in the documentation and/or other materials provided
16	* with the distribution.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19	* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20	* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21	* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22	* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29	* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30	* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31	*
32	* ----------------------------------------------------------------------- */
33
34	/*
35	* quote.c
36	*/
37
38	#include "compiler.h"
39
40	#include <stdlib.h>
41
42	#include "nasmlib.h"
43	#include "quote.h"
44
/*
 * Return a newly allocated, NUL-terminated, quoted copy of the LEN
 * bytes starting at STR.
 *
 * If every byte lies in the ' '..'~' range and at least one of the
 * characters ' and " does not occur in the input, the bytes are copied
 * verbatim between '...' (preferred) or "..." quotes.  Otherwise the
 * result is a `...` string in which backquotes, backslashes and bytes
 * outside the ' '..'~' range are written as backslash escapes.
 *
 * The result buffer is obtained from nasm_malloc().
 */
char *nasm_quote(const char *str, size_t len)
{
    const char *p, *ep;
    char c, c1, *q, *nstr;
    unsigned char uc;
    bool sq_ok, dq_ok;
    size_t qlen;

    /*
     * Pass 1: find out which quote styles are usable and how long the
     * body of a `...` string would be.
     */
    sq_ok = dq_ok = true;
    ep = str + len;
    qlen = 0;                   /* Length if we need `...` quotes */
    for (p = str; p < ep; p++) {
        c = *p;
        switch (c) {
        case '\'':
            sq_ok = false;
            qlen++;
            break;
        case '\"':
            dq_ok = false;
            qlen++;
            break;
        case '`':
        case '\\':
            qlen += 2;
            break;
        default:
            if (c < ' ' || c > '~') {
                sq_ok = dq_ok = false;
                switch (c) {
                case '\a':
                case '\b':
                case '\t':
                case '\n':
                case '\v':
                case '\f':
                case '\r':
                case 27:
                    qlen += 2;  /* Two-character named escape */
                    break;
                default:
                    /*
                     * Octal escape.  If the next input byte is itself
                     * an octal digit, a short escape would absorb it
                     * when read back, so all three digits are needed.
                     */
                    c1 = (p + 1 < ep) ? p[1] : 0;
                    if (c1 >= '0' && c1 <= '7')
                        uc = 0377;      /* Must use the full form */
                    else
                        uc = c;
                    if (uc > 077)
                        qlen++;
                    if (uc > 07)
                        qlen++;
                    qlen += 2;
                    break;
                }
            } else {
                qlen++;
            }
            break;
        }
    }

    if (sq_ok || dq_ok) {
        /* Use '...' or "..." */
        nstr = nasm_malloc(len + 3);
        nstr[0] = nstr[len + 1] = sq_ok ? '\'' : '\"';
        nstr[len + 2] = '\0';
        if (len > 0)
            memcpy(nstr + 1, str, len);
    } else {
        /* Need to use `...` quoted syntax */
        nstr = nasm_malloc(qlen + 3);
        q = nstr;
        *q++ = '`';
        for (p = str; p < ep; p++) {
            c = *p;
            switch (c) {
            case '`':
            case '\\':
                *q++ = '\\';
                *q++ = c;
                break;
            case 7:
                *q++ = '\\';
                *q++ = 'a';
                break;
            case 8:
                *q++ = '\\';
                *q++ = 'b';
                break;
            case 9:
                *q++ = '\\';
                *q++ = 't';
                break;
            case 10:
                *q++ = '\\';
                *q++ = 'n';
                break;
            case 11:
                *q++ = '\\';
                *q++ = 'v';
                break;
            case 12:
                *q++ = '\\';
                *q++ = 'f';
                break;
            case 13:
                *q++ = '\\';
                *q++ = 'r';
                break;
            case 27:
                *q++ = '\\';
                *q++ = 'e';
                break;
            default:
                if (c < ' ' || c > '~') {
                    /* Same digit-count decision as in the sizing pass */
                    c1 = (p + 1 < ep) ? p[1] : 0;
                    if (c1 >= '0' && c1 <= '7')
                        uc = 0377;      /* Must use the full form */
                    else
                        uc = c;
                    *q++ = '\\';
                    if (uc > 077)
                        *q++ = ((unsigned char)c >> 6) + '0';
                    if (uc > 07)
                        *q++ = (((unsigned char)c >> 3) & 7) + '0';
                    *q++ = ((unsigned char)c & 7) + '0';
                } else {
                    *q++ = c;
                }
                break;
            }
        }
        *q++ = '`';
        *q++ = '\0';
        /* The emitted size must agree with what pass 1 computed */
        nasm_assert((size_t)(q - nstr) == qlen + 3);
    }
    return nstr;
}
183
/*
 * Store the UTF-8 style encoding of the value V at Q and return a
 * pointer just past the last byte written.
 *
 * Negative values write nothing and return Q unchanged.  Values above
 * 0x1fffff are written as five- or six-byte sequences (lead bytes
 * 0xf8 / 0xfc).  The caller must provide room for up to six bytes.
 */
static char *emit_utf8(char *q, int32_t v)
{
    if (v < 0) {
        /* Impossible - do nothing */
    } else if (v <= 0x7f) {
        *q++ = (char)v;
    } else if (v <= 0x000007ff) {
        *q++ = (char)(0xc0 | (v >> 6));
        *q++ = (char)(0x80 | (v & 63));
    } else if (v <= 0x0000ffff) {
        *q++ = (char)(0xe0 | (v >> 12));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    } else if (v <= 0x001fffff) {
        *q++ = (char)(0xf0 | (v >> 18));
        *q++ = (char)(0x80 | ((v >> 12) & 63));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    } else if (v <= 0x03ffffff) {
        *q++ = (char)(0xf8 | (v >> 24));
        *q++ = (char)(0x80 | ((v >> 18) & 63));
        *q++ = (char)(0x80 | ((v >> 12) & 63));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    } else {
        *q++ = (char)(0xfc | (v >> 30));
        *q++ = (char)(0x80 | ((v >> 24) & 63));
        *q++ = (char)(0x80 | ((v >> 18) & 63));
        *q++ = (char)(0x80 | ((v >> 12) & 63));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    }
    return q;
}
218
/*
 * Do an in-place dequoting of the specified string, returning the
 * resulting length (the result may contain embedded nulls.)
 *
 * In-place replacement is possible since the unquoted length is always
 * shorter than or equal to the quoted length.
 *
 * If ep is non-NULL, *ep is set to point to the final quote, or to the
 * null if improperly quoted.  If the input does not begin with a quote
 * character it is left unchanged, its length is returned, and *ep
 * points to its terminating null.  For an empty input 0 is returned
 * and *ep is not written.
 *
 * Only the '...' and "..." forms store a terminating null after the
 * result; for `...` strings use the returned length.
 */
size_t nasm_unquote(char *str, char **ep)
{
    char bq;
    char *p, *q;                /* p: read cursor, q: write cursor */
    char *escp = NULL;
    char c;
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs
    } state;
    int ndig = 0;               /* Digits still accepted for this escape */
    int32_t nval = 0;           /* Value accumulated for this escape */

    p = q = str;

    bq = *p++;
    if (!bq)
        return 0;

    switch (bq) {
    case '\'':
    case '\"':
        /* '...' or "..." string */
        while ((c = *p) && c != bq) {
            p++;
            *q++ = c;
        }
        *q = '\0';
        break;

    case '`':
        /* `...` string */
        state = st_start;

        while ((c = *p)) {
            p++;
            switch (state) {
            case st_start:
                switch (c) {
                case '\\':
                    state = st_backslash;
                    break;
                case '`':
                    p--;        /* Leave p on the closing quote */
                    goto out;
                default:
                    *q++ = c;
                    break;
                }
                break;

            case st_backslash:
                state = st_start;
                escp = p;       /* Beginning of argument sequence */
                nval = 0;
                switch (c) {
                case 'a':
                    *q++ = 7;
                    break;
                case 'b':
                    *q++ = 8;
                    break;
                case 'e':
                    *q++ = 27;
                    break;
                case 'f':
                    *q++ = 12;
                    break;
                case 'n':
                    *q++ = 10;
                    break;
                case 'r':
                    *q++ = 13;
                    break;
                case 't':
                    *q++ = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    ndig = 4;
                    break;
                case 'U':
                    state = st_ucs;
                    ndig = 8;
                    break;
                case 'v':
                    *q++ = 11;
                    break;
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = 2;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 2;   /* Up to two more digits */
                    nval = c - '0';
                    break;
                default:
                    /* Unknown escape: the character stands for itself */
                    *q++ = c;
                    break;
                }
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (c - '0');
                    if (!--ndig) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = nval;
                    state = st_start;
                }
                break;

            case st_hex:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (!--ndig) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* With no digits at all, emit the escape letter */
                    *q++ = (p > escp) ? nval : escp[-1];
                    state = st_start;
                }
                break;

            case st_ucs:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (!--ndig) {
                        q = emit_utf8(q, nval);
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    if (p > escp)
                        q = emit_utf8(q, nval);
                    else
                        *q++ = escp[-1];
                    state = st_start;
                }
                break;
            }
        }

        /* Input ended without a closing quote: flush a pending escape */
        switch (state) {
        case st_start:
        case st_backslash:
            break;
        case st_oct:
            *q++ = nval;
            break;
        case st_hex:
            *q++ = (p > escp) ? nval : escp[-1];
            break;
        case st_ucs:
            if (p > escp)
                q = emit_utf8(q, nval);
            else
                *q++ = escp[-1];
            break;
        }
    out:
        break;

    default:
        /* Not a quoted string, just return the input... */
        p = q = strchr(str, '\0');
        break;
    }

    if (ep)
        *ep = p;
    return q - str;
}
422
/*
 * Find the end of a quoted string; returns the pointer to the terminating
 * character (either the ending quote or the null character, if
 * unterminated.)
 *
 * If str does not begin with ', " or `, str itself is returned.
 */
char *nasm_skip_string(char *str)
{
    const char bq = str[0];
    char *p;

    if (bq != '\'' && bq != '\"' && bq != '`')
        return str;             /* Not a string... */

    for (p = str + 1; *p && *p != bq; p++) {
        /*
         * Only `...` strings have escapes.  For the purpose of finding
         * the end of the string it is enough to step over the single
         * character after a backslash, since only a backslash or a
         * backquote there could otherwise be misread.  Never step over
         * the terminating null.
         */
        if (bq == '`' && *p == '\\' && p[1])
            p++;
    }
    return p;
}
480

Browse the source code of tensorflow/external/nasm/asm/quote.c