1 | /* ----------------------------------------------------------------------- * |
2 | * |
3 | * Copyright 1996-2016 The NASM Authors - All Rights Reserved |
4 | * See the file AUTHORS included with the NASM distribution for |
5 | * the specific copyright holders. |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following |
9 | * conditions are met: |
10 | * |
11 | * * Redistributions of source code must retain the above copyright |
12 | * notice, this list of conditions and the following disclaimer. |
13 | * * Redistributions in binary form must reproduce the above |
14 | * copyright notice, this list of conditions and the following |
15 | * disclaimer in the documentation and/or other materials provided |
16 | * with the distribution. |
17 | * |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
19 | * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
20 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
21 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
22 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
23 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
25 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
26 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
27 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
29 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
30 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
31 | * |
32 | * ----------------------------------------------------------------------- */ |
33 | |
34 | /* |
35 | * quote.c |
36 | */ |
37 | |
38 | #include "compiler.h" |
39 | |
40 | #include <stdlib.h> |
41 | |
42 | #include "nasmlib.h" |
43 | #include "quote.h" |
44 | |
/*
 * Build a quoted representation of the len bytes starting at str and
 * return it in a NUL-terminated buffer obtained from nasm_malloc().
 *
 * The simplest quoting style that can represent the data is chosen:
 *   '...'  if the data is entirely printable ASCII (' ' through '~')
 *          and holds no single quote;
 *   "..."  otherwise, if it is entirely printable ASCII and holds no
 *          double quote;
 *   `...`  otherwise, with backquote, backslash and every byte outside
 *          the printable range written as a backslash escape.
 *
 * In the first two styles the bytes are copied verbatim.
 */
char *nasm_quote(const char *str, size_t len)
{
    const char *const end = str + len;
    const char *src;
    char *out, *dst;
    char ch, next, letter;
    unsigned char byte, span;
    bool single_ok = true;
    bool double_ok = true;
    size_t esclen = 0;          /* Payload size if `...` is required */

    /* Pass 1: find out which styles are usable and size the `...` form */
    for (src = str; src < end; src++) {
        ch = *src;

        if (ch == '\'') {
            single_ok = false;
            esclen++;
        } else if (ch == '\"') {
            double_ok = false;
            esclen++;
        } else if (ch == '`' || ch == '\\') {
            esclen += 2;
        } else if (ch >= ' ' && ch <= '~') {
            esclen++;
        } else {
            /* Anything outside printable ASCII forces the `...` form */
            single_ok = double_ok = false;
            switch (ch) {
            case '\a':
            case '\b':
            case '\t':
            case '\n':
            case '\v':
            case '\f':
            case '\r':
            case 27:
                esclen += 2;    /* Backslash plus one letter */
                break;
            default:
                /*
                 * Octal escape.  If an octal digit follows, all three
                 * digits must be written so that the following digit
                 * is not read as part of this escape.
                 */
                next = (src + 1 < end) ? src[1] : 0;
                if (next >= '0' && next <= '7')
                    span = 0377;
                else
                    span = (unsigned char)ch;
                esclen += 2;    /* Backslash plus the last digit */
                if (span > 07)
                    esclen++;
                if (span > 077)
                    esclen++;
                break;
            }
        }
    }

    if (single_ok || double_ok) {
        /* Verbatim copy between a pair of '...' or "..." delimiters */
        const char delim = single_ok ? '\'' : '\"';

        out = nasm_malloc(len + 3);
        out[0] = delim;
        if (len > 0)
            memcpy(out + 1, str, len);
        out[len + 1] = delim;
        out[len + 2] = '\0';
        return out;
    }

    /* Pass 2: emit the escaped `...` form */
    out = nasm_malloc(esclen + 3);
    dst = out;
    *dst++ = '`';

    for (src = str; src < end; src++) {
        ch = *src;

        /* Bytes with a dedicated two-character escape */
        letter = 0;
        switch (ch) {
        case '`':
        case '\\':
            letter = ch;
            break;
        case 7:
            letter = 'a';
            break;
        case 8:
            letter = 'b';
            break;
        case 9:
            letter = 't';
            break;
        case 10:
            letter = 'n';
            break;
        case 11:
            letter = 'v';
            break;
        case 12:
            letter = 'f';
            break;
        case 13:
            letter = 'r';
            break;
        case 27:
            letter = 'e';
            break;
        default:
            break;
        }

        if (letter) {
            *dst++ = '\\';
            *dst++ = letter;
            continue;
        }

        if (ch >= ' ' && ch <= '~') {
            *dst++ = ch;        /* Printable: copied as is */
            continue;
        }

        /* Octal escape, using the same width rule as in pass 1 */
        next = (src + 1 < end) ? src[1] : 0;
        byte = (unsigned char)ch;
        if (next >= '0' && next <= '7')
            span = 0377;
        else
            span = byte;

        *dst++ = '\\';
        if (span > 077)
            *dst++ = (byte >> 6) + '0';
        if (span > 07)
            *dst++ = ((byte >> 3) & 7) + '0';
        *dst++ = (byte & 7) + '0';
    }

    *dst++ = '`';
    *dst++ = '\0';

    /* Both passes must agree on the size of the output */
    nasm_assert((size_t)(dst - out) == esclen + 3);
    return out;
}
183 | |
/*
 * Store the value v at q using the UTF-8 encoding scheme, including
 * the historical 5- and 6-byte forms for values above 0x1fffff, and
 * return a pointer just past the last byte written.
 *
 * A negative v writes nothing and returns q unchanged.
 */
static char *emit_utf8(char *q, int32_t v)
{
    int lead;                   /* Marker bits of the leading byte */
    int tail;                   /* Number of continuation bytes */

    if (v < 0)
        return q;               /* Impossible - do nothing */

    if (v <= 0x7f) {
        *q++ = v;               /* Single byte, stored as is */
        return q;
    }

    if (v <= 0x000007ff) {
        lead = 0xc0;
        tail = 1;
    } else if (v <= 0x0000ffff) {
        lead = 0xe0;
        tail = 2;
    } else if (v <= 0x001fffff) {
        lead = 0xf0;
        tail = 3;
    } else if (v <= 0x03ffffff) {
        lead = 0xf8;
        tail = 4;
    } else {
        lead = 0xfc;
        tail = 5;
    }

    /* Leading byte carries the bits above all the continuation bytes */
    *q++ = lead | (v >> (6 * tail));

    /* Each continuation byte carries six bits, most significant first */
    while (tail-- > 0)
        *q++ = 0x80 | ((v >> (6 * tail)) & 63);

    return q;
}
218 | |
/*
 * Do an *in-place* dequoting of the specified string, returning the
 * resulting length.  The result may contain embedded NUL bytes, so the
 * returned length rather than strlen() is authoritative.
 *
 * The first character of str selects the quoting style:
 *   '...' or "..."  the contents are copied verbatim up to the
 *                   matching quote character;
 *   `...`           backslash escapes are interpreted: the letters
 *                   a b e f n r t v, \xH[H], up to three octal digits,
 *                   \uHHHH and \UHHHHHHHH (stored as UTF-8); any other
 *                   escaped character stands for itself.
 * Anything else is not a quoted string; the buffer is left untouched
 * and its full length is returned.
 *
 * In-place replacement is possible since the unquoted length is always
 * shorter than or equal to the quoted length.  For all quoted styles a
 * NUL byte is stored right after the unquoted data.
 *
 * If ep is non-NULL, *ep is set to point to the final quote, or to the
 * terminating NUL if the string is improperly quoted, not quoted at
 * all, or empty.
 */
size_t nasm_unquote(char *str, char **ep)
{
    char bq;
    char *p, *q;
    char *escp = NULL;
    char c;
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs
    } state;
    int ndig = 0;
    int32_t nval = 0;

    p = q = str;                /* p reads, q writes; q never passes p */

    bq = *p++;
    if (!bq) {
        /* Empty input: still report the end position to the caller */
        if (ep)
            *ep = str;
        return 0;
    }

    switch (bq) {
    case '\'':
    case '\"':
        /* '...' or "..." string */
        while ((c = *p) && c != bq) {
            p++;
            *q++ = c;
        }
        *q = '\0';
        break;

    case '`':
        /* `...` string */
        state = st_start;

        while ((c = *p)) {
            p++;
            switch (state) {
            case st_start:
                switch (c) {
                case '\\':
                    state = st_backslash;
                    break;
                case '`':
                    p--;        /* Leave p on the closing quote */
                    goto out;
                default:
                    *q++ = c;
                    break;
                }
                break;

            case st_backslash:
                state = st_start;
                escp = p;       /* Beginning of argument sequence */
                nval = 0;
                switch (c) {
                case 'a':
                    *q++ = 7;
                    break;
                case 'b':
                    *q++ = 8;
                    break;
                case 'e':
                    *q++ = 27;
                    break;
                case 'f':
                    *q++ = 12;
                    break;
                case 'n':
                    *q++ = 10;
                    break;
                case 'r':
                    *q++ = 13;
                    break;
                case 't':
                    *q++ = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    ndig = 4;
                    break;
                case 'U':
                    state = st_ucs;
                    ndig = 8;
                    break;
                case 'v':
                    *q++ = 11;
                    break;
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = 2;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 2;   /* Up to two more digits */
                    nval = c - '0';
                    break;
                default:
                    /* Unknown escape: the character stands for itself */
                    *q++ = c;
                    break;
                }
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (c - '0');
                    if (!--ndig) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = nval;
                    state = st_start;
                }
                break;

            case st_hex:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (!--ndig) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* No digits at all: emit the escape letter itself */
                    *q++ = (p > escp) ? nval : escp[-1];
                    state = st_start;
                }
                break;

            case st_ucs:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    /*
                     * Accumulate in unsigned arithmetic: the eighth
                     * digit of \U can carry into the sign bit, and a
                     * left shift that overflows a signed type is
                     * undefined.  A value with the top bit set ends up
                     * negative and is dropped by emit_utf8().
                     */
                    nval = (int32_t)(((uint32_t)nval << 4) +
                                     (uint32_t)numvalue(c));
                    if (!--ndig) {
                        q = emit_utf8(q, nval);
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    if (p > escp)
                        q = emit_utf8(q, nval);
                    else
                        *q++ = escp[-1]; /* No digits: emit 'u' or 'U' */
                    state = st_start;
                }
                break;
            }
        }

        /* Input ended without a closing quote: flush a pending escape */
        switch (state) {
        case st_start:
        case st_backslash:
            break;
        case st_oct:
            *q++ = nval;
            break;
        case st_hex:
            *q++ = (p > escp) ? nval : escp[-1];
            break;
        case st_ucs:
            if (p > escp)
                q = emit_utf8(q, nval);
            else
                *q++ = escp[-1];
            break;
        }
    out:
        /*
         * Terminate the result like the '...' and "..." case does.
         * q trails p by at least the opening quote, so this store
         * stays inside the original string.
         */
        *q = '\0';
        break;

    default:
        /* Not a quoted string, just return the input... */
        p = q = strchr(str, '\0');
        break;
    }

    if (ep)
        *ep = p;
    return q-str;
}
422 | |
/*
 * Locate the end of the quoted string that begins at str.
 *
 * Returns a pointer to the closing quote character, or to the
 * terminating NUL if the string is unterminated.  If str does not
 * start with one of the quote characters ' " or `, str itself is
 * returned.
 */
char *nasm_skip_string(char *str)
{
    const char open = *str;
    char *cur;

    if (open == '\'' || open == '\"') {
        /* '...' or "..." string: no escapes, stop at the same quote */
        cur = str + 1;
        while (*cur != '\0' && *cur != open)
            cur++;
        return cur;
    }

    if (open == '`') {
        /*
         * `...` string.  Only a backslash matters for finding the end:
         * it makes the next character, whatever it is, part of the
         * string, so that character is stepped over unexamined.
         */
        cur = str + 1;
        while (*cur != '\0' && *cur != '`') {
            if (*cur == '\\' && cur[1] != '\0')
                cur++;          /* Skip the escaped character too */
            cur++;
        }
        return cur;             /* Closing quote, or NUL if unterminated */
    }

    return str;                 /* Not a string... */
}
480 | |