1 | /* |
2 | __ __ _ |
3 | ___\ \/ /_ __ __ _| |_ |
4 | / _ \\ /| '_ \ / _` | __| |
5 | | __// \| |_) | (_| | |_ |
6 | \___/_/\_\ .__/ \__,_|\__| |
7 | |_| XML parser |
8 | |
9 | Copyright (c) 1997-2000 Thai Open Source Software Center Ltd |
10 | Copyright (c) 2000 Clark Cooper <[email protected]> |
11 | Copyright (c) 2002 Greg Stein <[email protected]> |
12 | Copyright (c) 2002-2006 Karl Waclawek <[email protected]> |
13 | Copyright (c) 2002-2003 Fred L. Drake, Jr. <[email protected]> |
14 | Copyright (c) 2005-2009 Steven Solie <[email protected]> |
15 | Copyright (c) 2016-2021 Sebastian Pipping <[email protected]> |
16 | Copyright (c) 2017 Rhodri James <[email protected]> |
17 | Copyright (c) 2019 David Loffredo <[email protected]> |
18 | Copyright (c) 2021 Dong-hee Na <[email protected]> |
19 | Licensed under the MIT license: |
20 | |
21 | Permission is hereby granted, free of charge, to any person obtaining |
22 | a copy of this software and associated documentation files (the |
23 | "Software"), to deal in the Software without restriction, including |
24 | without limitation the rights to use, copy, modify, merge, publish, |
25 | distribute, sublicense, and/or sell copies of the Software, and to permit |
26 | persons to whom the Software is furnished to do so, subject to the |
27 | following conditions: |
28 | |
29 | The above copyright notice and this permission notice shall be included |
30 | in all copies or substantial portions of the Software. |
31 | |
32 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
33 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
34 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN |
35 | NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, |
36 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
37 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
38 | USE OR OTHER DEALINGS IN THE SOFTWARE. |
39 | */ |
40 | |
41 | #include <expat_config.h> |
42 | |
43 | #include <stddef.h> |
44 | |
45 | #ifdef _WIN32 |
46 | # include "winconfig.h" |
47 | #endif |
48 | |
49 | #include "expat_external.h" |
50 | #include "internal.h" |
51 | #include "xmlrole.h" |
52 | #include "ascii.h" |
53 | |
54 | /* Doesn't check: |
55 | |
56 | that ,| are not mixed in a model group |
57 | content of literals |
58 | |
59 | */ |
60 | |
/* Keyword spellings that the role handlers below compare against token
   text via XmlNameMatchesAscii().  Each one is a NUL-terminated array
   assembled from the ASCII_* constants rather than written as a string
   literal (presumably so the bytes are ASCII whatever the compiler's
   execution character set is -- confirm against ascii.h).
   KW_IGNORE and KW_INCLUDE are only defined when XML_DTD is defined. */
static const char KW_ANY[] = {ASCII_A, ASCII_N, ASCII_Y, '\0'};
static const char KW_ATTLIST[]
    = {ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'};
static const char KW_CDATA[]
    = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
static const char KW_DOCTYPE[]
    = {ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'};
static const char KW_ELEMENT[]
    = {ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'};
static const char KW_EMPTY[]
    = {ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'};
static const char KW_ENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
                                   ASCII_I, ASCII_E, ASCII_S, '\0'};
static const char KW_ENTITY[]
    = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
static const char KW_FIXED[]
    = {ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'};
static const char KW_ID[] = {ASCII_I, ASCII_D, '\0'};
static const char KW_IDREF[]
    = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
static const char KW_IDREFS[]
    = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
#ifdef XML_DTD
static const char KW_IGNORE[]
    = {ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'};
#endif
static const char KW_IMPLIED[]
    = {ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'};
#ifdef XML_DTD
static const char KW_INCLUDE[]
    = {ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'};
#endif
static const char KW_NDATA[]
    = {ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
static const char KW_NMTOKEN[]
    = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
static const char KW_NMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
                                   ASCII_E, ASCII_N, ASCII_S, '\0'};
static const char KW_NOTATION[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
                                   ASCII_I, ASCII_O, ASCII_N, '\0'};
static const char KW_PCDATA[]
    = {ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
static const char KW_PUBLIC[]
    = {ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'};
static const char KW_REQUIRED[] = {ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I,
                                   ASCII_R, ASCII_E, ASCII_D, '\0'};
static const char KW_SYSTEM[]
    = {ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'};
109 | |
/* Yields the minBytesPerChar member of the given encoding.  The handlers
   use it to step over leading characters of a token before matching a
   keyword.  The #ifndef lets an earlier definition take precedence. */
#ifndef MIN_BYTES_PER_CHAR
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
113 | |
/* Resets state->handler once a declaration has been completed.
   With XML_DTD the target depends on state->documentEntity: non-zero
   selects internalSubset, zero selects externalSubset1.  Without XML_DTD
   the target is always internalSubset. */
#ifdef XML_DTD
#  define setTopLevel(state)                                                   \
    ((state)->handler                                                          \
     = ((state)->documentEntity ? internalSubset : externalSubset1))
#else /* not XML_DTD */
#  define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
121 | |
/* Function type shared by every role handler in this file.
   A handler receives the current state, the token code, and ptr/end/enc,
   which the handlers pass to XmlNameMatchesAscii() when they need to
   compare the token text with a keyword (presumably ptr and end delimit
   the token -- confirm against the tokenizer).  It returns an XML_ROLE_*
   value and may replace state->handler. */
typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, int tok,
                                   const char *ptr, const char *end,
                                   const ENCODING *enc);
125 | |
/* Forward declarations of all role handlers, so that each one can install
   any other as state->handler.  The externalSubset* and condSect* handlers
   exist only when XML_DTD is defined. */
static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2,
    doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2,
    entity3, entity4, entity5, entity6, entity7, entity8, entity9, entity10,
    notation0, notation1, notation2, notation3, notation4, attlist0, attlist1,
    attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8,
    attlist9, element0, element1, element2, element3, element4, element5,
    element6, element7,
#ifdef XML_DTD
    externalSubset0, externalSubset1, condSect0, condSect1, condSect2,
#endif /* XML_DTD */
    declClose, error;

/* Called by every handler for a token it does not handle itself. */
static int FASTCALL common(PROLOG_STATE *state, int tok);
139 | |
140 | static int PTRCALL |
141 | prolog0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
142 | const ENCODING *enc) { |
143 | switch (tok) { |
144 | case XML_TOK_PROLOG_S: |
145 | state->handler = prolog1; |
146 | return XML_ROLE_NONE; |
147 | case XML_TOK_XML_DECL: |
148 | state->handler = prolog1; |
149 | return XML_ROLE_XML_DECL; |
150 | case XML_TOK_PI: |
151 | state->handler = prolog1; |
152 | return XML_ROLE_PI; |
153 | case XML_TOK_COMMENT: |
154 | state->handler = prolog1; |
155 | return XML_ROLE_COMMENT; |
156 | case XML_TOK_BOM: |
157 | return XML_ROLE_NONE; |
158 | case XML_TOK_DECL_OPEN: |
159 | if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, |
160 | KW_DOCTYPE)) |
161 | break; |
162 | state->handler = doctype0; |
163 | return XML_ROLE_DOCTYPE_NONE; |
164 | case XML_TOK_INSTANCE_START: |
165 | state->handler = error; |
166 | return XML_ROLE_INSTANCE_START; |
167 | } |
168 | return common(state, tok); |
169 | } |
170 | |
171 | static int PTRCALL |
172 | prolog1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
173 | const ENCODING *enc) { |
174 | switch (tok) { |
175 | case XML_TOK_PROLOG_S: |
176 | return XML_ROLE_NONE; |
177 | case XML_TOK_PI: |
178 | return XML_ROLE_PI; |
179 | case XML_TOK_COMMENT: |
180 | return XML_ROLE_COMMENT; |
181 | case XML_TOK_BOM: |
182 | /* This case can never arise. To reach this role function, the |
183 | * parse must have passed through prolog0 and therefore have had |
184 | * some form of input, even if only a space. At that point, a |
185 | * byte order mark is no longer a valid character (though |
186 | * technically it should be interpreted as a non-breaking space), |
187 | * so will be rejected by the tokenizing stages. |
188 | */ |
189 | return XML_ROLE_NONE; /* LCOV_EXCL_LINE */ |
190 | case XML_TOK_DECL_OPEN: |
191 | if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, |
192 | KW_DOCTYPE)) |
193 | break; |
194 | state->handler = doctype0; |
195 | return XML_ROLE_DOCTYPE_NONE; |
196 | case XML_TOK_INSTANCE_START: |
197 | state->handler = error; |
198 | return XML_ROLE_INSTANCE_START; |
199 | } |
200 | return common(state, tok); |
201 | } |
202 | |
203 | static int PTRCALL |
204 | prolog2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
205 | const ENCODING *enc) { |
206 | UNUSED_P(ptr); |
207 | UNUSED_P(end); |
208 | UNUSED_P(enc); |
209 | switch (tok) { |
210 | case XML_TOK_PROLOG_S: |
211 | return XML_ROLE_NONE; |
212 | case XML_TOK_PI: |
213 | return XML_ROLE_PI; |
214 | case XML_TOK_COMMENT: |
215 | return XML_ROLE_COMMENT; |
216 | case XML_TOK_INSTANCE_START: |
217 | state->handler = error; |
218 | return XML_ROLE_INSTANCE_START; |
219 | } |
220 | return common(state, tok); |
221 | } |
222 | |
223 | static int PTRCALL |
224 | doctype0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
225 | const ENCODING *enc) { |
226 | UNUSED_P(ptr); |
227 | UNUSED_P(end); |
228 | UNUSED_P(enc); |
229 | switch (tok) { |
230 | case XML_TOK_PROLOG_S: |
231 | return XML_ROLE_DOCTYPE_NONE; |
232 | case XML_TOK_NAME: |
233 | case XML_TOK_PREFIXED_NAME: |
234 | state->handler = doctype1; |
235 | return XML_ROLE_DOCTYPE_NAME; |
236 | } |
237 | return common(state, tok); |
238 | } |
239 | |
240 | static int PTRCALL |
241 | doctype1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
242 | const ENCODING *enc) { |
243 | switch (tok) { |
244 | case XML_TOK_PROLOG_S: |
245 | return XML_ROLE_DOCTYPE_NONE; |
246 | case XML_TOK_OPEN_BRACKET: |
247 | state->handler = internalSubset; |
248 | return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; |
249 | case XML_TOK_DECL_CLOSE: |
250 | state->handler = prolog2; |
251 | return XML_ROLE_DOCTYPE_CLOSE; |
252 | case XML_TOK_NAME: |
253 | if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { |
254 | state->handler = doctype3; |
255 | return XML_ROLE_DOCTYPE_NONE; |
256 | } |
257 | if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { |
258 | state->handler = doctype2; |
259 | return XML_ROLE_DOCTYPE_NONE; |
260 | } |
261 | break; |
262 | } |
263 | return common(state, tok); |
264 | } |
265 | |
266 | static int PTRCALL |
267 | doctype2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
268 | const ENCODING *enc) { |
269 | UNUSED_P(ptr); |
270 | UNUSED_P(end); |
271 | UNUSED_P(enc); |
272 | switch (tok) { |
273 | case XML_TOK_PROLOG_S: |
274 | return XML_ROLE_DOCTYPE_NONE; |
275 | case XML_TOK_LITERAL: |
276 | state->handler = doctype3; |
277 | return XML_ROLE_DOCTYPE_PUBLIC_ID; |
278 | } |
279 | return common(state, tok); |
280 | } |
281 | |
282 | static int PTRCALL |
283 | doctype3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
284 | const ENCODING *enc) { |
285 | UNUSED_P(ptr); |
286 | UNUSED_P(end); |
287 | UNUSED_P(enc); |
288 | switch (tok) { |
289 | case XML_TOK_PROLOG_S: |
290 | return XML_ROLE_DOCTYPE_NONE; |
291 | case XML_TOK_LITERAL: |
292 | state->handler = doctype4; |
293 | return XML_ROLE_DOCTYPE_SYSTEM_ID; |
294 | } |
295 | return common(state, tok); |
296 | } |
297 | |
298 | static int PTRCALL |
299 | doctype4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
300 | const ENCODING *enc) { |
301 | UNUSED_P(ptr); |
302 | UNUSED_P(end); |
303 | UNUSED_P(enc); |
304 | switch (tok) { |
305 | case XML_TOK_PROLOG_S: |
306 | return XML_ROLE_DOCTYPE_NONE; |
307 | case XML_TOK_OPEN_BRACKET: |
308 | state->handler = internalSubset; |
309 | return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; |
310 | case XML_TOK_DECL_CLOSE: |
311 | state->handler = prolog2; |
312 | return XML_ROLE_DOCTYPE_CLOSE; |
313 | } |
314 | return common(state, tok); |
315 | } |
316 | |
317 | static int PTRCALL |
318 | doctype5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
319 | const ENCODING *enc) { |
320 | UNUSED_P(ptr); |
321 | UNUSED_P(end); |
322 | UNUSED_P(enc); |
323 | switch (tok) { |
324 | case XML_TOK_PROLOG_S: |
325 | return XML_ROLE_DOCTYPE_NONE; |
326 | case XML_TOK_DECL_CLOSE: |
327 | state->handler = prolog2; |
328 | return XML_ROLE_DOCTYPE_CLOSE; |
329 | } |
330 | return common(state, tok); |
331 | } |
332 | |
333 | static int PTRCALL |
334 | internalSubset(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
335 | const ENCODING *enc) { |
336 | switch (tok) { |
337 | case XML_TOK_PROLOG_S: |
338 | return XML_ROLE_NONE; |
339 | case XML_TOK_DECL_OPEN: |
340 | if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, |
341 | KW_ENTITY)) { |
342 | state->handler = entity0; |
343 | return XML_ROLE_ENTITY_NONE; |
344 | } |
345 | if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, |
346 | KW_ATTLIST)) { |
347 | state->handler = attlist0; |
348 | return XML_ROLE_ATTLIST_NONE; |
349 | } |
350 | if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, |
351 | KW_ELEMENT)) { |
352 | state->handler = element0; |
353 | return XML_ROLE_ELEMENT_NONE; |
354 | } |
355 | if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, |
356 | KW_NOTATION)) { |
357 | state->handler = notation0; |
358 | return XML_ROLE_NOTATION_NONE; |
359 | } |
360 | break; |
361 | case XML_TOK_PI: |
362 | return XML_ROLE_PI; |
363 | case XML_TOK_COMMENT: |
364 | return XML_ROLE_COMMENT; |
365 | case XML_TOK_PARAM_ENTITY_REF: |
366 | return XML_ROLE_PARAM_ENTITY_REF; |
367 | case XML_TOK_CLOSE_BRACKET: |
368 | state->handler = doctype5; |
369 | return XML_ROLE_DOCTYPE_NONE; |
370 | case XML_TOK_NONE: |
371 | return XML_ROLE_NONE; |
372 | } |
373 | return common(state, tok); |
374 | } |
375 | |
376 | #ifdef XML_DTD |
377 | |
378 | static int PTRCALL |
379 | externalSubset0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
380 | const ENCODING *enc) { |
381 | state->handler = externalSubset1; |
382 | if (tok == XML_TOK_XML_DECL) |
383 | return XML_ROLE_TEXT_DECL; |
384 | return externalSubset1(state, tok, ptr, end, enc); |
385 | } |
386 | |
387 | static int PTRCALL |
388 | externalSubset1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
389 | const ENCODING *enc) { |
390 | switch (tok) { |
391 | case XML_TOK_COND_SECT_OPEN: |
392 | state->handler = condSect0; |
393 | return XML_ROLE_NONE; |
394 | case XML_TOK_COND_SECT_CLOSE: |
395 | if (state->includeLevel == 0) |
396 | break; |
397 | state->includeLevel -= 1; |
398 | return XML_ROLE_NONE; |
399 | case XML_TOK_PROLOG_S: |
400 | return XML_ROLE_NONE; |
401 | case XML_TOK_CLOSE_BRACKET: |
402 | break; |
403 | case XML_TOK_NONE: |
404 | if (state->includeLevel) |
405 | break; |
406 | return XML_ROLE_NONE; |
407 | default: |
408 | return internalSubset(state, tok, ptr, end, enc); |
409 | } |
410 | return common(state, tok); |
411 | } |
412 | |
413 | #endif /* XML_DTD */ |
414 | |
415 | static int PTRCALL |
416 | entity0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
417 | const ENCODING *enc) { |
418 | UNUSED_P(ptr); |
419 | UNUSED_P(end); |
420 | UNUSED_P(enc); |
421 | switch (tok) { |
422 | case XML_TOK_PROLOG_S: |
423 | return XML_ROLE_ENTITY_NONE; |
424 | case XML_TOK_PERCENT: |
425 | state->handler = entity1; |
426 | return XML_ROLE_ENTITY_NONE; |
427 | case XML_TOK_NAME: |
428 | state->handler = entity2; |
429 | return XML_ROLE_GENERAL_ENTITY_NAME; |
430 | } |
431 | return common(state, tok); |
432 | } |
433 | |
434 | static int PTRCALL |
435 | entity1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
436 | const ENCODING *enc) { |
437 | UNUSED_P(ptr); |
438 | UNUSED_P(end); |
439 | UNUSED_P(enc); |
440 | switch (tok) { |
441 | case XML_TOK_PROLOG_S: |
442 | return XML_ROLE_ENTITY_NONE; |
443 | case XML_TOK_NAME: |
444 | state->handler = entity7; |
445 | return XML_ROLE_PARAM_ENTITY_NAME; |
446 | } |
447 | return common(state, tok); |
448 | } |
449 | |
450 | static int PTRCALL |
451 | entity2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
452 | const ENCODING *enc) { |
453 | switch (tok) { |
454 | case XML_TOK_PROLOG_S: |
455 | return XML_ROLE_ENTITY_NONE; |
456 | case XML_TOK_NAME: |
457 | if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { |
458 | state->handler = entity4; |
459 | return XML_ROLE_ENTITY_NONE; |
460 | } |
461 | if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { |
462 | state->handler = entity3; |
463 | return XML_ROLE_ENTITY_NONE; |
464 | } |
465 | break; |
466 | case XML_TOK_LITERAL: |
467 | state->handler = declClose; |
468 | state->role_none = XML_ROLE_ENTITY_NONE; |
469 | return XML_ROLE_ENTITY_VALUE; |
470 | } |
471 | return common(state, tok); |
472 | } |
473 | |
474 | static int PTRCALL |
475 | entity3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
476 | const ENCODING *enc) { |
477 | UNUSED_P(ptr); |
478 | UNUSED_P(end); |
479 | UNUSED_P(enc); |
480 | switch (tok) { |
481 | case XML_TOK_PROLOG_S: |
482 | return XML_ROLE_ENTITY_NONE; |
483 | case XML_TOK_LITERAL: |
484 | state->handler = entity4; |
485 | return XML_ROLE_ENTITY_PUBLIC_ID; |
486 | } |
487 | return common(state, tok); |
488 | } |
489 | |
490 | static int PTRCALL |
491 | entity4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
492 | const ENCODING *enc) { |
493 | UNUSED_P(ptr); |
494 | UNUSED_P(end); |
495 | UNUSED_P(enc); |
496 | switch (tok) { |
497 | case XML_TOK_PROLOG_S: |
498 | return XML_ROLE_ENTITY_NONE; |
499 | case XML_TOK_LITERAL: |
500 | state->handler = entity5; |
501 | return XML_ROLE_ENTITY_SYSTEM_ID; |
502 | } |
503 | return common(state, tok); |
504 | } |
505 | |
506 | static int PTRCALL |
507 | entity5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
508 | const ENCODING *enc) { |
509 | switch (tok) { |
510 | case XML_TOK_PROLOG_S: |
511 | return XML_ROLE_ENTITY_NONE; |
512 | case XML_TOK_DECL_CLOSE: |
513 | setTopLevel(state); |
514 | return XML_ROLE_ENTITY_COMPLETE; |
515 | case XML_TOK_NAME: |
516 | if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) { |
517 | state->handler = entity6; |
518 | return XML_ROLE_ENTITY_NONE; |
519 | } |
520 | break; |
521 | } |
522 | return common(state, tok); |
523 | } |
524 | |
525 | static int PTRCALL |
526 | entity6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
527 | const ENCODING *enc) { |
528 | UNUSED_P(ptr); |
529 | UNUSED_P(end); |
530 | UNUSED_P(enc); |
531 | switch (tok) { |
532 | case XML_TOK_PROLOG_S: |
533 | return XML_ROLE_ENTITY_NONE; |
534 | case XML_TOK_NAME: |
535 | state->handler = declClose; |
536 | state->role_none = XML_ROLE_ENTITY_NONE; |
537 | return XML_ROLE_ENTITY_NOTATION_NAME; |
538 | } |
539 | return common(state, tok); |
540 | } |
541 | |
542 | static int PTRCALL |
543 | entity7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
544 | const ENCODING *enc) { |
545 | switch (tok) { |
546 | case XML_TOK_PROLOG_S: |
547 | return XML_ROLE_ENTITY_NONE; |
548 | case XML_TOK_NAME: |
549 | if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { |
550 | state->handler = entity9; |
551 | return XML_ROLE_ENTITY_NONE; |
552 | } |
553 | if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { |
554 | state->handler = entity8; |
555 | return XML_ROLE_ENTITY_NONE; |
556 | } |
557 | break; |
558 | case XML_TOK_LITERAL: |
559 | state->handler = declClose; |
560 | state->role_none = XML_ROLE_ENTITY_NONE; |
561 | return XML_ROLE_ENTITY_VALUE; |
562 | } |
563 | return common(state, tok); |
564 | } |
565 | |
566 | static int PTRCALL |
567 | entity8(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
568 | const ENCODING *enc) { |
569 | UNUSED_P(ptr); |
570 | UNUSED_P(end); |
571 | UNUSED_P(enc); |
572 | switch (tok) { |
573 | case XML_TOK_PROLOG_S: |
574 | return XML_ROLE_ENTITY_NONE; |
575 | case XML_TOK_LITERAL: |
576 | state->handler = entity9; |
577 | return XML_ROLE_ENTITY_PUBLIC_ID; |
578 | } |
579 | return common(state, tok); |
580 | } |
581 | |
582 | static int PTRCALL |
583 | entity9(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
584 | const ENCODING *enc) { |
585 | UNUSED_P(ptr); |
586 | UNUSED_P(end); |
587 | UNUSED_P(enc); |
588 | switch (tok) { |
589 | case XML_TOK_PROLOG_S: |
590 | return XML_ROLE_ENTITY_NONE; |
591 | case XML_TOK_LITERAL: |
592 | state->handler = entity10; |
593 | return XML_ROLE_ENTITY_SYSTEM_ID; |
594 | } |
595 | return common(state, tok); |
596 | } |
597 | |
598 | static int PTRCALL |
599 | entity10(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
600 | const ENCODING *enc) { |
601 | UNUSED_P(ptr); |
602 | UNUSED_P(end); |
603 | UNUSED_P(enc); |
604 | switch (tok) { |
605 | case XML_TOK_PROLOG_S: |
606 | return XML_ROLE_ENTITY_NONE; |
607 | case XML_TOK_DECL_CLOSE: |
608 | setTopLevel(state); |
609 | return XML_ROLE_ENTITY_COMPLETE; |
610 | } |
611 | return common(state, tok); |
612 | } |
613 | |
614 | static int PTRCALL |
615 | notation0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
616 | const ENCODING *enc) { |
617 | UNUSED_P(ptr); |
618 | UNUSED_P(end); |
619 | UNUSED_P(enc); |
620 | switch (tok) { |
621 | case XML_TOK_PROLOG_S: |
622 | return XML_ROLE_NOTATION_NONE; |
623 | case XML_TOK_NAME: |
624 | state->handler = notation1; |
625 | return XML_ROLE_NOTATION_NAME; |
626 | } |
627 | return common(state, tok); |
628 | } |
629 | |
630 | static int PTRCALL |
631 | notation1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
632 | const ENCODING *enc) { |
633 | switch (tok) { |
634 | case XML_TOK_PROLOG_S: |
635 | return XML_ROLE_NOTATION_NONE; |
636 | case XML_TOK_NAME: |
637 | if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { |
638 | state->handler = notation3; |
639 | return XML_ROLE_NOTATION_NONE; |
640 | } |
641 | if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { |
642 | state->handler = notation2; |
643 | return XML_ROLE_NOTATION_NONE; |
644 | } |
645 | break; |
646 | } |
647 | return common(state, tok); |
648 | } |
649 | |
650 | static int PTRCALL |
651 | notation2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
652 | const ENCODING *enc) { |
653 | UNUSED_P(ptr); |
654 | UNUSED_P(end); |
655 | UNUSED_P(enc); |
656 | switch (tok) { |
657 | case XML_TOK_PROLOG_S: |
658 | return XML_ROLE_NOTATION_NONE; |
659 | case XML_TOK_LITERAL: |
660 | state->handler = notation4; |
661 | return XML_ROLE_NOTATION_PUBLIC_ID; |
662 | } |
663 | return common(state, tok); |
664 | } |
665 | |
666 | static int PTRCALL |
667 | notation3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
668 | const ENCODING *enc) { |
669 | UNUSED_P(ptr); |
670 | UNUSED_P(end); |
671 | UNUSED_P(enc); |
672 | switch (tok) { |
673 | case XML_TOK_PROLOG_S: |
674 | return XML_ROLE_NOTATION_NONE; |
675 | case XML_TOK_LITERAL: |
676 | state->handler = declClose; |
677 | state->role_none = XML_ROLE_NOTATION_NONE; |
678 | return XML_ROLE_NOTATION_SYSTEM_ID; |
679 | } |
680 | return common(state, tok); |
681 | } |
682 | |
683 | static int PTRCALL |
684 | notation4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
685 | const ENCODING *enc) { |
686 | UNUSED_P(ptr); |
687 | UNUSED_P(end); |
688 | UNUSED_P(enc); |
689 | switch (tok) { |
690 | case XML_TOK_PROLOG_S: |
691 | return XML_ROLE_NOTATION_NONE; |
692 | case XML_TOK_LITERAL: |
693 | state->handler = declClose; |
694 | state->role_none = XML_ROLE_NOTATION_NONE; |
695 | return XML_ROLE_NOTATION_SYSTEM_ID; |
696 | case XML_TOK_DECL_CLOSE: |
697 | setTopLevel(state); |
698 | return XML_ROLE_NOTATION_NO_SYSTEM_ID; |
699 | } |
700 | return common(state, tok); |
701 | } |
702 | |
703 | static int PTRCALL |
704 | attlist0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
705 | const ENCODING *enc) { |
706 | UNUSED_P(ptr); |
707 | UNUSED_P(end); |
708 | UNUSED_P(enc); |
709 | switch (tok) { |
710 | case XML_TOK_PROLOG_S: |
711 | return XML_ROLE_ATTLIST_NONE; |
712 | case XML_TOK_NAME: |
713 | case XML_TOK_PREFIXED_NAME: |
714 | state->handler = attlist1; |
715 | return XML_ROLE_ATTLIST_ELEMENT_NAME; |
716 | } |
717 | return common(state, tok); |
718 | } |
719 | |
720 | static int PTRCALL |
721 | attlist1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
722 | const ENCODING *enc) { |
723 | UNUSED_P(ptr); |
724 | UNUSED_P(end); |
725 | UNUSED_P(enc); |
726 | switch (tok) { |
727 | case XML_TOK_PROLOG_S: |
728 | return XML_ROLE_ATTLIST_NONE; |
729 | case XML_TOK_DECL_CLOSE: |
730 | setTopLevel(state); |
731 | return XML_ROLE_ATTLIST_NONE; |
732 | case XML_TOK_NAME: |
733 | case XML_TOK_PREFIXED_NAME: |
734 | state->handler = attlist2; |
735 | return XML_ROLE_ATTRIBUTE_NAME; |
736 | } |
737 | return common(state, tok); |
738 | } |
739 | |
740 | static int PTRCALL |
741 | attlist2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
742 | const ENCODING *enc) { |
743 | switch (tok) { |
744 | case XML_TOK_PROLOG_S: |
745 | return XML_ROLE_ATTLIST_NONE; |
746 | case XML_TOK_NAME: { |
747 | static const char *const types[] = { |
748 | KW_CDATA, KW_ID, KW_IDREF, KW_IDREFS, |
749 | KW_ENTITY, KW_ENTITIES, KW_NMTOKEN, KW_NMTOKENS, |
750 | }; |
751 | int i; |
752 | for (i = 0; i < (int)(sizeof(types) / sizeof(types[0])); i++) |
753 | if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { |
754 | state->handler = attlist8; |
755 | return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; |
756 | } |
757 | } |
758 | if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) { |
759 | state->handler = attlist5; |
760 | return XML_ROLE_ATTLIST_NONE; |
761 | } |
762 | break; |
763 | case XML_TOK_OPEN_PAREN: |
764 | state->handler = attlist3; |
765 | return XML_ROLE_ATTLIST_NONE; |
766 | } |
767 | return common(state, tok); |
768 | } |
769 | |
770 | static int PTRCALL |
771 | attlist3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
772 | const ENCODING *enc) { |
773 | UNUSED_P(ptr); |
774 | UNUSED_P(end); |
775 | UNUSED_P(enc); |
776 | switch (tok) { |
777 | case XML_TOK_PROLOG_S: |
778 | return XML_ROLE_ATTLIST_NONE; |
779 | case XML_TOK_NMTOKEN: |
780 | case XML_TOK_NAME: |
781 | case XML_TOK_PREFIXED_NAME: |
782 | state->handler = attlist4; |
783 | return XML_ROLE_ATTRIBUTE_ENUM_VALUE; |
784 | } |
785 | return common(state, tok); |
786 | } |
787 | |
788 | static int PTRCALL |
789 | attlist4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
790 | const ENCODING *enc) { |
791 | UNUSED_P(ptr); |
792 | UNUSED_P(end); |
793 | UNUSED_P(enc); |
794 | switch (tok) { |
795 | case XML_TOK_PROLOG_S: |
796 | return XML_ROLE_ATTLIST_NONE; |
797 | case XML_TOK_CLOSE_PAREN: |
798 | state->handler = attlist8; |
799 | return XML_ROLE_ATTLIST_NONE; |
800 | case XML_TOK_OR: |
801 | state->handler = attlist3; |
802 | return XML_ROLE_ATTLIST_NONE; |
803 | } |
804 | return common(state, tok); |
805 | } |
806 | |
807 | static int PTRCALL |
808 | attlist5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
809 | const ENCODING *enc) { |
810 | UNUSED_P(ptr); |
811 | UNUSED_P(end); |
812 | UNUSED_P(enc); |
813 | switch (tok) { |
814 | case XML_TOK_PROLOG_S: |
815 | return XML_ROLE_ATTLIST_NONE; |
816 | case XML_TOK_OPEN_PAREN: |
817 | state->handler = attlist6; |
818 | return XML_ROLE_ATTLIST_NONE; |
819 | } |
820 | return common(state, tok); |
821 | } |
822 | |
823 | static int PTRCALL |
824 | attlist6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
825 | const ENCODING *enc) { |
826 | UNUSED_P(ptr); |
827 | UNUSED_P(end); |
828 | UNUSED_P(enc); |
829 | switch (tok) { |
830 | case XML_TOK_PROLOG_S: |
831 | return XML_ROLE_ATTLIST_NONE; |
832 | case XML_TOK_NAME: |
833 | state->handler = attlist7; |
834 | return XML_ROLE_ATTRIBUTE_NOTATION_VALUE; |
835 | } |
836 | return common(state, tok); |
837 | } |
838 | |
839 | static int PTRCALL |
840 | attlist7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
841 | const ENCODING *enc) { |
842 | UNUSED_P(ptr); |
843 | UNUSED_P(end); |
844 | UNUSED_P(enc); |
845 | switch (tok) { |
846 | case XML_TOK_PROLOG_S: |
847 | return XML_ROLE_ATTLIST_NONE; |
848 | case XML_TOK_CLOSE_PAREN: |
849 | state->handler = attlist8; |
850 | return XML_ROLE_ATTLIST_NONE; |
851 | case XML_TOK_OR: |
852 | state->handler = attlist6; |
853 | return XML_ROLE_ATTLIST_NONE; |
854 | } |
855 | return common(state, tok); |
856 | } |
857 | |
858 | /* default value */ |
859 | static int PTRCALL |
860 | attlist8(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
861 | const ENCODING *enc) { |
862 | switch (tok) { |
863 | case XML_TOK_PROLOG_S: |
864 | return XML_ROLE_ATTLIST_NONE; |
865 | case XML_TOK_POUND_NAME: |
866 | if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, |
867 | KW_IMPLIED)) { |
868 | state->handler = attlist1; |
869 | return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE; |
870 | } |
871 | if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, |
872 | KW_REQUIRED)) { |
873 | state->handler = attlist1; |
874 | return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE; |
875 | } |
876 | if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, |
877 | KW_FIXED)) { |
878 | state->handler = attlist9; |
879 | return XML_ROLE_ATTLIST_NONE; |
880 | } |
881 | break; |
882 | case XML_TOK_LITERAL: |
883 | state->handler = attlist1; |
884 | return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE; |
885 | } |
886 | return common(state, tok); |
887 | } |
888 | |
889 | static int PTRCALL |
890 | attlist9(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
891 | const ENCODING *enc) { |
892 | UNUSED_P(ptr); |
893 | UNUSED_P(end); |
894 | UNUSED_P(enc); |
895 | switch (tok) { |
896 | case XML_TOK_PROLOG_S: |
897 | return XML_ROLE_ATTLIST_NONE; |
898 | case XML_TOK_LITERAL: |
899 | state->handler = attlist1; |
900 | return XML_ROLE_FIXED_ATTRIBUTE_VALUE; |
901 | } |
902 | return common(state, tok); |
903 | } |
904 | |
905 | static int PTRCALL |
906 | element0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
907 | const ENCODING *enc) { |
908 | UNUSED_P(ptr); |
909 | UNUSED_P(end); |
910 | UNUSED_P(enc); |
911 | switch (tok) { |
912 | case XML_TOK_PROLOG_S: |
913 | return XML_ROLE_ELEMENT_NONE; |
914 | case XML_TOK_NAME: |
915 | case XML_TOK_PREFIXED_NAME: |
916 | state->handler = element1; |
917 | return XML_ROLE_ELEMENT_NAME; |
918 | } |
919 | return common(state, tok); |
920 | } |
921 | |
922 | static int PTRCALL |
923 | element1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
924 | const ENCODING *enc) { |
925 | switch (tok) { |
926 | case XML_TOK_PROLOG_S: |
927 | return XML_ROLE_ELEMENT_NONE; |
928 | case XML_TOK_NAME: |
929 | if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) { |
930 | state->handler = declClose; |
931 | state->role_none = XML_ROLE_ELEMENT_NONE; |
932 | return XML_ROLE_CONTENT_EMPTY; |
933 | } |
934 | if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) { |
935 | state->handler = declClose; |
936 | state->role_none = XML_ROLE_ELEMENT_NONE; |
937 | return XML_ROLE_CONTENT_ANY; |
938 | } |
939 | break; |
940 | case XML_TOK_OPEN_PAREN: |
941 | state->handler = element2; |
942 | state->level = 1; |
943 | return XML_ROLE_GROUP_OPEN; |
944 | } |
945 | return common(state, tok); |
946 | } |
947 | |
948 | static int PTRCALL |
949 | element2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
950 | const ENCODING *enc) { |
951 | switch (tok) { |
952 | case XML_TOK_PROLOG_S: |
953 | return XML_ROLE_ELEMENT_NONE; |
954 | case XML_TOK_POUND_NAME: |
955 | if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, |
956 | KW_PCDATA)) { |
957 | state->handler = element3; |
958 | return XML_ROLE_CONTENT_PCDATA; |
959 | } |
960 | break; |
961 | case XML_TOK_OPEN_PAREN: |
962 | state->level = 2; |
963 | state->handler = element6; |
964 | return XML_ROLE_GROUP_OPEN; |
965 | case XML_TOK_NAME: |
966 | case XML_TOK_PREFIXED_NAME: |
967 | state->handler = element7; |
968 | return XML_ROLE_CONTENT_ELEMENT; |
969 | case XML_TOK_NAME_QUESTION: |
970 | state->handler = element7; |
971 | return XML_ROLE_CONTENT_ELEMENT_OPT; |
972 | case XML_TOK_NAME_ASTERISK: |
973 | state->handler = element7; |
974 | return XML_ROLE_CONTENT_ELEMENT_REP; |
975 | case XML_TOK_NAME_PLUS: |
976 | state->handler = element7; |
977 | return XML_ROLE_CONTENT_ELEMENT_PLUS; |
978 | } |
979 | return common(state, tok); |
980 | } |
981 | |
982 | static int PTRCALL |
983 | element3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
984 | const ENCODING *enc) { |
985 | UNUSED_P(ptr); |
986 | UNUSED_P(end); |
987 | UNUSED_P(enc); |
988 | switch (tok) { |
989 | case XML_TOK_PROLOG_S: |
990 | return XML_ROLE_ELEMENT_NONE; |
991 | case XML_TOK_CLOSE_PAREN: |
992 | state->handler = declClose; |
993 | state->role_none = XML_ROLE_ELEMENT_NONE; |
994 | return XML_ROLE_GROUP_CLOSE; |
995 | case XML_TOK_CLOSE_PAREN_ASTERISK: |
996 | state->handler = declClose; |
997 | state->role_none = XML_ROLE_ELEMENT_NONE; |
998 | return XML_ROLE_GROUP_CLOSE_REP; |
999 | case XML_TOK_OR: |
1000 | state->handler = element4; |
1001 | return XML_ROLE_ELEMENT_NONE; |
1002 | } |
1003 | return common(state, tok); |
1004 | } |
1005 | |
1006 | static int PTRCALL |
1007 | element4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1008 | const ENCODING *enc) { |
1009 | UNUSED_P(ptr); |
1010 | UNUSED_P(end); |
1011 | UNUSED_P(enc); |
1012 | switch (tok) { |
1013 | case XML_TOK_PROLOG_S: |
1014 | return XML_ROLE_ELEMENT_NONE; |
1015 | case XML_TOK_NAME: |
1016 | case XML_TOK_PREFIXED_NAME: |
1017 | state->handler = element5; |
1018 | return XML_ROLE_CONTENT_ELEMENT; |
1019 | } |
1020 | return common(state, tok); |
1021 | } |
1022 | |
1023 | static int PTRCALL |
1024 | element5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1025 | const ENCODING *enc) { |
1026 | UNUSED_P(ptr); |
1027 | UNUSED_P(end); |
1028 | UNUSED_P(enc); |
1029 | switch (tok) { |
1030 | case XML_TOK_PROLOG_S: |
1031 | return XML_ROLE_ELEMENT_NONE; |
1032 | case XML_TOK_CLOSE_PAREN_ASTERISK: |
1033 | state->handler = declClose; |
1034 | state->role_none = XML_ROLE_ELEMENT_NONE; |
1035 | return XML_ROLE_GROUP_CLOSE_REP; |
1036 | case XML_TOK_OR: |
1037 | state->handler = element4; |
1038 | return XML_ROLE_ELEMENT_NONE; |
1039 | } |
1040 | return common(state, tok); |
1041 | } |
1042 | |
1043 | static int PTRCALL |
1044 | element6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1045 | const ENCODING *enc) { |
1046 | UNUSED_P(ptr); |
1047 | UNUSED_P(end); |
1048 | UNUSED_P(enc); |
1049 | switch (tok) { |
1050 | case XML_TOK_PROLOG_S: |
1051 | return XML_ROLE_ELEMENT_NONE; |
1052 | case XML_TOK_OPEN_PAREN: |
1053 | state->level += 1; |
1054 | return XML_ROLE_GROUP_OPEN; |
1055 | case XML_TOK_NAME: |
1056 | case XML_TOK_PREFIXED_NAME: |
1057 | state->handler = element7; |
1058 | return XML_ROLE_CONTENT_ELEMENT; |
1059 | case XML_TOK_NAME_QUESTION: |
1060 | state->handler = element7; |
1061 | return XML_ROLE_CONTENT_ELEMENT_OPT; |
1062 | case XML_TOK_NAME_ASTERISK: |
1063 | state->handler = element7; |
1064 | return XML_ROLE_CONTENT_ELEMENT_REP; |
1065 | case XML_TOK_NAME_PLUS: |
1066 | state->handler = element7; |
1067 | return XML_ROLE_CONTENT_ELEMENT_PLUS; |
1068 | } |
1069 | return common(state, tok); |
1070 | } |
1071 | |
1072 | static int PTRCALL |
1073 | element7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1074 | const ENCODING *enc) { |
1075 | UNUSED_P(ptr); |
1076 | UNUSED_P(end); |
1077 | UNUSED_P(enc); |
1078 | switch (tok) { |
1079 | case XML_TOK_PROLOG_S: |
1080 | return XML_ROLE_ELEMENT_NONE; |
1081 | case XML_TOK_CLOSE_PAREN: |
1082 | state->level -= 1; |
1083 | if (state->level == 0) { |
1084 | state->handler = declClose; |
1085 | state->role_none = XML_ROLE_ELEMENT_NONE; |
1086 | } |
1087 | return XML_ROLE_GROUP_CLOSE; |
1088 | case XML_TOK_CLOSE_PAREN_ASTERISK: |
1089 | state->level -= 1; |
1090 | if (state->level == 0) { |
1091 | state->handler = declClose; |
1092 | state->role_none = XML_ROLE_ELEMENT_NONE; |
1093 | } |
1094 | return XML_ROLE_GROUP_CLOSE_REP; |
1095 | case XML_TOK_CLOSE_PAREN_QUESTION: |
1096 | state->level -= 1; |
1097 | if (state->level == 0) { |
1098 | state->handler = declClose; |
1099 | state->role_none = XML_ROLE_ELEMENT_NONE; |
1100 | } |
1101 | return XML_ROLE_GROUP_CLOSE_OPT; |
1102 | case XML_TOK_CLOSE_PAREN_PLUS: |
1103 | state->level -= 1; |
1104 | if (state->level == 0) { |
1105 | state->handler = declClose; |
1106 | state->role_none = XML_ROLE_ELEMENT_NONE; |
1107 | } |
1108 | return XML_ROLE_GROUP_CLOSE_PLUS; |
1109 | case XML_TOK_COMMA: |
1110 | state->handler = element6; |
1111 | return XML_ROLE_GROUP_SEQUENCE; |
1112 | case XML_TOK_OR: |
1113 | state->handler = element6; |
1114 | return XML_ROLE_GROUP_CHOICE; |
1115 | } |
1116 | return common(state, tok); |
1117 | } |
1118 | |
1119 | #ifdef XML_DTD |
1120 | |
1121 | static int PTRCALL |
1122 | condSect0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1123 | const ENCODING *enc) { |
1124 | switch (tok) { |
1125 | case XML_TOK_PROLOG_S: |
1126 | return XML_ROLE_NONE; |
1127 | case XML_TOK_NAME: |
1128 | if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) { |
1129 | state->handler = condSect1; |
1130 | return XML_ROLE_NONE; |
1131 | } |
1132 | if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) { |
1133 | state->handler = condSect2; |
1134 | return XML_ROLE_NONE; |
1135 | } |
1136 | break; |
1137 | } |
1138 | return common(state, tok); |
1139 | } |
1140 | |
1141 | static int PTRCALL |
1142 | condSect1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1143 | const ENCODING *enc) { |
1144 | UNUSED_P(ptr); |
1145 | UNUSED_P(end); |
1146 | UNUSED_P(enc); |
1147 | switch (tok) { |
1148 | case XML_TOK_PROLOG_S: |
1149 | return XML_ROLE_NONE; |
1150 | case XML_TOK_OPEN_BRACKET: |
1151 | state->handler = externalSubset1; |
1152 | state->includeLevel += 1; |
1153 | return XML_ROLE_NONE; |
1154 | } |
1155 | return common(state, tok); |
1156 | } |
1157 | |
1158 | static int PTRCALL |
1159 | condSect2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1160 | const ENCODING *enc) { |
1161 | UNUSED_P(ptr); |
1162 | UNUSED_P(end); |
1163 | UNUSED_P(enc); |
1164 | switch (tok) { |
1165 | case XML_TOK_PROLOG_S: |
1166 | return XML_ROLE_NONE; |
1167 | case XML_TOK_OPEN_BRACKET: |
1168 | state->handler = externalSubset1; |
1169 | return XML_ROLE_IGNORE_SECT; |
1170 | } |
1171 | return common(state, tok); |
1172 | } |
1173 | |
1174 | #endif /* XML_DTD */ |
1175 | |
1176 | static int PTRCALL |
1177 | declClose(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1178 | const ENCODING *enc) { |
1179 | UNUSED_P(ptr); |
1180 | UNUSED_P(end); |
1181 | UNUSED_P(enc); |
1182 | switch (tok) { |
1183 | case XML_TOK_PROLOG_S: |
1184 | return state->role_none; |
1185 | case XML_TOK_DECL_CLOSE: |
1186 | setTopLevel(state); |
1187 | return state->role_none; |
1188 | } |
1189 | return common(state, tok); |
1190 | } |
1191 | |
1192 | /* This function will only be invoked if the internal logic of the |
1193 | * parser has broken down. It is used in two cases: |
1194 | * |
1195 | * 1: When the XML prolog has been finished. At this point the |
1196 | * processor (the parser level above these role handlers) should |
1197 | * switch from prologProcessor to contentProcessor and reinitialise |
1198 | * the handler function. |
1199 | * |
1200 | * 2: When an error has been detected (via common() below). At this |
1201 | * point again the processor should be switched to errorProcessor, |
1202 | * which will never call a handler. |
1203 | * |
1204 | * The result of this is that error() can only be called if the |
1205 | * processor switch failed to happen, which is an internal error and |
1206 | * therefore we shouldn't be able to provoke it simply by using the |
1207 | * library. It is a necessary backstop, however, so we merely exclude |
1208 | * it from the coverage statistics. |
1209 | * |
1210 | * LCOV_EXCL_START |
1211 | */ |
1212 | static int PTRCALL |
1213 | error(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1214 | const ENCODING *enc) { |
1215 | UNUSED_P(state); |
1216 | UNUSED_P(tok); |
1217 | UNUSED_P(ptr); |
1218 | UNUSED_P(end); |
1219 | UNUSED_P(enc); |
1220 | return XML_ROLE_NONE; |
1221 | } |
1222 | /* LCOV_EXCL_STOP */ |
1223 | |
1224 | static int FASTCALL |
1225 | common(PROLOG_STATE *state, int tok) { |
1226 | #ifdef XML_DTD |
1227 | if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) |
1228 | return XML_ROLE_INNER_PARAM_ENTITY_REF; |
1229 | #else |
1230 | UNUSED_P(tok); |
1231 | #endif |
1232 | state->handler = error; |
1233 | return XML_ROLE_ERROR; |
1234 | } |
1235 | |
1236 | void |
1237 | XmlPrologStateInit(PROLOG_STATE *state) { |
1238 | state->handler = prolog0; |
1239 | #ifdef XML_DTD |
1240 | state->documentEntity = 1; |
1241 | state->includeLevel = 0; |
1242 | state->inEntityValue = 0; |
1243 | #endif /* XML_DTD */ |
1244 | } |
1245 | |
1246 | #ifdef XML_DTD |
1247 | |
1248 | void |
1249 | XmlPrologStateInitExternalEntity(PROLOG_STATE *state) { |
1250 | state->handler = externalSubset0; |
1251 | state->documentEntity = 0; |
1252 | state->includeLevel = 0; |
1253 | } |
1254 | |
1255 | #endif /* XML_DTD */ |
1256 | |