1 | /* |
2 | * _codecs_jp.c: Codecs collection for Japanese encodings |
3 | * |
4 | * Written by Hye-Shik Chang <[email protected]> |
5 | */ |
6 | |
7 | #define USING_BINARY_PAIR_SEARCH |
8 | #define EMPBASE 0x20000 |
9 | |
10 | #include "cjkcodecs.h" |
11 | #include "mappings_jp.h" |
12 | #include "mappings_jisx0213_pair.h" |
13 | #include "alg_jisx0201.h" |
14 | #include "emu_jisx0213_2000.h" |
15 | |
16 | /* |
17 | * CP932 codec |
18 | */ |
19 | |
20 | ENCODER(cp932) |
21 | { |
22 | while (*inpos < inlen) { |
23 | Py_UCS4 c = INCHAR1; |
24 | DBCHAR code; |
25 | unsigned char c1, c2; |
26 | |
27 | if (c <= 0x80) { |
28 | WRITEBYTE1((unsigned char)c); |
29 | NEXT(1, 1); |
30 | continue; |
31 | } |
32 | else if (c >= 0xff61 && c <= 0xff9f) { |
33 | WRITEBYTE1(c - 0xfec0); |
34 | NEXT(1, 1); |
35 | continue; |
36 | } |
37 | else if (c >= 0xf8f0 && c <= 0xf8f3) { |
38 | /* Windows compatibility */ |
39 | REQUIRE_OUTBUF(1); |
40 | if (c == 0xf8f0) |
41 | OUTBYTE1(0xa0); |
42 | else |
43 | OUTBYTE1(c - 0xf8f1 + 0xfd); |
44 | NEXT(1, 1); |
45 | continue; |
46 | } |
47 | |
48 | if (c > 0xFFFF) |
49 | return 1; |
50 | REQUIRE_OUTBUF(2); |
51 | |
52 | if (TRYMAP_ENC(cp932ext, code, c)) { |
53 | OUTBYTE1(code >> 8); |
54 | OUTBYTE2(code & 0xff); |
55 | } |
56 | else if (TRYMAP_ENC(jisxcommon, code, c)) { |
57 | if (code & 0x8000) /* MSB set: JIS X 0212 */ |
58 | return 1; |
59 | |
60 | /* JIS X 0208 */ |
61 | c1 = code >> 8; |
62 | c2 = code & 0xff; |
63 | c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); |
64 | c1 = (c1 - 0x21) >> 1; |
65 | OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1); |
66 | OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41); |
67 | } |
68 | else if (c >= 0xe000 && c < 0xe758) { |
69 | /* User-defined area */ |
70 | c1 = (Py_UCS4)(c - 0xe000) / 188; |
71 | c2 = (Py_UCS4)(c - 0xe000) % 188; |
72 | OUTBYTE1(c1 + 0xf0); |
73 | OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41); |
74 | } |
75 | else |
76 | return 1; |
77 | |
78 | NEXT(1, 2); |
79 | } |
80 | |
81 | return 0; |
82 | } |
83 | |
84 | DECODER(cp932) |
85 | { |
86 | while (inleft > 0) { |
87 | unsigned char c = INBYTE1, c2; |
88 | Py_UCS4 decoded; |
89 | |
90 | if (c <= 0x80) { |
91 | OUTCHAR(c); |
92 | NEXT_IN(1); |
93 | continue; |
94 | } |
95 | else if (c >= 0xa0 && c <= 0xdf) { |
96 | if (c == 0xa0) |
97 | OUTCHAR(0xf8f0); /* half-width katakana */ |
98 | else |
99 | OUTCHAR(0xfec0 + c); |
100 | NEXT_IN(1); |
101 | continue; |
102 | } |
103 | else if (c >= 0xfd/* && c <= 0xff*/) { |
104 | /* Windows compatibility */ |
105 | OUTCHAR(0xf8f1 - 0xfd + c); |
106 | NEXT_IN(1); |
107 | continue; |
108 | } |
109 | |
110 | REQUIRE_INBUF(2); |
111 | c2 = INBYTE2; |
112 | |
113 | if (TRYMAP_DEC(cp932ext, decoded, c, c2)) |
114 | OUTCHAR(decoded); |
115 | else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ |
116 | if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) |
117 | return 1; |
118 | |
119 | c = (c < 0xe0 ? c - 0x81 : c - 0xc1); |
120 | c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); |
121 | c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21); |
122 | c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; |
123 | |
124 | if (TRYMAP_DEC(jisx0208, decoded, c, c2)) |
125 | OUTCHAR(decoded); |
126 | else |
127 | return 1; |
128 | } |
129 | else if (c >= 0xf0 && c <= 0xf9) { |
130 | if ((c2 >= 0x40 && c2 <= 0x7e) || |
131 | (c2 >= 0x80 && c2 <= 0xfc)) |
132 | OUTCHAR(0xe000 + 188 * (c - 0xf0) + |
133 | (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41)); |
134 | else |
135 | return 1; |
136 | } |
137 | else |
138 | return 1; |
139 | |
140 | NEXT_IN(2); |
141 | } |
142 | |
143 | return 0; |
144 | } |
145 | |
146 | |
147 | /* |
148 | * EUC-JIS-2004 codec |
149 | */ |
150 | |
/*
 * EUC-JIS-2004 encoder (the codec list in this file also registers it
 * under the name "euc_jisx0213").
 *
 * Walks the input from *inpos up to inlen and writes one, two or three
 * bytes per step.  Return values visible in this function:
 *   0             the whole input was consumed;
 *   MBERR_TOOFEW  the current character may start a two-character
 *                 sequence, it is the last character available, and
 *                 MBENC_FLUSH is not set in flags;
 *   1 / insize    no mapping was found for the current character
 *                 (insize is still 1 at every `return insize`).
 * The WRITEBYTEn / NEXT macros and both EMULATE_JISX0213_2000_* macros
 * are defined in project headers; the latter open an if-chain that the
 * following `else if` continues.
 * NOTE(review): confirm in cjkcodecs.h / emu_jisx0213_2000.h whether
 * those macros can return on their own.
 */
ENCODER(euc_jis_2004)
{
    while (*inpos < inlen) {
        Py_UCS4 c = INCHAR1;
        DBCHAR code;
        /* Number of input characters consumed by this step: 1, or 2
         * when a two-character pair was matched below. */
        Py_ssize_t insize;

        /* Characters below 0x80 are written as a single byte. */
        if (c < 0x80) {
            WRITEBYTE1(c);
            NEXT(1, 1);
            continue;
        }

        insize = 1;

        if (c <= 0xFFFF) {
            /* BMP character: try the lookups in priority order. */
            EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
            else if (TRYMAP_ENC(jisx0213_bmp, code, c)) {
                if (code == MULTIC) {
                    /* MULTIC sends the lookup to the pair table: the
                     * result depends on the character that follows. */
                    if (inlen - *inpos < 2) {
                        /* No following character is available. */
                        if (flags & MBENC_FLUSH) {
                            /* Flushing: encode c on its own (second
                             * element of the pair is 0). */
                            code = find_pairencmap(
                                (ucs2_t)c, 0,
                                jisx0213_pair_encmap,
                                JISX0213_ENCPAIRS);
                            if (code == DBCINV)
                                return 1;
                        }
                        else
                            /* Ask the caller for more input. */
                            return MBERR_TOOFEW;
                    }
                    else {
                        Py_UCS4 c2 = INCHAR2;
                        /* First try the (c, c2) pair ... */
                        code = find_pairencmap(
                            (ucs2_t)c, c2,
                            jisx0213_pair_encmap,
                            JISX0213_ENCPAIRS);
                        if (code == DBCINV) {
                            /* ... then fall back to c alone. */
                            code = find_pairencmap(
                                (ucs2_t)c, 0,
                                jisx0213_pair_encmap,
                                JISX0213_ENCPAIRS);
                            if (code == DBCINV)
                                return 1;
                        } else
                            /* Pair matched: consume both characters. */
                            insize = 2;
                    }
                }
            }
            else if (TRYMAP_ENC(jisxcommon, code, c))
                ; /* code is set by the lookup; emitted below */
            else if (c >= 0xff61 && c <= 0xff9f) {
                /* JIS X 0201 half-width katakana: 0x8e prefix followed
                 * by a byte in 0xa1..0xdf */
                WRITEBYTE2(0x8e, c - 0xfec0);
                NEXT(1, 2);
                continue;
            }
            else if (c == 0xff3c)
                /* F/W REVERSE SOLIDUS (see NOTES) */
                code = 0x2140;
            else if (c == 0xff5e)
                /* F/W TILDE (see NOTES) */
                code = 0x2232;
            else
                return 1;
        }
        else if (c >> 16 == EMPBASE >> 16) {
            /* Same 64K plane as EMPBASE (0x20000): look up the low 16
             * bits in the supplementary-plane table. */
            EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
            else if (TRYMAP_ENC(jisx0213_emp, code, c & 0xffff))
                ;
            else
                return insize;
        }
        else
            /* Any other supplementary-plane character is unencodable. */
            return insize;

        if (code & 0x8000) {
            /* Codeset 2: three bytes, 0x8f prefix.  The high byte of
             * code already has its top bit set, so only the low byte
             * is OR-ed with 0x80. */
            WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80);
            NEXT(insize, 3);
        } else {
            /* Codeset 1: two bytes, both with the top bit set. */
            WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80);
            NEXT(insize, 2);
        }
    }

    return 0;
}
240 | |
241 | DECODER(euc_jis_2004) |
242 | { |
243 | while (inleft > 0) { |
244 | unsigned char c = INBYTE1; |
245 | Py_UCS4 code, decoded; |
246 | |
247 | if (c < 0x80) { |
248 | OUTCHAR(c); |
249 | NEXT_IN(1); |
250 | continue; |
251 | } |
252 | |
253 | if (c == 0x8e) { |
254 | /* JIS X 0201 half-width katakana */ |
255 | unsigned char c2; |
256 | |
257 | REQUIRE_INBUF(2); |
258 | c2 = INBYTE2; |
259 | if (c2 >= 0xa1 && c2 <= 0xdf) { |
260 | OUTCHAR(0xfec0 + c2); |
261 | NEXT_IN(2); |
262 | } |
263 | else |
264 | return 1; |
265 | } |
266 | else if (c == 0x8f) { |
267 | unsigned char c2, c3; |
268 | |
269 | REQUIRE_INBUF(3); |
270 | c2 = INBYTE2 ^ 0x80; |
271 | c3 = INBYTE3 ^ 0x80; |
272 | |
273 | /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */ |
274 | EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3) |
275 | else if (TRYMAP_DEC(jisx0213_2_bmp, decoded, c2, c3)) |
276 | OUTCHAR(decoded); |
277 | else if (TRYMAP_DEC(jisx0213_2_emp, code, c2, c3)) { |
278 | OUTCHAR(EMPBASE | code); |
279 | NEXT_IN(3); |
280 | continue; |
281 | } |
282 | else if (TRYMAP_DEC(jisx0212, decoded, c2, c3)) |
283 | OUTCHAR(decoded); |
284 | else |
285 | return 1; |
286 | NEXT_IN(3); |
287 | } |
288 | else { |
289 | unsigned char c2; |
290 | |
291 | REQUIRE_INBUF(2); |
292 | c ^= 0x80; |
293 | c2 = INBYTE2 ^ 0x80; |
294 | |
295 | /* JIS X 0213 Plane 1 */ |
296 | EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2) |
297 | else if (c == 0x21 && c2 == 0x40) |
298 | OUTCHAR(0xff3c); |
299 | else if (c == 0x22 && c2 == 0x32) |
300 | OUTCHAR(0xff5e); |
301 | else if (TRYMAP_DEC(jisx0208, decoded, c, c2)) |
302 | OUTCHAR(decoded); |
303 | else if (TRYMAP_DEC(jisx0213_1_bmp, decoded, c, c2)) |
304 | OUTCHAR(decoded); |
305 | else if (TRYMAP_DEC(jisx0213_1_emp, code, c, c2)) { |
306 | OUTCHAR(EMPBASE | code); |
307 | NEXT_IN(2); |
308 | continue; |
309 | } |
310 | else if (TRYMAP_DEC(jisx0213_pair, code, c, c2)) { |
311 | OUTCHAR2(code >> 16, code & 0xffff); |
312 | NEXT_IN(2); |
313 | continue; |
314 | } |
315 | else |
316 | return 1; |
317 | NEXT_IN(2); |
318 | } |
319 | } |
320 | |
321 | return 0; |
322 | } |
323 | |
324 | |
325 | /* |
326 | * EUC-JP codec |
327 | */ |
328 | |
329 | ENCODER(euc_jp) |
330 | { |
331 | while (*inpos < inlen) { |
332 | Py_UCS4 c = INCHAR1; |
333 | DBCHAR code; |
334 | |
335 | if (c < 0x80) { |
336 | WRITEBYTE1((unsigned char)c); |
337 | NEXT(1, 1); |
338 | continue; |
339 | } |
340 | |
341 | if (c > 0xFFFF) |
342 | return 1; |
343 | |
344 | if (TRYMAP_ENC(jisxcommon, code, c)) |
345 | ; |
346 | else if (c >= 0xff61 && c <= 0xff9f) { |
347 | /* JIS X 0201 half-width katakana */ |
348 | WRITEBYTE2(0x8e, c - 0xfec0); |
349 | NEXT(1, 2); |
350 | continue; |
351 | } |
352 | #ifndef STRICT_BUILD |
353 | else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */ |
354 | code = 0x2140; |
355 | else if (c == 0xa5) { /* YEN SIGN */ |
356 | WRITEBYTE1(0x5c); |
357 | NEXT(1, 1); |
358 | continue; |
359 | } else if (c == 0x203e) { /* OVERLINE */ |
360 | WRITEBYTE1(0x7e); |
361 | NEXT(1, 1); |
362 | continue; |
363 | } |
364 | #endif |
365 | else |
366 | return 1; |
367 | |
368 | if (code & 0x8000) { |
369 | /* JIS X 0212 */ |
370 | WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80); |
371 | NEXT(1, 3); |
372 | } else { |
373 | /* JIS X 0208 */ |
374 | WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80); |
375 | NEXT(1, 2); |
376 | } |
377 | } |
378 | |
379 | return 0; |
380 | } |
381 | |
382 | DECODER(euc_jp) |
383 | { |
384 | while (inleft > 0) { |
385 | unsigned char c = INBYTE1; |
386 | Py_UCS4 decoded; |
387 | |
388 | if (c < 0x80) { |
389 | OUTCHAR(c); |
390 | NEXT_IN(1); |
391 | continue; |
392 | } |
393 | |
394 | if (c == 0x8e) { |
395 | /* JIS X 0201 half-width katakana */ |
396 | unsigned char c2; |
397 | |
398 | REQUIRE_INBUF(2); |
399 | c2 = INBYTE2; |
400 | if (c2 >= 0xa1 && c2 <= 0xdf) { |
401 | OUTCHAR(0xfec0 + c2); |
402 | NEXT_IN(2); |
403 | } |
404 | else |
405 | return 1; |
406 | } |
407 | else if (c == 0x8f) { |
408 | unsigned char c2, c3; |
409 | |
410 | REQUIRE_INBUF(3); |
411 | c2 = INBYTE2; |
412 | c3 = INBYTE3; |
413 | /* JIS X 0212 */ |
414 | if (TRYMAP_DEC(jisx0212, decoded, c2 ^ 0x80, c3 ^ 0x80)) { |
415 | OUTCHAR(decoded); |
416 | NEXT_IN(3); |
417 | } |
418 | else |
419 | return 1; |
420 | } |
421 | else { |
422 | unsigned char c2; |
423 | |
424 | REQUIRE_INBUF(2); |
425 | c2 = INBYTE2; |
426 | /* JIS X 0208 */ |
427 | #ifndef STRICT_BUILD |
428 | if (c == 0xa1 && c2 == 0xc0) |
429 | /* FULL-WIDTH REVERSE SOLIDUS */ |
430 | OUTCHAR(0xff3c); |
431 | else |
432 | #endif |
433 | if (TRYMAP_DEC(jisx0208, decoded, c ^ 0x80, c2 ^ 0x80)) |
434 | OUTCHAR(decoded); |
435 | else |
436 | return 1; |
437 | NEXT_IN(2); |
438 | } |
439 | } |
440 | |
441 | return 0; |
442 | } |
443 | |
444 | |
445 | /* |
446 | * SHIFT_JIS codec |
447 | */ |
448 | |
449 | ENCODER(shift_jis) |
450 | { |
451 | while (*inpos < inlen) { |
452 | Py_UCS4 c = INCHAR1; |
453 | DBCHAR code; |
454 | unsigned char c1, c2; |
455 | |
456 | #ifdef STRICT_BUILD |
457 | JISX0201_R_ENCODE(c, code) |
458 | #else |
459 | if (c < 0x80) |
460 | code = c; |
461 | else if (c == 0x00a5) |
462 | code = 0x5c; /* YEN SIGN */ |
463 | else if (c == 0x203e) |
464 | code = 0x7e; /* OVERLINE */ |
465 | #endif |
466 | else JISX0201_K_ENCODE(c, code) |
467 | else if (c > 0xFFFF) |
468 | return 1; |
469 | else |
470 | code = NOCHAR; |
471 | |
472 | if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { |
473 | REQUIRE_OUTBUF(1); |
474 | |
475 | OUTBYTE1((unsigned char)code); |
476 | NEXT(1, 1); |
477 | continue; |
478 | } |
479 | |
480 | REQUIRE_OUTBUF(2); |
481 | |
482 | if (code == NOCHAR) { |
483 | if (TRYMAP_ENC(jisxcommon, code, c)) |
484 | ; |
485 | #ifndef STRICT_BUILD |
486 | else if (c == 0xff3c) |
487 | code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */ |
488 | #endif |
489 | else |
490 | return 1; |
491 | |
492 | if (code & 0x8000) /* MSB set: JIS X 0212 */ |
493 | return 1; |
494 | } |
495 | |
496 | c1 = code >> 8; |
497 | c2 = code & 0xff; |
498 | c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); |
499 | c1 = (c1 - 0x21) >> 1; |
500 | OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1); |
501 | OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41); |
502 | NEXT(1, 2); |
503 | } |
504 | |
505 | return 0; |
506 | } |
507 | |
508 | DECODER(shift_jis) |
509 | { |
510 | while (inleft > 0) { |
511 | unsigned char c = INBYTE1; |
512 | Py_UCS4 decoded; |
513 | |
514 | #ifdef STRICT_BUILD |
515 | JISX0201_R_DECODE(c, writer) |
516 | #else |
517 | if (c < 0x80) |
518 | OUTCHAR(c); |
519 | #endif |
520 | else JISX0201_K_DECODE(c, writer) |
521 | else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ |
522 | unsigned char c1, c2; |
523 | |
524 | REQUIRE_INBUF(2); |
525 | c2 = INBYTE2; |
526 | if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) |
527 | return 1; |
528 | |
529 | c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); |
530 | c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); |
531 | c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21); |
532 | c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; |
533 | |
534 | #ifndef STRICT_BUILD |
535 | if (c1 == 0x21 && c2 == 0x40) { |
536 | /* FULL-WIDTH REVERSE SOLIDUS */ |
537 | OUTCHAR(0xff3c); |
538 | NEXT_IN(2); |
539 | continue; |
540 | } |
541 | #endif |
542 | if (TRYMAP_DEC(jisx0208, decoded, c1, c2)) { |
543 | OUTCHAR(decoded); |
544 | NEXT_IN(2); |
545 | continue; |
546 | } |
547 | else |
548 | return 1; |
549 | } |
550 | else |
551 | return 1; |
552 | |
553 | NEXT_IN(1); /* JIS X 0201 */ |
554 | } |
555 | |
556 | return 0; |
557 | } |
558 | |
559 | |
560 | /* |
561 | * SHIFT_JIS-2004 codec |
562 | */ |
563 | |
/*
 * Shift_JIS-2004 encoder (the codec list in this file also registers it
 * under the name "shift_jisx0213").
 *
 * Walks the input from *inpos up to inlen and writes one or two bytes
 * per step.  Return values visible in this function:
 *   0             the whole input was consumed;
 *   MBERR_TOOFEW  the current character may start a two-character
 *                 sequence, it is the last character available, and
 *                 MBENC_FLUSH is not set in flags;
 *   1 / insize    no usable mapping was found for the current character
 *                 (insize is still 1 at every `return insize`).
 * JISX0201_ENCODE and the EMULATE_JISX0213_2000_* macros are defined in
 * project headers; the latter open an if-chain that the following
 * `else if` continues.
 * NOTE(review): confirm in cjkcodecs.h / emu_jisx0213_2000.h whether
 * those macros can return on their own.
 */
ENCODER(shift_jis_2004)
{
    while (*inpos < inlen) {
        Py_UCS4 c = INCHAR1;
        /* Stays NOCHAR unless JISX0201_ENCODE assigns a single-byte
         * code -- presumably it leaves code untouched for characters it
         * does not handle; verify in alg_jisx0201.h. */
        DBCHAR code = NOCHAR;
        int c1, c2;
        /* Number of input characters consumed by this step: 1, or 2
         * when a two-character pair was matched below. */
        Py_ssize_t insize;

        JISX0201_ENCODE(c, code)

        /* Single-byte result: below 0x80 or in 0xa1..0xdf. */
        if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
            WRITEBYTE1((unsigned char)code);
            NEXT(1, 1);
            continue;
        }

        REQUIRE_OUTBUF(2);
        insize = 1;

        if (code == NOCHAR) {
            if (c <= 0xffff) {
                /* BMP character: try the lookups in priority order. */
                EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
                else if (TRYMAP_ENC(jisx0213_bmp, code, c)) {
                    if (code == MULTIC) {
                        /* MULTIC sends the lookup to the pair table:
                         * the result depends on the next character. */
                        if (inlen - *inpos < 2) {
                            /* No following character is available. */
                            if (flags & MBENC_FLUSH) {
                                /* Flushing: encode c on its own. */
                                code = find_pairencmap
                                    ((ucs2_t)c, 0,
                                     jisx0213_pair_encmap,
                                     JISX0213_ENCPAIRS);
                                if (code == DBCINV)
                                    return 1;
                            }
                            else
                                /* Ask the caller for more input. */
                                return MBERR_TOOFEW;
                        }
                        else {
                            Py_UCS4 ch2 = INCHAR2;
                            /* First try the (c, ch2) pair ... */
                            code = find_pairencmap(
                                (ucs2_t)c, ch2,
                                jisx0213_pair_encmap,
                                JISX0213_ENCPAIRS);
                            if (code == DBCINV) {
                                /* ... then fall back to c alone. */
                                code = find_pairencmap(
                                    (ucs2_t)c, 0,
                                    jisx0213_pair_encmap,
                                    JISX0213_ENCPAIRS);
                                if (code == DBCINV)
                                    return 1;
                            }
                            else
                                /* Pair matched: consume both. */
                                insize = 2;
                        }
                    }
                }
                else if (TRYMAP_ENC(jisxcommon, code, c)) {
                    /* abandon JIS X 0212 codes */
                    if (code & 0x8000)
                        return 1;
                }
                else
                    return 1;
            }
            else if (c >> 16 == EMPBASE >> 16) {
                /* Same 64K plane as EMPBASE (0x20000): look up the low
                 * 16 bits in the supplementary-plane table. */
                EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
                else if (TRYMAP_ENC(jisx0213_emp, code, c&0xffff))
                    ;
                else
                    return insize;
            }
            else
                /* Any other supplementary-plane character is
                 * unencodable. */
                return insize;
        }

        /* Split the code into a row byte and a zero-based column. */
        c1 = code >> 8;
        c2 = (code & 0xff) - 0x21;

        if (c1 & 0x80) {
            /* Plane 2: the row byte (top bit set) is shifted by one of
             * three fixed offsets depending on its range. */
            if (c1 >= 0xee)
                c1 -= 0x87;
            else if (c1 >= 0xac || c1 == 0xa8)
                c1 -= 0x49;
            else
                c1 -= 0x43;
        }
        else {
            /* Plane 1 */
            c1 -= 0x21;
        }

        /* Two rows share one lead byte: odd rows use the upper half of
         * the trail-byte range. */
        if (c1 & 1)
            c2 += 0x5e;
        c1 >>= 1;
        OUTBYTE1(c1 + (c1 < 0x1f ? 0x81 : 0xc1));
        OUTBYTE2(c2 + (c2 < 0x3f ? 0x40 : 0x41));

        NEXT(insize, 2);
    }

    return 0;
}
666 | |
667 | DECODER(shift_jis_2004) |
668 | { |
669 | while (inleft > 0) { |
670 | unsigned char c = INBYTE1; |
671 | |
672 | JISX0201_DECODE(c, writer) |
673 | else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){ |
674 | unsigned char c1, c2; |
675 | Py_UCS4 code, decoded; |
676 | |
677 | REQUIRE_INBUF(2); |
678 | c2 = INBYTE2; |
679 | if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) |
680 | return 1; |
681 | |
682 | c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); |
683 | c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); |
684 | c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1)); |
685 | c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; |
686 | |
687 | if (c1 < 0x5e) { /* Plane 1 */ |
688 | c1 += 0x21; |
689 | EMULATE_JISX0213_2000_DECODE_PLANE1(writer, |
690 | c1, c2) |
691 | else if (TRYMAP_DEC(jisx0208, decoded, c1, c2)) |
692 | OUTCHAR(decoded); |
693 | else if (TRYMAP_DEC(jisx0213_1_bmp, decoded, c1, c2)) |
694 | OUTCHAR(decoded); |
695 | else if (TRYMAP_DEC(jisx0213_1_emp, code, c1, c2)) |
696 | OUTCHAR(EMPBASE | code); |
697 | else if (TRYMAP_DEC(jisx0213_pair, code, c1, c2)) |
698 | OUTCHAR2(code >> 16, code & 0xffff); |
699 | else |
700 | return 1; |
701 | NEXT_IN(2); |
702 | } |
703 | else { /* Plane 2 */ |
704 | if (c1 >= 0x67) |
705 | c1 += 0x07; |
706 | else if (c1 >= 0x63 || c1 == 0x5f) |
707 | c1 -= 0x37; |
708 | else |
709 | c1 -= 0x3d; |
710 | |
711 | EMULATE_JISX0213_2000_DECODE_PLANE2(writer, |
712 | c1, c2) |
713 | else if (TRYMAP_DEC(jisx0213_2_bmp, decoded, c1, c2)) |
714 | OUTCHAR(decoded); |
715 | else if (TRYMAP_DEC(jisx0213_2_emp, code, c1, c2)) { |
716 | OUTCHAR(EMPBASE | code); |
717 | NEXT_IN(2); |
718 | continue; |
719 | } |
720 | else |
721 | return 1; |
722 | NEXT_IN(2); |
723 | } |
724 | continue; |
725 | } |
726 | else |
727 | return 1; |
728 | |
729 | NEXT_IN(1); /* JIS X 0201 */ |
730 | } |
731 | |
732 | return 0; |
733 | } |
734 | |
735 | |
/*
 * Mapping tables published by this module.
 *
 * Within this file, the tables listed as DECONLY are referenced only
 * through TRYMAP_DEC, the ENCONLY ones only through TRYMAP_ENC, and the
 * two ENCDEC ones in both directions (jisx0213_pair via TRYMAP_DEC and
 * find_pairencmap, cp932ext via TRYMAP_DEC and TRYMAP_ENC).
 * NOTE(review): the exact expansion of the MAPPING_* macros lives in
 * cjkcodecs.h -- confirm there before reordering or removing entries.
 */
BEGIN_MAPPINGS_LIST
  MAPPING_DECONLY(jisx0208)
  MAPPING_DECONLY(jisx0212)
  MAPPING_ENCONLY(jisxcommon)
  MAPPING_DECONLY(jisx0213_1_bmp)
  MAPPING_DECONLY(jisx0213_2_bmp)
  MAPPING_ENCONLY(jisx0213_bmp)
  MAPPING_DECONLY(jisx0213_1_emp)
  MAPPING_DECONLY(jisx0213_2_emp)
  MAPPING_ENCONLY(jisx0213_emp)
  MAPPING_ENCDEC(jisx0213_pair)
  MAPPING_ENCDEC(cp932ext)
END_MAPPINGS_LIST
749 | |
/*
 * Codecs published by this module.
 *
 * The five CODEC_STATELESS entries register the encoder/decoder pairs
 * defined in this file.  The two explicit entries register
 * "euc_jisx0213" and "shift_jisx0213" on top of the *_2004 functions,
 * passing (void *)2000 as the second field.
 * NOTE(review): presumably that value is the per-codec config which the
 * EMULATE_JISX0213_2000_* macros test -- confirm against
 * emu_jisx0213_2000.h and the codec struct layout in cjkcodecs.h.
 */
BEGIN_CODECS_LIST
  CODEC_STATELESS(shift_jis)
  CODEC_STATELESS(cp932)
  CODEC_STATELESS(euc_jp)
  CODEC_STATELESS(shift_jis_2004)
  CODEC_STATELESS(euc_jis_2004)
  { "euc_jisx0213" , (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
  { "shift_jisx0213" , (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
END_CODECS_LIST

/* Module boilerplate for the "jp" codec collection.
 * NOTE(review): expansion is defined in cjkcodecs.h -- confirm there. */
I_AM_A_MODULE_FOR(jp)
761 | |