/*
 * _codecs_hk.c: Codecs collection for encodings from Hong Kong
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
6
7#define USING_IMPORTED_MAPS
8
9#include "cjkcodecs.h"
10#include "mappings_hk.h"
11
/*
 * BIG5HKSCS codec
 */
15
16static const encode_map *big5_encmap = NULL;
17static const decode_map *big5_decmap = NULL;
18
19CODEC_INIT(big5hkscs)
20{
21 static int initialized = 0;
22
23 if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
24 return -1;
25 initialized = 1;
26 return 0;
27}
28
29/*
30 * There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004:
31 * U+00CA U+0304 -> 8862 (U+00CA alone is mapped to 8866)
32 * U+00CA U+030C -> 8864
33 * U+00EA U+0304 -> 88a3 (U+00EA alone is mapped to 88a7)
34 * U+00EA U+030C -> 88a5
35 * These are handled by not mapping tables but a hand-written code.
36 */
37static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
38
39ENCODER(big5hkscs)
40{
41 while (*inpos < inlen) {
42 Py_UCS4 c = INCHAR1;
43 DBCHAR code;
44 Py_ssize_t insize;
45
46 if (c < 0x80) {
47 REQUIRE_OUTBUF(1);
48 **outbuf = (unsigned char)c;
49 NEXT(1, 1);
50 continue;
51 }
52
53 insize = 1;
54 REQUIRE_OUTBUF(2);
55
56 if (c < 0x10000) {
57 if (TRYMAP_ENC(big5hkscs_bmp, code, c)) {
58 if (code == MULTIC) {
59 Py_UCS4 c2;
60 if (inlen - *inpos >= 2)
61 c2 = INCHAR2;
62 else
63 c2 = 0;
64
65 if (inlen - *inpos >= 2 &&
66 ((c & 0xffdf) == 0x00ca) &&
67 ((c2 & 0xfff7) == 0x0304)) {
68 code = big5hkscs_pairenc_table[
69 ((c >> 4) |
70 (c2 >> 3)) & 3];
71 insize = 2;
72 }
73 else if (inlen - *inpos < 2 &&
74 !(flags & MBENC_FLUSH))
75 return MBERR_TOOFEW;
76 else {
77 if (c == 0xca)
78 code = 0x8866;
79 else /* c == 0xea */
80 code = 0x88a7;
81 }
82 }
83 }
84 else if (TRYMAP_ENC(big5, code, c))
85 ;
86 else
87 return 1;
88 }
89 else if (c < 0x20000)
90 return insize;
91 else if (c < 0x30000) {
92 if (TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff))
93 ;
94 else
95 return insize;
96 }
97 else
98 return insize;
99
100 OUTBYTE1(code >> 8);
101 OUTBYTE2(code & 0xFF);
102 NEXT(insize, 2);
103 }
104
105 return 0;
106}
107
108#define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
109
110DECODER(big5hkscs)
111{
112 while (inleft > 0) {
113 unsigned char c = INBYTE1;
114 Py_UCS4 decoded;
115
116 if (c < 0x80) {
117 OUTCHAR(c);
118 NEXT_IN(1);
119 continue;
120 }
121
122 REQUIRE_INBUF(2);
123
124 if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
125 if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {
126 OUTCHAR(decoded);
127 NEXT_IN(2);
128 continue;
129 }
130 }
131
132 if (TRYMAP_DEC(big5hkscs, decoded, c, INBYTE2))
133 {
134 int s = BH2S(c, INBYTE2);
135 const unsigned char *hintbase;
136
137 assert(0x87 <= c && c <= 0xfe);
138 assert(0x40 <= INBYTE2 && INBYTE2 <= 0xfe);
139
140 if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
141 hintbase = big5hkscs_phint_0;
142 s -= BH2S(0x87, 0x40);
143 }
144 else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
145 hintbase = big5hkscs_phint_12130;
146 s -= BH2S(0xc6, 0xa1);
147 }
148 else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
149 hintbase = big5hkscs_phint_21924;
150 s -= BH2S(0xf9, 0xd6);
151 }
152 else
153 return MBERR_INTERNAL;
154
155 if (hintbase[s >> 3] & (1 << (s & 7))) {
156 OUTCHAR(decoded | 0x20000);
157 NEXT_IN(2);
158 }
159 else {
160 OUTCHAR(decoded);
161 NEXT_IN(2);
162 }
163 continue;
164 }
165
166 switch ((c << 8) | INBYTE2) {
167 case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
168 case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
169 case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
170 case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;
171 default: return 1;
172 }
173
174 NEXT_IN(2); /* all decoded code points are pairs, above. */
175 }
176
177 return 0;
178}
179
180
181BEGIN_MAPPINGS_LIST
182 MAPPING_DECONLY(big5hkscs)
183 MAPPING_ENCONLY(big5hkscs_bmp)
184 MAPPING_ENCONLY(big5hkscs_nonbmp)
185END_MAPPINGS_LIST
186
187BEGIN_CODECS_LIST
188 CODEC_STATELESS_WINIT(big5hkscs)
189END_CODECS_LIST
190
191I_AM_A_MODULE_FOR(hk)
192