|
34 | 34 | #include "unicode_table_jis.h" |
35 | 35 |
|
36 | 36 | static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter); |
| 37 | +static size_t mb_eucjp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); |
| 38 | +static void mb_wchar_to_eucjp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); |
37 | 39 |
|
38 | 40 | const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ |
39 | 41 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
@@ -64,7 +66,9 @@ const mbfl_encoding mbfl_encoding_euc_jp = { |
64 | 66 | mblen_table_eucjp, |
65 | 67 | 0, |
66 | 68 | &vtbl_eucjp_wchar, |
67 | | - &vtbl_wchar_eucjp |
| 69 | + &vtbl_wchar_eucjp, |
| 70 | + mb_eucjp_to_wchar, |
| 71 | + mb_wchar_to_eucjp |
68 | 72 | }; |
69 | 73 |
|
70 | 74 | const struct mbfl_convert_vtbl vtbl_eucjp_wchar = { |
@@ -243,3 +247,125 @@ mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter) |
243 | 247 |
|
244 | 248 | return 0; |
245 | 249 | } |
| 250 | + |
| 251 | +static size_t mb_eucjp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) |
| 252 | +{ |
| 253 | + unsigned char *p = *in, *e = p + *in_len; |
| 254 | + uint32_t *out = buf, *limit = buf + bufsize; |
| 255 | + |
| 256 | + while (p < e && out < limit) { |
| 257 | + unsigned char c = *p++; |
| 258 | + |
| 259 | + if (c < 0x80) { |
| 260 | + *out++ = c; |
| 261 | + } else if (c >= 0xA1 && c <= 0xFE && p < e) { |
| 262 | + /* JISX 0208 */ |
| 263 | + unsigned char c2 = *p++; |
| 264 | + if (c2 >= 0xA1 && c2 <= 0xFE) { |
| 265 | + unsigned int s = (c - 0xA1)*94 + c2 - 0xA1; |
| 266 | + if (s < jisx0208_ucs_table_size) { |
| 267 | + uint32_t w = jisx0208_ucs_table[s]; |
| 268 | + if (!w) |
| 269 | + w = MBFL_BAD_INPUT; |
| 270 | + *out++ = w; |
| 271 | + } else { |
| 272 | + *out++ = MBFL_BAD_INPUT; |
| 273 | + } |
| 274 | + } else { |
| 275 | + *out++ = MBFL_BAD_INPUT; |
| 276 | + } |
| 277 | + } else if (c == 0x8E && p < e) { |
| 278 | + /* Kana */ |
| 279 | + unsigned char c2 = *p++; |
| 280 | + *out++ = (c2 >= 0xA1 && c2 <= 0xDF) ? 0xFEC0 + c2 : MBFL_BAD_INPUT; |
| 281 | + } else if (c == 0x8F) { |
| 282 | + /* JISX 0212 */ |
| 283 | + if ((e - p) >= 2) { |
| 284 | + unsigned char c2 = *p++; |
| 285 | + unsigned char c3 = *p++; |
| 286 | + if (c3 >= 0xA1 && c3 <= 0xFE && c2 >= 0xA1 && c2 <= 0xFE) { |
| 287 | + unsigned int s = (c2 - 0xA1)*94 + c3 - 0xA1; |
| 288 | + if (s < jisx0212_ucs_table_size) { |
| 289 | + uint32_t w = jisx0212_ucs_table[s]; |
| 290 | + if (!w) |
| 291 | + w = MBFL_BAD_INPUT; |
| 292 | + *out++ = w; |
| 293 | + } else { |
| 294 | + *out++ = MBFL_BAD_INPUT; |
| 295 | + } |
| 296 | + } else { |
| 297 | + *out++ = MBFL_BAD_INPUT; |
| 298 | + } |
| 299 | + } else { |
| 300 | + *out++ = MBFL_BAD_INPUT; |
| 301 | + p = e; /* Jump to end of string */ |
| 302 | + } |
| 303 | + } else { |
| 304 | + *out++ = MBFL_BAD_INPUT; |
| 305 | + } |
| 306 | + } |
| 307 | + |
| 308 | + *in_len = e - p; |
| 309 | + *in = p; |
| 310 | + return out - buf; |
| 311 | +} |
| 312 | + |
| 313 | +static void mb_wchar_to_eucjp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) |
| 314 | +{ |
| 315 | + unsigned char *out, *limit; |
| 316 | + MB_CONVERT_BUF_LOAD(buf, out, limit); |
| 317 | + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); |
| 318 | + |
| 319 | + while (len--) { |
| 320 | + uint32_t w = *in++; |
| 321 | + unsigned int s = 0; |
| 322 | + |
| 323 | + if (w == 0xAF) { /* U+00AF is MACRON */ |
| 324 | + s = 0xA2B4; /* Use JIS X 0212 overline */ |
| 325 | + } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { |
| 326 | + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; |
| 327 | + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { |
| 328 | + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; |
| 329 | + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { |
| 330 | + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; |
| 331 | + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { |
| 332 | + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; |
| 333 | + } |
| 334 | + |
| 335 | + if (s == 0) { |
| 336 | + if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ |
| 337 | + s = 0x2140; |
| 338 | + } else if (w == 0x2225) { /* PARALLEL TO */ |
| 339 | + s = 0x2142; |
| 340 | + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ |
| 341 | + s = 0x215D; |
| 342 | + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ |
| 343 | + s = 0x2171; |
| 344 | + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ |
| 345 | + s = 0x2172; |
| 346 | + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ |
| 347 | + s = 0x224C; |
| 348 | + } else if (w == 0) { |
| 349 | + out = mb_convert_buf_add(out, 0); |
| 350 | + continue; |
| 351 | + } else { |
| 352 | + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjp); |
| 353 | + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); |
| 354 | + continue; |
| 355 | + } |
| 356 | + } |
| 357 | + |
| 358 | + if (s < 0x80) { |
| 359 | + out = mb_convert_buf_add(out, s); |
| 360 | + } else if (s < 0x100) { |
| 361 | + out = mb_convert_buf_add2(out, 0x8E, s); |
| 362 | + } else if (s < 0x8080) { |
| 363 | + out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); |
| 364 | + } else { |
| 365 | + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 3); |
| 366 | + out = mb_convert_buf_add3(out, 0x8F, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); |
| 367 | + } |
| 368 | + } |
| 369 | + |
| 370 | + MB_CONVERT_BUF_STORE(buf, out, limit); |
| 371 | +} |
0 commit comments