|
5 | 5 | #define RB_UNLIKELY(cond) (cond)
|
6 | 6 | #endif
|
7 | 7 |
|
8 |
| -static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError; |
| 8 | +static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8; |
9 | 9 |
|
10 |
| -static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend; |
| 10 | +static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode; |
11 | 11 |
|
12 | 12 | /* Converts in_string to a JSON string (without the wrapping '"'
|
13 | 13 | * characters) in FBuffer out_buffer.
|
@@ -735,20 +735,41 @@ static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_St
|
735 | 735 | fbuffer_append_char(buffer, ']');
|
736 | 736 | }
|
737 | 737 |
|
738 |
| -static int usascii_encindex, utf8_encindex; |
| 738 | +static int usascii_encindex, utf8_encindex, binary_encindex; |
739 | 739 |
|
740 |
| -static int enc_utf8_compatible_p(int enc_idx) |
/* Returns 1 when enc_idx equals the cached US-ASCII or UTF-8 encoding index
 * (usascii_encindex / utf8_encindex), and 0 for any other encoding index. */
| 740 | +static inline int enc_utf8_compatible_p(int enc_idx) |
741 | 741 | {
|
742 | 742 | if (enc_idx == usascii_encindex) return 1;
|
743 | 743 | if (enc_idx == utf8_encindex) return 1;
|
744 | 744 | return 0;
|
745 | 745 | }
|
746 | 746 |
|
/* Returns a string suitable for UTF-8 JSON generation.
 *
 * - If str is already tagged US-ASCII or UTF-8, str is returned unchanged.
 * - If str is tagged binary, a duplicate re-tagged as UTF-8 is returned when
 *   its code range is 7BIT or VALID.
 * - In every other case the result of calling `encode(Encoding::UTF_8)` on
 *   str is returned.
 */
| 747 | +static inline VALUE ensure_valid_encoding(VALUE str) |
| 748 | +{ |
| 749 | + int encindex = RB_ENCODING_GET(str); |
| 750 | + VALUE utf8_string; |
| 751 | + if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) { |
| 752 | + if (encindex == binary_encindex) { |
| 753 | + // For historical reasons, binary strings are silently reinterpreted as UTF-8 when their bytes are valid UTF-8. |
| 754 | + // TODO: Deprecate in 2.8.0 |
| 755 | + // TODO: Remove in 3.0.0 |
| 756 | + utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex); |
| 757 | + switch (rb_enc_str_coderange(utf8_string)) { |
| 758 | + case ENC_CODERANGE_7BIT: |
| 759 | + case ENC_CODERANGE_VALID: |
| 760 | + return utf8_string; |
| 761 | + break; |
| 762 | + } |
| 763 | + } |
| 764 | + |
| 765 | + str = rb_funcall(str, i_encode, 1, Encoding_UTF_8); |
| 766 | + } |
| 767 | + return str; |
| 768 | +} |
| 769 | + |
747 | 770 | static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
|
748 | 771 | {
|
749 |
| - if (!enc_utf8_compatible_p(RB_ENCODING_GET(obj))) { |
750 |
| - obj = rb_str_export_to_enc(obj, rb_utf8_encoding()); |
751 |
| - } |
| 772 | + obj = ensure_valid_encoding(obj); |
752 | 773 |
|
753 | 774 | fbuffer_append_char(buffer, '"');
|
754 | 775 |
|
@@ -1462,14 +1483,19 @@ void Init_generator(void)
|
1462 | 1483 | VALUE mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
|
1463 | 1484 | rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);
|
1464 | 1485 |
|
| 1486 | + rb_global_variable(&Encoding_UTF_8); |
| 1487 | + Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); |
| 1488 | + |
1465 | 1489 | i_to_s = rb_intern("to_s");
|
1466 | 1490 | i_to_json = rb_intern("to_json");
|
1467 | 1491 | i_new = rb_intern("new");
|
1468 | 1492 | i_pack = rb_intern("pack");
|
1469 | 1493 | i_unpack = rb_intern("unpack");
|
1470 | 1494 | i_create_id = rb_intern("create_id");
|
1471 | 1495 | i_extend = rb_intern("extend");
|
| 1496 | + i_encode = rb_intern("encode"); |
1472 | 1497 |
|
1473 | 1498 | usascii_encindex = rb_usascii_encindex();
|
1474 | 1499 | utf8_encindex = rb_utf8_encindex();
|
| 1500 | + binary_encindex = rb_ascii8bit_encindex(); |
1475 | 1501 | }
|
0 commit comments