From 04bba3935bea698926ee77312826ca47931fd5a4 Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Mon, 14 Jan 2019 22:56:22 -0800 Subject: [PATCH] Fix #464 --- release-notes/VERSION-2.x | 1 + .../fasterxml/jackson/core/JsonFactory.java | 27 +++++++--- .../jackson/core/JsonFactoryBuilder.java | 36 ++++++++++++-- .../fasterxml/jackson/core/JsonGenerator.java | 2 +- .../jackson/core/json/UTF8JsonGenerator.java | 2 - .../jackson/core/json/TestCharEscaping.java | 49 +++++++++++++++++-- 6 files changed, 101 insertions(+), 16 deletions(-) diff --git a/release-notes/VERSION-2.x b/release-notes/VERSION-2.x index f3e8e06aed..573a39e2dc 100644 --- a/release-notes/VERSION-2.x +++ b/release-notes/VERSION-2.x @@ -17,6 +17,7 @@ JSON library. 2.10.0 (not yet released) #433: Add Builder pattern for creating configured Stream factories +#464: Add "maximum unescaped char" configuration option for `JsonFactory` via builder #467: Create `JsonReadFeature` to move JSON-specific `JsonParser.Feature`s to #480: `SerializableString` value can not directly render to Writer (requested by Philippe M) diff --git a/src/main/java/com/fasterxml/jackson/core/JsonFactory.java b/src/main/java/com/fasterxml/jackson/core/JsonFactory.java index 0d53d24272..8719d93540 100644 --- a/src/main/java/com/fasterxml/jackson/core/JsonFactory.java +++ b/src/main/java/com/fasterxml/jackson/core/JsonFactory.java @@ -265,7 +265,17 @@ public static int collectDefaults() { * @since 2.1 */ protected SerializableString _rootValueSeparator = DEFAULT_ROOT_VALUE_SEPARATOR; - + + /** + * Optional threshold used for automatically escaping character above certain character + * code value: either {@code 0} to indicate that no threshold is specified, or value + * at or above 127 to indicate last character code that is NOT automatically escaped + * (but depends on other configuration rules for checking). 
+ * + * @since 2.10 + */ + protected int _maximumNonEscapedChar; + /* /********************************************************** /* Construction @@ -301,11 +311,7 @@ protected JsonFactory(JsonFactory src, ObjectCodec codec) _inputDecorator = src._inputDecorator; _outputDecorator = src._outputDecorator; _rootValueSeparator = src._rootValueSeparator; - - /* 27-Apr-2013, tatu: How about symbol table; should we try to - * reuse shared symbol tables? Could be more efficient that way; - * although can slightly add to concurrency overhead. - */ + _maximumNonEscapedChar = src._maximumNonEscapedChar; } /** @@ -317,6 +323,7 @@ public JsonFactory(JsonFactoryBuilder b) { this(b, false); _characterEscapes = b._characterEscapes; _rootValueSeparator = b._rootValueSeparator; + _maximumNonEscapedChar = b._maximumNonEscapedChar; } /** @@ -334,6 +341,8 @@ protected JsonFactory(TSFBuilder b, boolean bogus) { _generatorFeatures = b._streamWriteFeatures; _inputDecorator = b._inputDecorator; _outputDecorator = b._outputDecorator; + // NOTE: missing _maximumNonEscapedChar since that's only in JsonFactoryBuilder + _maximumNonEscapedChar = 0; } /** @@ -1524,6 +1533,9 @@ protected JsonGenerator _createGenerator(Writer out, IOContext ctxt) throws IOEx { WriterBasedJsonGenerator gen = new WriterBasedJsonGenerator(ctxt, _generatorFeatures, _objectCodec, out); + if (_maximumNonEscapedChar > 0) { + gen.setHighestNonEscapedChar(_maximumNonEscapedChar); + } if (_characterEscapes != null) { gen.setCharacterEscapes(_characterEscapes); } @@ -1547,6 +1559,9 @@ protected JsonGenerator _createGenerator(Writer out, IOContext ctxt) throws IOEx protected JsonGenerator _createUTF8Generator(OutputStream out, IOContext ctxt) throws IOException { UTF8JsonGenerator gen = new UTF8JsonGenerator(ctxt, _generatorFeatures, _objectCodec, out); + if (_maximumNonEscapedChar > 0) { + gen.setHighestNonEscapedChar(_maximumNonEscapedChar); + } if (_characterEscapes != null) { gen.setCharacterEscapes(_characterEscapes); } 
diff --git a/src/main/java/com/fasterxml/jackson/core/JsonFactoryBuilder.java b/src/main/java/com/fasterxml/jackson/core/JsonFactoryBuilder.java index 6cb25a584a..76151416c7 100644 --- a/src/main/java/com/fasterxml/jackson/core/JsonFactoryBuilder.java +++ b/src/main/java/com/fasterxml/jackson/core/JsonFactoryBuilder.java @@ -18,15 +18,19 @@ public class JsonFactoryBuilder extends TSFBuilder + * NOTE! Lowest value (aside from marker 0) is 127: for ASCII range, other checks apply + * and this threshold is ignored. + * + * @param maxNonEscaped Highest character code that is NOT automatically escaped; if + * positive value above 0, or 0 to indicate that no automatic escaping is applied + * beside from what JSON specification requires (and possible custom escape settings). + * Values between 1 and 127 are all taken to behave as if 127 is specified: that is, + * no automatic escaping is applied in ASCII range. + */ + public JsonFactoryBuilder highestNonEscapedChar(int maxNonEscaped) { + _maximumNonEscapedChar = (maxNonEscaped <= 0) ? 0 : Math.max(127, maxNonEscaped); + return this; + } + + // // // Accessors for JSON-specific settings + public CharacterEscapes characterEscapes() { return _characterEscapes; } public SerializableString rootValueSeparator() { return _rootValueSeparator; } + public int highestNonEscapedChar() { return _maximumNonEscapedChar; } + @Override public JsonFactory build() { // 28-Dec-2017, tatu: No special settings beyond base class ones, so: diff --git a/src/main/java/com/fasterxml/jackson/core/JsonGenerator.java b/src/main/java/com/fasterxml/jackson/core/JsonGenerator.java index ee49f50af0..1febb8a40e 100644 --- a/src/main/java/com/fasterxml/jackson/core/JsonGenerator.java +++ b/src/main/java/com/fasterxml/jackson/core/JsonGenerator.java @@ -511,7 +511,7 @@ public PrettyPrinter getPrettyPrinter() { * simply return 0. 
* * @return Currently active limitation for highest non-escaped character, - * if defined; or -1 to indicate no additional escaping is performed. + * if defined; or 0 to indicate no additional escaping is performed. */ public int getHighestEscapedChar() { return 0; } diff --git a/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java b/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java index 19ecbd91ff..f924d93c41 100644 --- a/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java +++ b/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java @@ -1318,10 +1318,8 @@ private final void _writeStringSegment(char[] cbuf, int offset, int len) } _outputTail = outputPtr; if (offset < len) { - // [JACKSON-106] if (_characterEscapes != null) { _writeCustomStringSegment2(cbuf, offset, len); - // [JACKSON-102] } else if (_maximumNonEscapedChar == 0) { _writeStringSegment2(cbuf, offset, len); } else { diff --git a/src/test/java/com/fasterxml/jackson/core/json/TestCharEscaping.java b/src/test/java/com/fasterxml/jackson/core/json/TestCharEscaping.java index d1028be725..59506175fc 100644 --- a/src/test/java/com/fasterxml/jackson/core/json/TestCharEscaping.java +++ b/src/test/java/com/fasterxml/jackson/core/json/TestCharEscaping.java @@ -12,7 +12,6 @@ public class TestCharEscaping extends com.fasterxml.jackson.core.BaseTest { - // for [JACKSON-627] @SuppressWarnings("serial") private final static CharacterEscapes ESC_627 = new CharacterEscapes() { final int[] ascii = CharacterEscapes.standardAsciiEscapesForJSON(); @@ -132,7 +131,6 @@ public void test8DigitSequence() jp.close(); } - // for [JACKSON-627] public void testWriteLongCustomEscapes() throws Exception { JsonFactory jf = new JsonFactory(); @@ -150,7 +148,7 @@ public void testWriteLongCustomEscapes() throws Exception jgen.close(); } - // [Issue#116] + // [jackson-core#116] public void testEscapesForCharArrays() throws Exception { JsonFactory jf = new JsonFactory(); 
StringWriter writer = new StringWriter(); @@ -160,5 +158,48 @@ public void testEscapesForCharArrays() throws Exception { jgen.close(); assertEquals("\"\\u0000\"", writer.toString()); } -} + // [jackson-core#464] + public void testEscapeNonLatin1Chars() throws Exception { + _testEscapeNonLatin1ViaChars(false); + } + + // [jackson-core#464] + public void testEscapeNonLatin1Bytes() throws Exception { + _testEscapeNonLatin1ViaChars(true); + } + + private void _testEscapeNonLatin1ViaChars(boolean useBytes) throws Exception { + // NOTE! First one is outside latin-1, so escape; second one within, do NOT escape: + final String VALUE_IN = "Line\u2028feed, \u00D6l!"; + final String VALUE_ESCAPED = "Line\\u2028feed, \u00D6l!"; + final JsonFactory DEFAULT_F = new JsonFactory(); + + // First: with default settings, no auto-escaping + _testEscapeNonLatin1(DEFAULT_F, VALUE_IN, VALUE_IN, useBytes); // char- or byte-backed generator, per useBytes + + // Second: with escaping beyond Latin-1 range + final JsonFactory latinF = ((JsonFactoryBuilder)JsonFactory.builder()) + .highestNonEscapedChar(255) + .build(); + _testEscapeNonLatin1(latinF, VALUE_IN, VALUE_ESCAPED, useBytes); + } + + private void _testEscapeNonLatin1(JsonFactory f, String valueIn, String expEncoded, + boolean useBytes) throws Exception + { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + StringWriter sw = new StringWriter(); + final JsonGenerator g = useBytes ? f.createGenerator(bytes, JsonEncoding.UTF8) + : f.createGenerator(sw); + g.writeStartArray(); + g.writeString(valueIn); + g.writeEndArray(); + g.close(); + + // Don't parse, as we want to verify actual escaping aspects + + final String doc = useBytes ? bytes.toString("UTF-8") : sw.toString(); + assertEquals("[\""+expEncoded+"\"]", doc); + } +}