diff --git a/src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java b/src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java
index baa4d5d20d..b40778d215 100644
--- a/src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java
+++ b/src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java
@@ -242,7 +242,10 @@ public JsonParser constructParser(ObjectReadContext readCtxt,
             ByteQuadsCanonicalizer rootByteSymbols, CharsToNameCanonicalizer rootCharSymbols,
             int factoryFeatures) throws IOException
     {
+        // Remember where detection starts so bytes it consumes (e.g. a BOM) can be reported to the parser
+        int prevInputPtr = _inputPtr;
         JsonEncoding enc = detectEncoding();
+        int bytesProcessed = _inputPtr - prevInputPtr;

         if (enc == JsonEncoding.UTF8) {
             /* and without canonicalization, byte-based approach is not performant; just use std UTF-8 reader
@@ -252,7 +255,7 @@ public JsonParser constructParser(ObjectReadContext readCtxt,
                 ByteQuadsCanonicalizer can = rootByteSymbols.makeChild(factoryFeatures);
                 return new UTF8StreamJsonParser(readCtxt, _context,
                         streamReadFeatures, formatReadFeatures, _in, can,
-                        _inputBuffer, _inputPtr, _inputEnd, _bufferRecyclable);
+                        _inputBuffer, _inputPtr, _inputEnd, bytesProcessed, _bufferRecyclable);
             }
         }
         return new ReaderBasedJsonParser(readCtxt, _context, streamReadFeatures, formatReadFeatures,
diff --git a/src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java b/src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java
index cfd096c326..5f4c83f6f4 100644
--- a/src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java
+++ b/src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java
@@ -118,11 +118,27 @@ public class UTF8StreamJsonParser
     /**********************************************************
      */

+    public UTF8StreamJsonParser(ObjectReadContext readCtxt, IOContext ctxt,
+            int stdFeatures, int formatReadFeatures,
+            InputStream in,
+            ByteQuadsCanonicalizer sym,
+            byte[] inputBuffer, int start, int end,
+            boolean bufferRecyclable)
+    {
+        this(readCtxt, ctxt, stdFeatures, formatReadFeatures, in, sym,
+            inputBuffer, start, end, 0, bufferRecyclable);
+    }
+
+    /**
+     * @param bytesPreProcessed Number of bytes of {@code inputBuffer} immediately before
+     *   {@code start} that the caller already consumed (for example a skipped UTF-8 BOM);
+     *   they are included when computing byte offsets and column numbers
+     */
     public UTF8StreamJsonParser(ObjectReadContext readCtxt, IOContext ctxt,
             int stdFeatures, int formatReadFeatures,
             InputStream in,
             ByteQuadsCanonicalizer sym,
-            byte[] inputBuffer, int start, int end,
+            byte[] inputBuffer, int start, int end, int bytesPreProcessed,
             boolean bufferRecyclable)
     {
         super(readCtxt, ctxt, stdFeatures, formatReadFeatures);
@@ -131,9 +147,9 @@ public UTF8StreamJsonParser(ObjectReadContext readCtxt, IOContext ctxt,
         _inputBuffer = inputBuffer;
         _inputPtr = start;
         _inputEnd = end;
-        _currInputRowStart = start;
+        _currInputRowStart = start - bytesPreProcessed;
         // If we have offset, need to omit that from byte offset, so:
-        _currInputProcessed = -start;
+        _currInputProcessed = -start + bytesPreProcessed;
         _bufferRecyclable = bufferRecyclable;
     }

diff --git a/src/test/java/com/fasterxml/jackson/core/json/LocationOffsetsTest.java b/src/test/java/com/fasterxml/jackson/core/json/LocationOffsetsTest.java
index c5f1ad0e6a..524ff4504f 100644
--- a/src/test/java/com/fasterxml/jackson/core/json/LocationOffsetsTest.java
+++ b/src/test/java/com/fasterxml/jackson/core/json/LocationOffsetsTest.java
@@ -23,7 +23,7 @@ public void testSimpleInitialOffsets() throws Exception
         assertEquals(0L, loc.getCharOffset());
         assertEquals(1, loc.getLineNr());
         assertEquals(1, loc.getColumnNr());
-        
+
         loc = p.getCurrentLocation();
         assertEquals(-1L, loc.getByteOffset());
         assertEquals(1L, loc.getCharOffset());
@@ -33,7 +33,7 @@ public void testSimpleInitialOffsets() throws Exception
         p.close();

         // then byte-based
-        
+
         p = JSON_F.createParser(ObjectReadContext.empty(), DOC.getBytes("UTF-8"));
         assertToken(JsonToken.START_OBJECT, p.nextToken());

@@ -42,7 +42,7 @@ public void testSimpleInitialOffsets() throws Exception
         assertEquals(-1L, loc.getCharOffset());
         assertEquals(1, loc.getLineNr());
         assertEquals(1, loc.getColumnNr());
-        
+
         loc = p.getCurrentLocation();
         assertEquals(1L, loc.getByteOffset());
         assertEquals(-1L, loc.getCharOffset());
@@ -61,7 +61,7 @@ public void testOffsetWithInputOffset() throws Exception
         byte[] b = "   { }  ".getBytes("UTF-8");

         // and then peel them off
-        p = JSON_F.createParser(ObjectReadContext.empty(), b, 3, b.length-5);
+        p = JSON_F.createParser(ObjectReadContext.empty(), b, 3, b.length - 5);
         assertToken(JsonToken.START_OBJECT, p.nextToken());

         loc = p.getTokenLocation();
@@ -69,7 +69,7 @@ public void testOffsetWithInputOffset() throws Exception
         assertEquals(-1L, loc.getCharOffset());
         assertEquals(1, loc.getLineNr());
         assertEquals(1, loc.getColumnNr());
-        
+
         loc = p.getCurrentLocation();
         assertEquals(1L, loc.getByteOffset());
         assertEquals(-1L, loc.getCharOffset());
@@ -78,4 +78,122 @@ public void testOffsetWithInputOffset() throws Exception

         p.close();
     }
+
+    public void testOffsetWithoutInputOffset() throws Exception
+    {
+        JsonLocation loc;
+        JsonParser p;
+        // 3 spaces before, 2 after, just for padding
+        byte[] b = "   { }  ".getBytes("UTF-8");
+
+        // no offset/length given: the padding is part of the input and counts towards offsets
+        p = JSON_F.createParser(ObjectReadContext.empty(), b);
+        assertToken(JsonToken.START_OBJECT, p.nextToken());
+
+        loc = p.getTokenLocation();
+        assertEquals(3L, loc.getByteOffset());
+        assertEquals(-1L, loc.getCharOffset());
+        assertEquals(1, loc.getLineNr());
+        assertEquals(4, loc.getColumnNr());
+
+        loc = p.getCurrentLocation();
+        assertEquals(4L, loc.getByteOffset());
+        assertEquals(-1L, loc.getCharOffset());
+        assertEquals(1, loc.getLineNr());
+        assertEquals(5, loc.getColumnNr());
+
+        p.close();
+    }
+
+    // for [core#533]
+    public void testUtf8Bom() throws Exception
+    {
+        JsonLocation loc;
+        JsonParser p;
+
+        byte[] b = withUtf8Bom("{ }".getBytes("UTF-8"));
+
+        // the 3-byte BOM is skipped by the parser but must still count towards offsets
+        p = JSON_F.createParser(ObjectReadContext.empty(), b);
+        assertToken(JsonToken.START_OBJECT, p.nextToken());
+
+        loc = p.getTokenLocation();
+        assertEquals(3L, loc.getByteOffset());
+        assertEquals(-1L, loc.getCharOffset());
+        assertEquals(1, loc.getLineNr());
+        assertEquals(4, loc.getColumnNr());
+
+        loc = p.getCurrentLocation();
+        assertEquals(4L, loc.getByteOffset());
+        assertEquals(-1L, loc.getCharOffset());
+        assertEquals(1, loc.getLineNr());
+        assertEquals(5, loc.getColumnNr());
+
+        p.close();
+    }
+
+    public void testUtf8BomWithPadding() throws Exception
+    {
+        JsonLocation loc;
+        JsonParser p;
+
+        byte[] b = withUtf8Bom("   { }".getBytes("UTF-8"));
+
+        // 3 bytes of BOM plus 3 spaces of padding precede the first token
+        p = JSON_F.createParser(ObjectReadContext.empty(), b);
+        assertToken(JsonToken.START_OBJECT, p.nextToken());
+
+        loc = p.getTokenLocation();
+        assertEquals(6L, loc.getByteOffset());
+        assertEquals(-1L, loc.getCharOffset());
+        assertEquals(1, loc.getLineNr());
+        assertEquals(7, loc.getColumnNr());
+
+        loc = p.getCurrentLocation();
+        assertEquals(7L, loc.getByteOffset());
+        assertEquals(-1L, loc.getCharOffset());
+        assertEquals(1, loc.getLineNr());
+        assertEquals(8, loc.getColumnNr());
+
+        p.close();
+    }
+
+    public void testUtf8BomWithInputOffset() throws Exception
+    {
+        JsonLocation loc;
+        JsonParser p;
+
+        byte[] doc = withUtf8Bom("   { }".getBytes("UTF-8"));
+        // 2 bytes of unrelated leading data that the input offset has to skip
+        byte[] b = new byte[doc.length + 2];
+        System.arraycopy(doc, 0, b, 2, doc.length);
+
+        // and then peel them off; offsets are relative to the given start, BOM included
+        p = JSON_F.createParser(ObjectReadContext.empty(), b, 2, doc.length);
+        assertToken(JsonToken.START_OBJECT, p.nextToken());
+
+        loc = p.getTokenLocation();
+        assertEquals(6L, loc.getByteOffset());
+        assertEquals(-1L, loc.getCharOffset());
+        assertEquals(1, loc.getLineNr());
+        assertEquals(7, loc.getColumnNr());
+
+        loc = p.getCurrentLocation();
+        assertEquals(7L, loc.getByteOffset());
+        assertEquals(-1L, loc.getCharOffset());
+        assertEquals(1, loc.getLineNr());
+        assertEquals(8, loc.getColumnNr());
+
+        p.close();
+    }
+
+    private byte[] withUtf8Bom(byte[] bytes) {
+        byte[] arr = new byte[bytes.length + 3];
+        // write UTF-8 BOM
+        arr[0] = (byte) 0xEF;
+        arr[1] = (byte) 0xBB;
+        arr[2] = (byte) 0xBF;
+        System.arraycopy(bytes, 0, arr, 3, bytes.length);
+        return arr;
+    }
 }
diff --git a/src/test/java/com/fasterxml/jackson/core/read/JsonParserTest.java b/src/test/java/com/fasterxml/jackson/core/read/JsonParserTest.java
index 3970c2c994..69eb37de9b 100644
--- a/src/test/java/com/fasterxml/jackson/core/read/JsonParserTest.java
+++ b/src/test/java/com/fasterxml/jackson/core/read/JsonParserTest.java
@@ -432,15 +432,9 @@ public void testUtf8BOMHandling() throws Exception

         JsonParser p = JSON_FACTORY.createParser(ObjectReadContext.empty(), input);
         assertEquals(JsonToken.START_ARRAY, p.nextToken());
-        // should also have skipped first 3 bytes of BOM; but do we have offset available?
-        /* 08-Oct-2013, tatu: Alas, due to [core#111], we have to omit BOM in calculations
-         * as we do not know what the offset is due to -- may need to revisit, if this
-         * discrepancy becomes an issue. For now it just means that BOM is considered
-         * "out of stream" (not part of input).
-         */
+
         JsonLocation loc = p.getTokenLocation();
-        // so if BOM was consider in-stream (part of input), this should expect 3:
-        assertEquals(0, loc.getByteOffset());
+        assertEquals(3, loc.getByteOffset());
         assertEquals(-1, loc.getCharOffset());
         assertEquals(JsonToken.VALUE_NUMBER_INT, p.nextToken());
         assertEquals(JsonToken.END_ARRAY, p.nextToken());
@@ -449,7 +443,7 @@ public void testUtf8BOMHandling() throws Exception

         p = JSON_FACTORY.createParser(ObjectReadContext.empty(), new MockDataInput(input));
         assertEquals(JsonToken.START_ARRAY, p.nextToken());
-        // same BOM, but DataInput is more restrctive so can skip but offsets
+        // same BOM, but DataInput is more restrictive so can skip but offsets
         // are not reliable...
         loc = p.getTokenLocation();
         assertNotNull(loc);