Skip to content

Commit 3139b2c

Browse files
author
jimeng
committed
add test case
1 parent ebe3d00 commit 3139b2c

File tree

5 files changed

+138
-47
lines changed

5 files changed

+138
-47
lines changed

build.gradle

+6
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ ext {
4747
junitVersion = '5.10.2'
4848
jsoniterScalaVersion = '2.28.4'
4949
lombokVersion = '1.18.34'
50+
jacksonVersion = '2.18.0'
5051
}
5152

5253
dependencies {
@@ -66,6 +67,11 @@ dependencies {
6667
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-api', version: junitVersion
6768
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-params', version: junitVersion
6869
testRuntimeOnly group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: junitVersion
70+
71+
// Jackson dependencies, used as the comparison baseline in the JMH benchmarks
72+
implementation group: 'com.fasterxml.jackson.core', name: 'jackson-core', version: jacksonVersion
73+
implementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: jacksonVersion
74+
implementation group: 'com.fasterxml.jackson.core', name: 'jackson-annotations', version: jacksonVersion
6975
}
7076

7177
tasks.register('downloadTestData') {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
package org.simdjson;
2+
3+
import java.io.IOException;
4+
import java.io.InputStream;
5+
import java.util.concurrent.TimeUnit;
6+
7+
import org.openjdk.jmh.annotations.Benchmark;
8+
import org.openjdk.jmh.annotations.BenchmarkMode;
9+
import org.openjdk.jmh.annotations.Level;
10+
import org.openjdk.jmh.annotations.Mode;
11+
import org.openjdk.jmh.annotations.OutputTimeUnit;
12+
import org.openjdk.jmh.annotations.Param;
13+
import org.openjdk.jmh.annotations.Scope;
14+
import org.openjdk.jmh.annotations.Setup;
15+
import org.openjdk.jmh.annotations.State;
16+
17+
import com.fasterxml.jackson.databind.ObjectMapper;
18+
import com.fasterxml.jackson.databind.node.ArrayNode;
19+
20+
@State(Scope.Benchmark)
21+
@BenchmarkMode(Mode.Throughput)
22+
@OutputTimeUnit(TimeUnit.SECONDS)
23+
public class Parse2VsJacksonBenchMark {
24+
@Param({"/twitter.json"})
25+
String fileName;
26+
private byte[] buffer;
27+
private final SimdJsonParser2 parser = new SimdJsonParser2("statuses.0.metadata", "metadata.0.created_at", "metadata.0.id",
28+
"statuses.1.metadata", "metadata.1.created_at", "metadata.1.id");
29+
private final ObjectMapper MAPPER = new ObjectMapper();
30+
31+
@Setup(Level.Trial)
32+
public void setup() throws IOException {
33+
try (InputStream is = ParseBenchmark.class.getResourceAsStream(fileName)) {
34+
assert is != null;
35+
buffer = is.readAllBytes();
36+
}
37+
}
38+
39+
@Benchmark
40+
public void parseBySimdJson() {
41+
String[] result = parser.parse(buffer, buffer.length);
42+
}
43+
44+
@Benchmark
45+
public void parseByJackson() throws Exception {
46+
ArrayNode arrayNode = (ArrayNode) MAPPER.readTree(buffer).path("statuses");
47+
String[] result = new String[6];
48+
result[0] = arrayNode.get(0).path("metadata").toString();
49+
result[1] = arrayNode.get(0).path("created_at").toString();
50+
result[2] = arrayNode.get(0).path("id").toString();
51+
result[3] = arrayNode.get(1).path("metadata").toString();
52+
result[4] = arrayNode.get(1).path("created_at").toString();
53+
result[5] = arrayNode.get(1).path("id").toString();
54+
}
55+
}

src/main/java/org/simdjson/BitIndexes.java

+2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package org.simdjson;
22

3+
import java.util.Arrays;
4+
35
public class BitIndexes {
46

57
private final int[] indexes;

src/main/java/org/simdjson/SimdJsonParser2.java

+42-47
Original file line numberDiff line numberDiff line change
@@ -29,21 +29,22 @@ static class JsonNode {
2929
private final String[] emptyResult;
3030
private JsonNode ptr;
3131
private byte[] buffer;
32-
private final int targetParseNum;
32+
private final int expectParseCols;
33+
// every time json string is processed, currentVersion will be incremented by 1
3334
private long currentVersion = 0;
3435
// pruning: parsing stops early once parseCols reaches expectParseCols
35-
private long alreadyProcessedCols = 0;
36+
private long parseCols = 0;
3637

3738
public SimdJsonParser2(String... args) {
3839
parser = new SimdJsonParser();
39-
targetParseNum = args.length;
40-
row = new JsonNode[targetParseNum];
41-
result = new String[targetParseNum];
42-
emptyResult = new String[targetParseNum];
40+
expectParseCols = args.length;
41+
row = new JsonNode[expectParseCols];
42+
result = new String[expectParseCols];
43+
emptyResult = new String[expectParseCols];
4344
for (int i = 0; i < args.length; i++) {
4445
emptyResult[i] = null;
4546
}
46-
for (int i = 0; i < targetParseNum; i++) {
47+
for (int i = 0; i < expectParseCols; i++) {
4748
JsonNode cur = root;
4849
String[] paths = args[i].split("\\.");
4950
for (int j = 0; j < paths.length; j++) {
@@ -65,7 +66,7 @@ public String[] parse(byte[] buffer, int len) {
6566
if (buffer == null || buffer.length == 0) {
6667
return emptyResult;
6768
}
68-
this.alreadyProcessedCols = 0;
69+
this.parseCols = 0;
6970
this.currentVersion++;
7071
this.ptr = root;
7172
this.buffer = buffer;
@@ -84,22 +85,34 @@ public String[] parse(byte[] buffer, int len) {
8485
return getResult();
8586
}
8687

87-
private void parseElement(String fieldName) {
88-
if (fieldName == null) {
89-
int start = bitIndexes.advance();
90-
int realEnd = bitIndexes.advance();
91-
while (realEnd > start) {
92-
if (buffer[--realEnd] == '"') {
93-
break;
94-
}
95-
}
96-
fieldName = new String(buffer, start + 1, realEnd - start - 1);
88+
private String parseField() {
89+
int start = bitIndexes.advance();
90+
int next = bitIndexes.peek();
91+
String field = new String(buffer, start, next - start).trim();
92+
if ("null".equalsIgnoreCase(field)) {
93+
return null;
94+
}
95+
// field type is string or type is decimal
96+
if (field.startsWith("\"")) {
97+
field = field.substring(1, field.length() - 1);
98+
}
99+
return field;
100+
}
101+
102+
private void parseElement(String expectFieldName) {
103+
if (parseCols >= expectParseCols) {
104+
return;
105+
}
106+
// a null expectFieldName means the parent is a map, so the key is read from the buffer; otherwise the parent is a list and the name is the element index
107+
if (expectFieldName == null) {
108+
expectFieldName = parseField();
109+
bitIndexes.advance(); // skip :
97110
}
98-
if (!ptr.getChildren().containsKey(fieldName)) {
111+
if (!ptr.getChildren().containsKey(expectFieldName)) {
99112
skip(false);
100113
return;
101114
}
102-
ptr = ptr.getChildren().get(fieldName);
115+
ptr = ptr.getChildren().get(expectFieldName);
103116
switch (buffer[bitIndexes.peek()]) {
104117
case '{' -> {
105118
parseMap();
@@ -110,7 +123,7 @@ private void parseElement(String fieldName) {
110123
default -> {
111124
ptr.setValue(skip(true));
112125
ptr.setVersion(currentVersion);
113-
++alreadyProcessedCols;
126+
++parseCols;
114127
}
115128
}
116129
ptr = ptr.getParent();
@@ -120,12 +133,12 @@ private void parseMap() {
120133
if (ptr.getChildren() == null) {
121134
ptr.setValue(skip(true));
122135
ptr.setVersion(currentVersion);
123-
++alreadyProcessedCols;
136+
++parseCols;
124137
return;
125138
}
126139
ptr.setStart(bitIndexes.peek());
127140
bitIndexes.advance();
128-
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != '}' && alreadyProcessedCols < targetParseNum) {
141+
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != '}' && parseCols <= expectParseCols) {
129142
parseElement(null);
130143
if (buffer[bitIndexes.peek()] == ',') {
131144
bitIndexes.advance();
@@ -135,7 +148,7 @@ private void parseMap() {
135148
if (ptr.isLeaf()) {
136149
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
137150
ptr.setVersion(currentVersion);
138-
++alreadyProcessedCols;
151+
++parseCols;
139152
}
140153
bitIndexes.advance();
141154
}
@@ -144,13 +157,13 @@ private void parseList() {
144157
if (ptr.getChildren() == null) {
145158
ptr.setValue(skip(true));
146159
ptr.setVersion(currentVersion);
147-
++alreadyProcessedCols;
160+
++parseCols;
148161
return;
149162
}
150163
ptr.setStart(bitIndexes.peek());
151164
bitIndexes.advance();
152165
int i = 0;
153-
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != ']' && alreadyProcessedCols < targetParseNum) {
166+
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != ']' && parseCols <= expectParseCols) {
154167
parseElement("" + i);
155168
if (buffer[bitIndexes.peek()] == ',') {
156169
bitIndexes.advance();
@@ -161,7 +174,7 @@ private void parseList() {
161174
if (ptr.isLeaf()) {
162175
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
163176
ptr.setVersion(currentVersion);
164-
++alreadyProcessedCols;
177+
++parseCols;
165178
}
166179
bitIndexes.advance();
167180
}
@@ -198,32 +211,14 @@ private String skip(boolean retainValue) {
198211
bitIndexes.advance();
199212
return retainValue ? new String(buffer, start, end - start + 1) : null;
200213
}
201-
case '"' -> {
202-
bitIndexes.advance();
203-
int realEnd = bitIndexes.peek();
204-
while (realEnd > start) {
205-
if (buffer[--realEnd] == '"') {
206-
break;
207-
}
208-
}
209-
return retainValue ? new String(buffer, start + 1, realEnd - start - 1) : null;
210-
}
211214
default -> {
212-
bitIndexes.advance();
213-
int realEnd = bitIndexes.peek();
214-
while (realEnd >= start) {
215-
--realEnd;
216-
if (buffer[realEnd] >= '0' && buffer[realEnd] <= '9') {
217-
break;
218-
}
219-
}
220-
return retainValue ? new String(buffer, start, realEnd - start + 1) : null;
215+
return parseField();
221216
}
222217
}
223218
}
224219

225220
private String[] getResult() {
226-
for (int i = 0; i < targetParseNum; i++) {
221+
for (int i = 0; i < expectParseCols; i++) {
227222
if (row[i].getVersion() < currentVersion) {
228223
result[i] = null;
229224
continue;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package org.simdjson;
2+
3+
import static org.simdjson.testutils.SimdJsonAssertions.assertThat;
4+
import static org.simdjson.testutils.TestUtils.toUtf8;
5+
6+
import org.junit.jupiter.api.Test;
7+
8+
public class JsonMultiValueParsingTest {
9+
@Test
10+
public void testParseMultiValue() {
11+
byte[] json = toUtf8("{\"field1\":{\"field2\":\"value2\",\"field3\":3},\"field4\":[\"value4\",\"value5\"],\"field5\":null}");
12+
SimdJsonParser2 parser = new SimdJsonParser2("field1.field2", "field1.field3", "field4", "field4.0", "field5");
13+
String[] result = parser.parse(json, json.length);
14+
assertThat(result[0]).isEqualTo("value2");
15+
assertThat(result[1]).isEqualTo("3");
16+
assertThat(result[2]).isEqualTo("[\"value4\",\"value5\"]");
17+
assertThat(result[3]).isEqualTo("value4");
18+
assertThat(result[4]).isEqualTo(null);
19+
}
20+
21+
@Test
22+
public void testNonAsciiCharacters() {
23+
byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4}");
24+
SimdJsonParser2 parser = new SimdJsonParser2("ąćśńźż", "\\u20A9\\u0E3F", "αβγ", "😀abc😀");
25+
// when
26+
String[] result = parser.parse(json, json.length);
27+
// then
28+
assertThat(result[0]).isEqualTo("1");
29+
assertThat(result[1]).isEqualTo("2");
30+
assertThat(result[2]).isEqualTo("3");
31+
assertThat(result[3]).isEqualTo("4");
32+
}
33+
}

0 commit comments

Comments
 (0)