Skip to content

Commit f6fc9e5

Browse files
author
jimeng
committed
fix comments, rename SimdjsonParser2 to SimdjsonParserWithFixPath
1 parent d0c4330 commit f6fc9e5

File tree

6 files changed

+322
-5
lines changed

6 files changed

+322
-5
lines changed

build.gradle

+10-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ group = 'org.simdjson'
2626
version = scmVersion.version
2727

2828
repositories {
29+
mavenLocal()
2930
mavenCentral()
3031
}
3132

@@ -45,6 +46,7 @@ java {
4546
ext {
4647
junitVersion = '5.10.2'
4748
jsoniterScalaVersion = '2.28.4'
49+
lombokVersion = '1.18.34'
4850
}
4951

5052
dependencies {
@@ -53,13 +55,18 @@ dependencies {
5355
jmhImplementation group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-core_2.13', version: jsoniterScalaVersion
5456
jmhImplementation group: 'com.google.guava', name: 'guava', version: '32.1.2-jre'
5557
compileOnly group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-macros_2.13', version: jsoniterScalaVersion
58+
compileOnly group: 'org.projectlombok', name: 'lombok', version: lombokVersion
59+
annotationProcessor group: 'org.projectlombok', name: 'lombok', version: lombokVersion
60+
testCompileOnly group: 'org.projectlombok', name: 'lombok', version: lombokVersion
61+
testAnnotationProcessor group: 'org.projectlombok', name: 'lombok', version: lombokVersion
5662

5763
testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.24.2'
5864
testImplementation group: 'org.apache.commons', name: 'commons-text', version: '1.10.0'
5965
testImplementation group: 'org.junit-pioneer', name: 'junit-pioneer', version: '2.2.0'
6066
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-api', version: junitVersion
6167
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-params', version: junitVersion
6268
testRuntimeOnly group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: junitVersion
69+
6370
}
6471

6572
tasks.register('downloadTestData') {
@@ -160,7 +167,9 @@ publishing {
160167
publications {
161168
mavenJava(MavenPublication) {
162169
from(components.java)
163-
170+
groupId = 'org.simdjson'
171+
artifactId = 'simdjson-java'
172+
version = scmVersion.version
164173
pom {
165174
name = project.name
166175
description = 'A Java version of simdjson, a high-performance JSON parser utilizing SIMD instructions.'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package org.simdjson;
2+
3+
import java.io.IOException;
4+
import java.io.InputStream;
5+
import java.util.concurrent.TimeUnit;
6+
7+
import org.openjdk.jmh.annotations.*;
8+
9+
import com.fasterxml.jackson.databind.JsonNode;
10+
import com.fasterxml.jackson.databind.ObjectMapper;
11+
12+
@State(Scope.Benchmark)
13+
@BenchmarkMode(Mode.Throughput)
14+
@OutputTimeUnit(TimeUnit.SECONDS)
15+
public class ParseAndSelectFixPathBenchMark {
16+
@Param({"/twitter.json"})
17+
String fileName;
18+
private byte[] buffer;
19+
private final SimdJsonParser simdJsonParser = new SimdJsonParser();
20+
private final ObjectMapper jacksonObjectMapper = new ObjectMapper();
21+
private final SimdJsonParserWithFixPath simdJsonParserWithFixPath = new SimdJsonParserWithFixPath(
22+
"statuses.0.user.default_profile", "statuses.0.user.screen_name",
23+
"statuses.0.user.name", "statuses.0.user.id", "statuses.0.user.description",
24+
"statuses.1.user.default_profile", "statuses.1.user.screen_name",
25+
"statuses.1.user.name", "statuses.1.user.id", "statuses.1.user.description");
26+
27+
@Setup(Level.Trial)
28+
public void setup() throws IOException {
29+
try (InputStream is = ParseBenchmark.class.getResourceAsStream("/twitter.json")) {
30+
buffer = is.readAllBytes();
31+
}
32+
System.out.println("VectorSpecies = " + VectorUtils.BYTE_SPECIES);
33+
}
34+
35+
@Benchmark
36+
public JsonValue parseMultiValuesForFixPaths_SimdJson() {
37+
return simdJsonParser.parse(buffer, buffer.length);
38+
}
39+
40+
@Benchmark
41+
public String[] parseMultiValuesForFixPaths_SimdJsonParserWithFixPath() {
42+
return simdJsonParserWithFixPath.parse(buffer, buffer.length);
43+
}
44+
45+
@Benchmark
46+
public JsonNode parseMultiValuesForFixPaths_Jackson() throws IOException {
47+
return jacksonObjectMapper.readTree(buffer);
48+
}
49+
}

src/main/java/org/simdjson/BitIndexes.java

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package org.simdjson;
22

3-
class BitIndexes {
3+
import java.util.Arrays;
4+
5+
public class BitIndexes {
46

57
private final int[] indexes;
68

@@ -44,8 +46,8 @@ private long clearLowestBit(long bits) {
4446
return bits & (bits - 1);
4547
}
4648

47-
void advance() {
48-
readIdx++;
49+
int advance() {
50+
return indexes[readIdx++];
4951
}
5052

5153
int getAndAdvance() {

src/main/java/org/simdjson/SimdJsonParser.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@ public SimdJsonParser(int capacity, int maxDepth) {
2424
paddedBuffer = new byte[capacity];
2525
indexer = new StructuralIndexer(bitIndexes);
2626
}
27-
27+
BitIndexes buildBitIndex (byte[] buffer, int len) {
28+
byte[] padded = padIfNeeded(buffer, len);
29+
reset();
30+
stage1(padded, len);
31+
return bitIndexes;
32+
}
2833
public <T> T parse(byte[] buffer, int len, Class<T> expectedType) {
2934
byte[] padded = padIfNeeded(buffer, len);
3035
reset();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
package org.simdjson;
2+
3+
import java.util.HashMap;
4+
import java.util.Map;
5+
6+
import lombok.Data;
7+
import lombok.RequiredArgsConstructor;
8+
9+
public class SimdJsonParserWithFixPath {
10+
11+
@Data
12+
@RequiredArgsConstructor
13+
static class JsonNode {
14+
private long version = 0;
15+
private boolean isLeaf = false;
16+
private final String name;
17+
private String value = null;
18+
private JsonNode parent = null;
19+
private Map<String, JsonNode> children = new HashMap<>();
20+
private int start = -1;
21+
private int end = -1;
22+
}
23+
24+
private final SimdJsonParser parser;
25+
private BitIndexes bitIndexes;
26+
private final JsonNode root = new JsonNode(null);
27+
private final JsonNode[] row;
28+
private final String[] result;
29+
private final String[] emptyResult;
30+
private JsonNode ptr;
31+
private byte[] buffer;
32+
private final int expectParseCols;
33+
// every time json string is processed, currentVersion will be incremented by 1
34+
private long currentVersion = 0;
35+
36+
public SimdJsonParserWithFixPath(String... args) {
37+
parser = new SimdJsonParser();
38+
expectParseCols = args.length;
39+
row = new JsonNode[expectParseCols];
40+
result = new String[expectParseCols];
41+
emptyResult = new String[expectParseCols];
42+
for (int i = 0; i < args.length; i++) {
43+
emptyResult[i] = null;
44+
}
45+
for (int i = 0; i < expectParseCols; i++) {
46+
JsonNode cur = root;
47+
String[] paths = args[i].split("\\.");
48+
for (int j = 0; j < paths.length; j++) {
49+
if (!cur.getChildren().containsKey(paths[j])) {
50+
JsonNode child = new JsonNode(paths[j]);
51+
cur.getChildren().put(paths[j], child);
52+
child.setParent(cur);
53+
}
54+
cur = cur.getChildren().get(paths[j]);
55+
}
56+
cur.setLeaf(true);
57+
row[i] = cur;
58+
}
59+
60+
}
61+
62+
public String[] parse(byte[] buffer, int len) {
63+
this.bitIndexes = parser.buildBitIndex(buffer, len);
64+
if (buffer == null || buffer.length == 0) {
65+
return emptyResult;
66+
}
67+
this.currentVersion++;
68+
this.ptr = root;
69+
this.buffer = buffer;
70+
71+
switch (buffer[bitIndexes.peek()]) {
72+
case '{' -> {
73+
parseMap();
74+
}
75+
case '[' -> {
76+
parseList();
77+
}
78+
default -> {
79+
throw new RuntimeException("invalid json format");
80+
}
81+
}
82+
return getResult();
83+
}
84+
85+
private String parseValue() {
86+
int start = bitIndexes.advance();
87+
int next = bitIndexes.peek();
88+
String field = new String(buffer, start, next - start).trim();
89+
if ("null".equalsIgnoreCase(field)) {
90+
return null;
91+
}
92+
// field type is string or type is decimal
93+
if (field.startsWith("\"")) {
94+
field = field.substring(1, field.length() - 1);
95+
}
96+
return field;
97+
}
98+
99+
private void parseElement(String expectFieldName) {
100+
// if expectFieldName is null, parent is map, else is list
101+
if (expectFieldName == null) {
102+
expectFieldName = parseValue();
103+
bitIndexes.advance(); // skip :
104+
}
105+
if (!ptr.getChildren().containsKey(expectFieldName)) {
106+
skip(false);
107+
return;
108+
}
109+
ptr = ptr.getChildren().get(expectFieldName);
110+
switch (buffer[bitIndexes.peek()]) {
111+
case '{' -> {
112+
parseMap();
113+
}
114+
case '[' -> {
115+
parseList();
116+
}
117+
default -> {
118+
ptr.setValue(skip(true));
119+
ptr.setVersion(currentVersion);
120+
}
121+
}
122+
ptr = ptr.getParent();
123+
}
124+
125+
private void parseMap() {
126+
if (ptr.getChildren() == null) {
127+
ptr.setValue(skip(true));
128+
ptr.setVersion(currentVersion);
129+
return;
130+
}
131+
ptr.setStart(bitIndexes.peek());
132+
bitIndexes.advance();
133+
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != '}') {
134+
parseElement(null);
135+
if (buffer[bitIndexes.peek()] == ',') {
136+
bitIndexes.advance();
137+
}
138+
}
139+
ptr.setEnd(bitIndexes.peek());
140+
if (ptr.isLeaf()) {
141+
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
142+
ptr.setVersion(currentVersion);
143+
}
144+
bitIndexes.advance();
145+
}
146+
147+
private void parseList() {
148+
if (ptr.getChildren() == null) {
149+
ptr.setValue(skip(true));
150+
ptr.setVersion(currentVersion);
151+
return;
152+
}
153+
ptr.setStart(bitIndexes.peek());
154+
bitIndexes.advance();
155+
int i = 0;
156+
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != ']') {
157+
parseElement("" + i);
158+
if (buffer[bitIndexes.peek()] == ',') {
159+
bitIndexes.advance();
160+
}
161+
i++;
162+
}
163+
ptr.setEnd(bitIndexes.peek());
164+
if (ptr.isLeaf()) {
165+
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
166+
ptr.setVersion(currentVersion);
167+
}
168+
bitIndexes.advance();
169+
}
170+
171+
private String skip(boolean retainValue) {
172+
int i = 0;
173+
int start = retainValue ? bitIndexes.peek() : 0;
174+
switch (buffer[bitIndexes.peek()]) {
175+
case '{' -> {
176+
i++;
177+
while (i > 0) {
178+
bitIndexes.advance();
179+
if (buffer[bitIndexes.peek()] == '{') {
180+
i++;
181+
} else if (buffer[bitIndexes.peek()] == '}') {
182+
i--;
183+
}
184+
}
185+
int end = bitIndexes.peek();
186+
bitIndexes.advance();
187+
return retainValue ? new String(buffer, start, end - start + 1) : null;
188+
}
189+
case '[' -> {
190+
i++;
191+
while (i > 0) {
192+
bitIndexes.advance();
193+
if (buffer[bitIndexes.peek()] == '[') {
194+
i++;
195+
} else if (buffer[bitIndexes.peek()] == ']') {
196+
i--;
197+
}
198+
}
199+
int end = bitIndexes.peek();
200+
bitIndexes.advance();
201+
return retainValue ? new String(buffer, start, end - start + 1) : null;
202+
}
203+
default -> {
204+
return parseValue();
205+
}
206+
}
207+
}
208+
209+
private String[] getResult() {
210+
for (int i = 0; i < expectParseCols; i++) {
211+
if (row[i].getVersion() < currentVersion) {
212+
result[i] = null;
213+
continue;
214+
}
215+
result[i] = row[i].getValue();
216+
}
217+
return result;
218+
}
219+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package org.simdjson;
2+
3+
import static org.simdjson.testutils.SimdJsonAssertions.assertThat;
4+
import static org.simdjson.testutils.TestUtils.toUtf8;
5+
6+
import org.junit.jupiter.api.Test;
7+
8+
public class JsonMultiValueParsingTest {
9+
@Test
10+
public void testParseMultiValue() {
11+
byte[] json = toUtf8("{\"field1\":{\"field2\":\"value2\",\"field3\":3},\"field4\":[\"value4\",\"value5\"],\"field5\":null}");
12+
SimdJsonParserWithFixPath parser = new SimdJsonParserWithFixPath("field1.field2", "field1.field3", "field4", "field4.0", "field5");
13+
String[] result = parser.parse(json, json.length);
14+
assertThat(result[0]).isEqualTo("value2");
15+
assertThat(result[1]).isEqualTo("3");
16+
assertThat(result[2]).isEqualTo("[\"value4\",\"value5\"]");
17+
assertThat(result[3]).isEqualTo("value4");
18+
assertThat(result[4]).isEqualTo(null);
19+
}
20+
21+
@Test
22+
public void testNonAsciiCharacters() {
23+
byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4}");
24+
SimdJsonParserWithFixPath parser = new SimdJsonParserWithFixPath("ąćśńźż", "\\u20A9\\u0E3F", "αβγ", "😀abc😀");
25+
// when
26+
String[] result = parser.parse(json, json.length);
27+
// then
28+
assertThat(result[0]).isEqualTo("1");
29+
assertThat(result[1]).isEqualTo("2");
30+
assertThat(result[2]).isEqualTo("3");
31+
assertThat(result[3]).isEqualTo("4");
32+
}
33+
}

0 commit comments

Comments
 (0)