Skip to content

Commit 4bed300

Browse files
author
jimeng
committed
fix comments, rename SimdjsonParser2 to SimdjsonParserWithFixPath
1 parent d0c4330 commit 4bed300

File tree

6 files changed

+325
-5
lines changed

6 files changed

+325
-5
lines changed

build.gradle

+10-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ group = 'org.simdjson'
2626
version = scmVersion.version
2727

2828
repositories {
29+
mavenLocal()
2930
mavenCentral()
3031
}
3132

@@ -45,6 +46,7 @@ java {
4546
ext {
4647
junitVersion = '5.10.2'
4748
jsoniterScalaVersion = '2.28.4'
49+
lombokVersion = '1.18.34'
4850
}
4951

5052
dependencies {
@@ -53,13 +55,18 @@ dependencies {
5355
jmhImplementation group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-core_2.13', version: jsoniterScalaVersion
5456
jmhImplementation group: 'com.google.guava', name: 'guava', version: '32.1.2-jre'
5557
compileOnly group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-macros_2.13', version: jsoniterScalaVersion
58+
compileOnly group: 'org.projectlombok', name: 'lombok', version: lombokVersion
59+
annotationProcessor group: 'org.projectlombok', name: 'lombok', version: lombokVersion
60+
testCompileOnly group: 'org.projectlombok', name: 'lombok', version: lombokVersion
61+
testAnnotationProcessor group: 'org.projectlombok', name: 'lombok', version: lombokVersion
5662

5763
testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.24.2'
5864
testImplementation group: 'org.apache.commons', name: 'commons-text', version: '1.10.0'
5965
testImplementation group: 'org.junit-pioneer', name: 'junit-pioneer', version: '2.2.0'
6066
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-api', version: junitVersion
6167
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-params', version: junitVersion
6268
testRuntimeOnly group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: junitVersion
69+
6370
}
6471

6572
tasks.register('downloadTestData') {
@@ -160,7 +167,9 @@ publishing {
160167
publications {
161168
mavenJava(MavenPublication) {
162169
from(components.java)
163-
170+
groupId = 'org.simdjson'
171+
artifactId = 'simdjson-java'
172+
version = scmVersion.version
164173
pom {
165174
name = project.name
166175
description = 'A Java version of simdjson, a high-performance JSON parser utilizing SIMD instructions.'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package org.simdjson;
2+
3+
import java.io.IOException;
4+
import java.io.InputStream;
5+
import java.util.concurrent.TimeUnit;
6+
7+
import org.openjdk.jmh.annotations.*;
8+
9+
import com.fasterxml.jackson.databind.JsonNode;
10+
import com.fasterxml.jackson.databind.ObjectMapper;
11+
12+
@State(Scope.Benchmark)
13+
@BenchmarkMode(Mode.Throughput)
14+
@OutputTimeUnit(TimeUnit.SECONDS)
15+
public class ParseAndSelectFixPathBenchMark {
16+
@Param({"/twitter.json"})
17+
String fileName;
18+
private byte[] buffer;
19+
private final SimdJsonParser simdJsonParser = new SimdJsonParser();
20+
private final ObjectMapper jacksonObjectMapper = new ObjectMapper();
21+
private final SimdJsonParserWithFixPath simdJsonParserWithFixPath = new SimdJsonParserWithFixPath(
22+
"statuses.0.user.default_profile", "statuses.0.user.screen_name",
23+
"statuses.0.user.name", "statuses.0.user.id", "statuses.0.user.description",
24+
"statuses.1.user.default_profile", "statuses.1.user.screen_name",
25+
"statuses.1.user.name", "statuses.1.user.id", "statuses.1.user.description");
26+
27+
@Setup(Level.Trial)
28+
public void setup() throws IOException {
29+
try (InputStream is = ParseBenchmark.class.getResourceAsStream("/twitter.json")) {
30+
buffer = is.readAllBytes();
31+
}
32+
System.out.println("VectorSpecies = " + VectorUtils.BYTE_SPECIES);
33+
}
34+
35+
@Benchmark
36+
public JsonValue parseMultiValuesForFixPaths_SimdJson() {
37+
return simdJsonParser.parse(buffer, buffer.length);
38+
}
39+
40+
@Benchmark
41+
public String[] parseMultiValuesForFixPaths_SimdJsonParserWithFixPath() {
42+
return simdJsonParserWithFixPath.parse(buffer, buffer.length);
43+
}
44+
45+
@Benchmark
46+
public JsonNode parseMultiValuesForFixPaths_Jackson() throws IOException {
47+
return jacksonObjectMapper.readTree(buffer);
48+
}
49+
}

src/main/java/org/simdjson/BitIndexes.java

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package org.simdjson;
22

3-
class BitIndexes {
3+
import java.util.Arrays;
4+
5+
public class BitIndexes {
46

57
private final int[] indexes;
68

@@ -44,8 +46,8 @@ private long clearLowestBit(long bits) {
4446
return bits & (bits - 1);
4547
}
4648

47-
void advance() {
48-
readIdx++;
49+
int advance() {
50+
return indexes[readIdx++];
4951
}
5052

5153
int getAndAdvance() {

src/main/java/org/simdjson/SimdJsonParser.java

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package org.simdjson;
22

3+
import lombok.Getter;
4+
35
public class SimdJsonParser {
46

57
private static final int PADDING = 64;
@@ -24,7 +26,12 @@ public SimdJsonParser(int capacity, int maxDepth) {
2426
paddedBuffer = new byte[capacity];
2527
indexer = new StructuralIndexer(bitIndexes);
2628
}
27-
29+
BitIndexes buildBitIndex (byte[] buffer, int len) {
30+
byte[] padded = padIfNeeded(buffer, len);
31+
reset();
32+
stage1(padded, len);
33+
return bitIndexes;
34+
}
2835
public <T> T parse(byte[] buffer, int len, Class<T> expectedType) {
2936
byte[] padded = padIfNeeded(buffer, len);
3037
reset();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
package org.simdjson;
2+
3+
import java.util.HashMap;
4+
import java.util.Map;
5+
6+
import lombok.Data;
7+
import lombok.RequiredArgsConstructor;
8+
9+
public class SimdJsonParserWithFixPath {
10+
11+
@Data
12+
@RequiredArgsConstructor
13+
static class JsonNode {
14+
private long version = 0;
15+
private boolean isLeaf = false;
16+
private final String name;
17+
private String value = null;
18+
private JsonNode parent = null;
19+
private Map<String, JsonNode> children = new HashMap<>();
20+
private int start = -1;
21+
private int end = -1;
22+
}
23+
24+
private final SimdJsonParser parser;
25+
private BitIndexes bitIndexes;
26+
private final JsonNode root = new JsonNode(null);
27+
private final JsonNode[] row;
28+
private final String[] result;
29+
private final String[] emptyResult;
30+
private JsonNode ptr;
31+
private byte[] buffer;
32+
private final int expectParseCols;
33+
// every time json string is processed, currentVersion will be incremented by 1
34+
private long currentVersion = 0;
35+
// pruning, when alreadyProcessedCols == NUM
36+
37+
public SimdJsonParserWithFixPath(String... args) {
38+
parser = new SimdJsonParser();
39+
expectParseCols = args.length;
40+
row = new JsonNode[expectParseCols];
41+
result = new String[expectParseCols];
42+
emptyResult = new String[expectParseCols];
43+
for (int i = 0; i < args.length; i++) {
44+
emptyResult[i] = null;
45+
}
46+
for (int i = 0; i < expectParseCols; i++) {
47+
JsonNode cur = root;
48+
String[] paths = args[i].split("\\.");
49+
for (int j = 0; j < paths.length; j++) {
50+
if (!cur.getChildren().containsKey(paths[j])) {
51+
JsonNode child = new JsonNode(paths[j]);
52+
cur.getChildren().put(paths[j], child);
53+
child.setParent(cur);
54+
}
55+
cur = cur.getChildren().get(paths[j]);
56+
}
57+
cur.setLeaf(true);
58+
row[i] = cur;
59+
}
60+
61+
}
62+
63+
public String[] parse(byte[] buffer, int len) {
64+
this.bitIndexes = parser.buildBitIndex(buffer, len);
65+
if (buffer == null || buffer.length == 0) {
66+
return emptyResult;
67+
}
68+
this.currentVersion++;
69+
this.ptr = root;
70+
this.buffer = buffer;
71+
72+
switch (buffer[bitIndexes.peek()]) {
73+
case '{' -> {
74+
parseMap();
75+
}
76+
case '[' -> {
77+
parseList();
78+
}
79+
default -> {
80+
throw new RuntimeException("invalid json format");
81+
}
82+
}
83+
return getResult();
84+
}
85+
86+
private String parseValue() {
87+
int start = bitIndexes.advance();
88+
int next = bitIndexes.peek();
89+
String field = new String(buffer, start, next - start).trim();
90+
if ("null".equalsIgnoreCase(field)) {
91+
return null;
92+
}
93+
// field type is string or type is decimal
94+
if (field.startsWith("\"")) {
95+
field = field.substring(1, field.length() - 1);
96+
}
97+
return field;
98+
}
99+
100+
private void parseElement(String expectFieldName) {
101+
// if expectFieldName is null, parent is map, else is list
102+
if (expectFieldName == null) {
103+
expectFieldName = parseValue();
104+
bitIndexes.advance(); // skip :
105+
}
106+
if (!ptr.getChildren().containsKey(expectFieldName)) {
107+
skip(false);
108+
return;
109+
}
110+
ptr = ptr.getChildren().get(expectFieldName);
111+
switch (buffer[bitIndexes.peek()]) {
112+
case '{' -> {
113+
parseMap();
114+
}
115+
case '[' -> {
116+
parseList();
117+
}
118+
default -> {
119+
ptr.setValue(skip(true));
120+
ptr.setVersion(currentVersion);
121+
}
122+
}
123+
ptr = ptr.getParent();
124+
}
125+
126+
private void parseMap() {
127+
if (ptr.getChildren() == null) {
128+
ptr.setValue(skip(true));
129+
ptr.setVersion(currentVersion);
130+
return;
131+
}
132+
ptr.setStart(bitIndexes.peek());
133+
bitIndexes.advance();
134+
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != '}') {
135+
parseElement(null);
136+
if (buffer[bitIndexes.peek()] == ',') {
137+
bitIndexes.advance();
138+
}
139+
}
140+
ptr.setEnd(bitIndexes.peek());
141+
if (ptr.isLeaf()) {
142+
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
143+
ptr.setVersion(currentVersion);
144+
}
145+
bitIndexes.advance();
146+
}
147+
148+
private void parseList() {
149+
if (ptr.getChildren() == null) {
150+
ptr.setValue(skip(true));
151+
ptr.setVersion(currentVersion);
152+
return;
153+
}
154+
ptr.setStart(bitIndexes.peek());
155+
bitIndexes.advance();
156+
int i = 0;
157+
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != ']') {
158+
parseElement("" + i);
159+
if (buffer[bitIndexes.peek()] == ',') {
160+
bitIndexes.advance();
161+
}
162+
i++;
163+
}
164+
ptr.setEnd(bitIndexes.peek());
165+
if (ptr.isLeaf()) {
166+
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
167+
ptr.setVersion(currentVersion);
168+
}
169+
bitIndexes.advance();
170+
}
171+
172+
private String skip(boolean retainValue) {
173+
int i = 0;
174+
int start = retainValue ? bitIndexes.peek() : 0;
175+
switch (buffer[bitIndexes.peek()]) {
176+
case '{' -> {
177+
i++;
178+
while (i > 0) {
179+
bitIndexes.advance();
180+
if (buffer[bitIndexes.peek()] == '{') {
181+
i++;
182+
} else if (buffer[bitIndexes.peek()] == '}') {
183+
i--;
184+
}
185+
}
186+
int end = bitIndexes.peek();
187+
bitIndexes.advance();
188+
return retainValue ? new String(buffer, start, end - start + 1) : null;
189+
}
190+
case '[' -> {
191+
i++;
192+
while (i > 0) {
193+
bitIndexes.advance();
194+
if (buffer[bitIndexes.peek()] == '[') {
195+
i++;
196+
} else if (buffer[bitIndexes.peek()] == ']') {
197+
i--;
198+
}
199+
}
200+
int end = bitIndexes.peek();
201+
bitIndexes.advance();
202+
return retainValue ? new String(buffer, start, end - start + 1) : null;
203+
}
204+
default -> {
205+
return parseValue();
206+
}
207+
}
208+
}
209+
210+
private String[] getResult() {
211+
for (int i = 0; i < expectParseCols; i++) {
212+
if (row[i].getVersion() < currentVersion) {
213+
result[i] = null;
214+
continue;
215+
}
216+
result[i] = row[i].getValue();
217+
}
218+
return result;
219+
}
220+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package org.simdjson;
2+
3+
import static org.simdjson.testutils.SimdJsonAssertions.assertThat;
4+
import static org.simdjson.testutils.TestUtils.toUtf8;
5+
6+
import org.junit.jupiter.api.Test;
7+
8+
public class JsonMultiValueParsingTest {
9+
@Test
10+
public void testParseMultiValue() {
11+
byte[] json = toUtf8("{\"field1\":{\"field2\":\"value2\",\"field3\":3},\"field4\":[\"value4\",\"value5\"],\"field5\":null}");
12+
SimdJsonParserWithFixPath parser = new SimdJsonParserWithFixPath("field1.field2", "field1.field3", "field4", "field4.0", "field5");
13+
String[] result = parser.parse(json, json.length);
14+
assertThat(result[0]).isEqualTo("value2");
15+
assertThat(result[1]).isEqualTo("3");
16+
assertThat(result[2]).isEqualTo("[\"value4\",\"value5\"]");
17+
assertThat(result[3]).isEqualTo("value4");
18+
assertThat(result[4]).isEqualTo(null);
19+
}
20+
21+
@Test
22+
public void testNonAsciiCharacters() {
23+
byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4}");
24+
SimdJsonParserWithFixPath parser = new SimdJsonParserWithFixPath("ąćśńźż", "\\u20A9\\u0E3F", "αβγ", "😀abc😀");
25+
// when
26+
String[] result = parser.parse(json, json.length);
27+
// then
28+
assertThat(result[0]).isEqualTo("1");
29+
assertThat(result[1]).isEqualTo("2");
30+
assertThat(result[2]).isEqualTo("3");
31+
assertThat(result[3]).isEqualTo("4");
32+
}
33+
}

0 commit comments

Comments
 (0)