Skip to content

Commit ebe3d00

Browse files
author
jimeng
committed
add SimdJsonParser2 based on bitindex
1 parent d0c4330 commit ebe3d00

File tree

4 files changed

+255
-5
lines changed

4 files changed

+255
-5
lines changed

build.gradle

+9-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ group = 'org.simdjson'
2626
version = scmVersion.version
2727

2828
repositories {
29+
mavenLocal()
2930
mavenCentral()
3031
}
3132

@@ -45,6 +46,7 @@ java {
4546
ext {
4647
junitVersion = '5.10.2'
4748
jsoniterScalaVersion = '2.28.4'
49+
lombokVersion = '1.18.34'
4850
}
4951

5052
dependencies {
@@ -53,6 +55,10 @@ dependencies {
5355
jmhImplementation group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-core_2.13', version: jsoniterScalaVersion
5456
jmhImplementation group: 'com.google.guava', name: 'guava', version: '32.1.2-jre'
5557
compileOnly group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-macros_2.13', version: jsoniterScalaVersion
58+
compileOnly group: 'org.projectlombok', name: 'lombok', version: lombokVersion
59+
annotationProcessor group: 'org.projectlombok', name: 'lombok', version: lombokVersion
60+
testCompileOnly group: 'org.projectlombok', name: 'lombok', version: lombokVersion
61+
testAnnotationProcessor group: 'org.projectlombok', name: 'lombok', version: lombokVersion
5662

5763
testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.24.2'
5864
testImplementation group: 'org.apache.commons', name: 'commons-text', version: '1.10.0'
@@ -160,7 +166,9 @@ publishing {
160166
publications {
161167
mavenJava(MavenPublication) {
162168
from(components.java)
163-
169+
groupId = 'org.simdjson'
170+
artifactId = 'simdjson-java'
171+
version = scmVersion.version
164172
pom {
165173
name = project.name
166174
description = 'A Java version of simdjson, a high-performance JSON parser utilizing SIMD instructions.'

src/main/java/org/simdjson/BitIndexes.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package org.simdjson;
22

3-
class BitIndexes {
3+
public class BitIndexes {
44

55
private final int[] indexes;
66

@@ -44,8 +44,8 @@ private long clearLowestBit(long bits) {
4444
return bits & (bits - 1);
4545
}
4646

47-
void advance() {
48-
readIdx++;
47+
int advance() {
48+
return indexes[readIdx++];
4949
}
5050

5151
int getAndAdvance() {

src/main/java/org/simdjson/SimdJsonParser.java

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package org.simdjson;
22

3+
import lombok.Getter;
4+
35
public class SimdJsonParser {
46

57
private static final int PADDING = 64;
@@ -24,7 +26,12 @@ public SimdJsonParser(int capacity, int maxDepth) {
2426
paddedBuffer = new byte[capacity];
2527
indexer = new StructuralIndexer(bitIndexes);
2628
}
27-
29+
public BitIndexes buildBitIndex (byte[] buffer, int len) {
30+
byte[] padded = padIfNeeded(buffer, len);
31+
reset();
32+
stage1(padded, len);
33+
return bitIndexes;
34+
}
2835
public <T> T parse(byte[] buffer, int len, Class<T> expectedType) {
2936
byte[] padded = padIfNeeded(buffer, len);
3037
reset();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
package org.simdjson;
2+
3+
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import lombok.Data;
import lombok.RequiredArgsConstructor;
8+
9+
public class SimdJsonParser2 {
10+
11+
@Data
12+
@RequiredArgsConstructor
13+
static class JsonNode {
14+
private long version = 0;
15+
private boolean isLeaf = false;
16+
private final String name;
17+
private String value = null;
18+
private JsonNode parent = null;
19+
private Map<String, JsonNode> children = new HashMap<>();
20+
private int start = -1;
21+
private int end = -1;
22+
}
23+
24+
private final SimdJsonParser parser;
25+
private BitIndexes bitIndexes;
26+
private final JsonNode root = new JsonNode(null);
27+
private final JsonNode[] row;
28+
private final String[] result;
29+
private final String[] emptyResult;
30+
private JsonNode ptr;
31+
private byte[] buffer;
32+
private final int targetParseNum;
33+
private long currentVersion = 0;
34+
// pruning, when alreadyProcessedCols == NUM
35+
private long alreadyProcessedCols = 0;
36+
37+
public SimdJsonParser2(String... args) {
38+
parser = new SimdJsonParser();
39+
targetParseNum = args.length;
40+
row = new JsonNode[targetParseNum];
41+
result = new String[targetParseNum];
42+
emptyResult = new String[targetParseNum];
43+
for (int i = 0; i < args.length; i++) {
44+
emptyResult[i] = null;
45+
}
46+
for (int i = 0; i < targetParseNum; i++) {
47+
JsonNode cur = root;
48+
String[] paths = args[i].split("\\.");
49+
for (int j = 0; j < paths.length; j++) {
50+
if (!cur.getChildren().containsKey(paths[j])) {
51+
JsonNode child = new JsonNode(paths[j]);
52+
cur.getChildren().put(paths[j], child);
53+
child.setParent(cur);
54+
}
55+
cur = cur.getChildren().get(paths[j]);
56+
}
57+
cur.setLeaf(true);
58+
row[i] = cur;
59+
}
60+
61+
}
62+
63+
public String[] parse(byte[] buffer, int len) {
64+
this.bitIndexes = parser.buildBitIndex(buffer, len);
65+
if (buffer == null || buffer.length == 0) {
66+
return emptyResult;
67+
}
68+
this.alreadyProcessedCols = 0;
69+
this.currentVersion++;
70+
this.ptr = root;
71+
this.buffer = buffer;
72+
73+
switch (buffer[bitIndexes.peek()]) {
74+
case '{' -> {
75+
parseMap();
76+
}
77+
case '[' -> {
78+
parseList();
79+
}
80+
default -> {
81+
throw new RuntimeException("invalid json format");
82+
}
83+
}
84+
return getResult();
85+
}
86+
87+
private void parseElement(String fieldName) {
88+
if (fieldName == null) {
89+
int start = bitIndexes.advance();
90+
int realEnd = bitIndexes.advance();
91+
while (realEnd > start) {
92+
if (buffer[--realEnd] == '"') {
93+
break;
94+
}
95+
}
96+
fieldName = new String(buffer, start + 1, realEnd - start - 1);
97+
}
98+
if (!ptr.getChildren().containsKey(fieldName)) {
99+
skip(false);
100+
return;
101+
}
102+
ptr = ptr.getChildren().get(fieldName);
103+
switch (buffer[bitIndexes.peek()]) {
104+
case '{' -> {
105+
parseMap();
106+
}
107+
case '[' -> {
108+
parseList();
109+
}
110+
default -> {
111+
ptr.setValue(skip(true));
112+
ptr.setVersion(currentVersion);
113+
++alreadyProcessedCols;
114+
}
115+
}
116+
ptr = ptr.getParent();
117+
}
118+
119+
private void parseMap() {
120+
if (ptr.getChildren() == null) {
121+
ptr.setValue(skip(true));
122+
ptr.setVersion(currentVersion);
123+
++alreadyProcessedCols;
124+
return;
125+
}
126+
ptr.setStart(bitIndexes.peek());
127+
bitIndexes.advance();
128+
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != '}' && alreadyProcessedCols < targetParseNum) {
129+
parseElement(null);
130+
if (buffer[bitIndexes.peek()] == ',') {
131+
bitIndexes.advance();
132+
}
133+
}
134+
ptr.setEnd(bitIndexes.peek());
135+
if (ptr.isLeaf()) {
136+
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
137+
ptr.setVersion(currentVersion);
138+
++alreadyProcessedCols;
139+
}
140+
bitIndexes.advance();
141+
}
142+
143+
private void parseList() {
144+
if (ptr.getChildren() == null) {
145+
ptr.setValue(skip(true));
146+
ptr.setVersion(currentVersion);
147+
++alreadyProcessedCols;
148+
return;
149+
}
150+
ptr.setStart(bitIndexes.peek());
151+
bitIndexes.advance();
152+
int i = 0;
153+
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != ']' && alreadyProcessedCols < targetParseNum) {
154+
parseElement("" + i);
155+
if (buffer[bitIndexes.peek()] == ',') {
156+
bitIndexes.advance();
157+
}
158+
i++;
159+
}
160+
ptr.setEnd(bitIndexes.peek());
161+
if (ptr.isLeaf()) {
162+
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
163+
ptr.setVersion(currentVersion);
164+
++alreadyProcessedCols;
165+
}
166+
bitIndexes.advance();
167+
}
168+
169+
private String skip(boolean retainValue) {
170+
int i = 0;
171+
int start = retainValue ? bitIndexes.peek() : 0;
172+
switch (buffer[bitIndexes.peek()]) {
173+
case '{' -> {
174+
i++;
175+
while (i > 0) {
176+
bitIndexes.advance();
177+
if (buffer[bitIndexes.peek()] == '{') {
178+
i++;
179+
} else if (buffer[bitIndexes.peek()] == '}') {
180+
i--;
181+
}
182+
}
183+
int end = bitIndexes.peek();
184+
bitIndexes.advance();
185+
return retainValue ? new String(buffer, start, end - start + 1) : null;
186+
}
187+
case '[' -> {
188+
i++;
189+
while (i > 0) {
190+
bitIndexes.advance();
191+
if (buffer[bitIndexes.peek()] == '[') {
192+
i++;
193+
} else if (buffer[bitIndexes.peek()] == ']') {
194+
i--;
195+
}
196+
}
197+
int end = bitIndexes.peek();
198+
bitIndexes.advance();
199+
return retainValue ? new String(buffer, start, end - start + 1) : null;
200+
}
201+
case '"' -> {
202+
bitIndexes.advance();
203+
int realEnd = bitIndexes.peek();
204+
while (realEnd > start) {
205+
if (buffer[--realEnd] == '"') {
206+
break;
207+
}
208+
}
209+
return retainValue ? new String(buffer, start + 1, realEnd - start - 1) : null;
210+
}
211+
default -> {
212+
bitIndexes.advance();
213+
int realEnd = bitIndexes.peek();
214+
while (realEnd >= start) {
215+
--realEnd;
216+
if (buffer[realEnd] >= '0' && buffer[realEnd] <= '9') {
217+
break;
218+
}
219+
}
220+
return retainValue ? new String(buffer, start, realEnd - start + 1) : null;
221+
}
222+
}
223+
}
224+
225+
private String[] getResult() {
226+
for (int i = 0; i < targetParseNum; i++) {
227+
if (row[i].getVersion() < currentVersion) {
228+
result[i] = null;
229+
continue;
230+
}
231+
result[i] = row[i].getValue();
232+
}
233+
return result;
234+
}
235+
}

0 commit comments

Comments
 (0)