Skip to content

Commit cd2018d

Browse files
bchapuis and Drabble authored
Add support for nested types, geoparquet groups, and postgres jsonb in data table (#860)
* Add support for nested types in the DataTable * Add a JsonbHandler that serializes Objects * Add an EnvelopeField to the GeoParquet parser * Save the EnvelopeField as geometry in Postgis * Add a writeEnvelope method to the CopyWriter * BBox use float values in GeoParquet * Create Envelope from Double and Float values * Use the default CRS when the crs field is null in Geoparquet (#861) --------- Co-authored-by: Antoine Drabble <[email protected]>
1 parent ee7aed7 commit cd2018d

File tree

25 files changed

+502
-131
lines changed

25 files changed

+502
-131
lines changed

baremaps-core/src/main/java/org/apache/baremaps/database/copy/CopyWriter.java

+14
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import java.util.Collection;
3131
import java.util.List;
3232
import java.util.Map;
33+
import org.locationtech.jts.geom.Envelope;
3334
import org.locationtech.jts.geom.Geometry;
3435
import org.postgresql.copy.PGCopyOutputStream;
3536
import org.postgresql.core.Oid;
@@ -106,6 +107,9 @@ public class CopyWriter implements AutoCloseable {
106107
public static final GeometryValueHandler GEOMETRY_HANDLER =
107108
new GeometryValueHandler();
108109

110+
public static final EnvelopeValueHandler ENVELOPE_HANDLER =
111+
new EnvelopeValueHandler();
112+
109113
private final DataOutputStream data;
110114

111115
/**
@@ -397,6 +401,16 @@ public void writeGeometry(Geometry value) throws IOException {
397401
GEOMETRY_HANDLER.handle(data, value);
398402
}
399403

404+
/**
405+
* Writes an envelope value by delegating to {@code ENVELOPE_HANDLER} on this writer's data stream.
406+
*
407+
* @param value the envelope to write
408+
* @throws IOException if the handler fails to write the value
409+
*/
410+
public void writeEnvelope(Envelope value) throws IOException {
411+
ENVELOPE_HANDLER.handle(data, value);
412+
}
413+
400414
/** Close the writer. */
401415
@Override
402416
public void close() throws IOException {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to you under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.baremaps.database.copy;
19+
20+
import static org.locationtech.jts.io.WKBConstants.wkbNDR;
21+
22+
import de.bytefish.pgbulkinsert.pgsql.handlers.BaseValueHandler;
23+
import java.io.DataOutputStream;
24+
import org.locationtech.jts.geom.Envelope;
25+
import org.locationtech.jts.geom.Geometry;
26+
import org.locationtech.jts.geom.GeometryFactory;
27+
import org.locationtech.jts.io.WKBWriter;
28+
29+
public class EnvelopeValueHandler extends BaseValueHandler<Envelope> {
30+
31+
private static final GeometryFactory geometryFactory = new GeometryFactory();
32+
33+
private static byte[] asWKB(Envelope value) {
34+
Geometry geometry = geometryFactory.toGeometry(value);
35+
return new WKBWriter(2, wkbNDR, true).write(geometry);
36+
}
37+
38+
@Override
39+
protected void internalHandle(DataOutputStream buffer, Envelope value) throws Exception {
40+
byte[] wkb = asWKB(value);
41+
buffer.writeInt(wkb.length);
42+
buffer.write(wkb, 0, wkb.length);
43+
}
44+
45+
@Override
46+
public int getLength(Envelope value) {
47+
return asWKB(value).length + 4;
48+
}
49+
}

baremaps-core/src/main/java/org/apache/baremaps/database/copy/GeometryValueHandler.java

+6-2
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,19 @@
2727

2828
public class GeometryValueHandler extends BaseValueHandler<Geometry> {
2929

30+
private static byte[] asWKB(Geometry geometry) {
31+
return new WKBWriter(2, wkbNDR, true).write(geometry);
32+
}
33+
3034
@Override
3135
protected void internalHandle(DataOutputStream buffer, Geometry value) throws IOException {
32-
byte[] wkb = new WKBWriter(2, wkbNDR, true).write(value);
36+
byte[] wkb = asWKB(value);
3337
buffer.writeInt(wkb.length);
3438
buffer.write(wkb, 0, wkb.length);
3539
}
3640

3741
@Override
3842
public int getLength(Geometry geometry) {
39-
throw new UnsupportedOperationException();
43+
return asWKB(geometry).length + 4;
4044
}
4145
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to you under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.baremaps.database.copy;
19+
20+
import com.fasterxml.jackson.core.JsonGenerator;
21+
import com.fasterxml.jackson.databind.JsonSerializer;
22+
import com.fasterxml.jackson.databind.ObjectMapper;
23+
import com.fasterxml.jackson.databind.SerializerProvider;
24+
import com.fasterxml.jackson.databind.module.SimpleModule;
25+
import de.bytefish.pgbulkinsert.pgsql.handlers.BaseValueHandler;
26+
import java.io.DataOutputStream;
27+
import java.io.IOException;
28+
29+
public class JsonbValueHandler extends BaseValueHandler<Object> {
30+
31+
private static final ObjectMapper objectMapper;
32+
33+
static {
34+
objectMapper = new ObjectMapper();
35+
SimpleModule module = new SimpleModule();
36+
module.addSerializer(String.class, new NoQuotesStringSerializer());
37+
objectMapper.registerModule(module);
38+
}
39+
40+
static class NoQuotesStringSerializer extends JsonSerializer<String> {
41+
@Override
42+
public void serialize(String value, JsonGenerator gen, SerializerProvider serializers)
43+
throws IOException {
44+
gen.writeRawValue(value);
45+
}
46+
}
47+
48+
private final int jsonbProtocolVersion;
49+
50+
public JsonbValueHandler() {
51+
this(1);
52+
}
53+
54+
public JsonbValueHandler(int jsonbProtocolVersion) {
55+
this.jsonbProtocolVersion = jsonbProtocolVersion;
56+
}
57+
58+
private static byte[] asJson(Object object) {
59+
try {
60+
String value = objectMapper.writeValueAsString(object);
61+
return value.getBytes("UTF-8");
62+
} catch (Exception e) {
63+
throw new RuntimeException(e);
64+
}
65+
}
66+
67+
@Override
68+
protected void internalHandle(DataOutputStream buffer, Object value) throws Exception {
69+
byte[] utf8Bytes = asJson(value);
70+
buffer.writeInt(utf8Bytes.length + 1);
71+
buffer.writeByte(jsonbProtocolVersion);
72+
buffer.write(utf8Bytes);
73+
}
74+
75+
@Override
76+
public int getLength(Object value) {
77+
byte[] utf8Bytes = asJson(value);
78+
return utf8Bytes.length;
79+
}
80+
}

baremaps-core/src/main/java/org/apache/baremaps/database/metadata/DatabaseMetadata.java

+24-12
Original file line numberDiff line numberDiff line change
@@ -82,18 +82,30 @@ private List<ColumnResult> getColumns(String catalog, String schemaPattern,
8282
var resultSet = connection.getMetaData().getColumns(catalog, schemaPattern,
8383
tableNamePattern, columnNamePattern)) {
8484
while (resultSet.next()) {
85-
tableColumns.add(new ColumnResult(resultSet.getString("TABLE_CAT"),
86-
resultSet.getString("TABLE_SCHEM"), resultSet.getString("TABLE_NAME"),
87-
resultSet.getString("COLUMN_NAME"), resultSet.getInt("DATA_TYPE"),
88-
resultSet.getString("TYPE_NAME"), resultSet.getInt("COLUMN_SIZE"),
89-
resultSet.getInt("DECIMAL_DIGITS"), resultSet.getInt("NUM_PREC_RADIX"),
90-
resultSet.getInt("NULLABLE"), resultSet.getString("REMARKS"),
91-
resultSet.getString("COLUMN_DEF"), resultSet.getInt("SQL_DATA_TYPE"),
92-
resultSet.getInt("SQL_DATETIME_SUB"), resultSet.getInt("CHAR_OCTET_LENGTH"),
93-
resultSet.getInt("ORDINAL_POSITION"), resultSet.getString("IS_NULLABLE"),
94-
resultSet.getString("SCOPE_CATALOG"), resultSet.getString("SCOPE_SCHEMA"),
95-
resultSet.getString("SCOPE_TABLE"), resultSet.getShort("SOURCE_DATA_TYPE"),
96-
resultSet.getString("IS_AUTOINCREMENT"), resultSet.getString("IS_GENERATEDCOLUMN")));
85+
tableColumns.add(new ColumnResult(
86+
resultSet.getString("TABLE_CAT"),
87+
resultSet.getString("TABLE_SCHEM"),
88+
resultSet.getString("TABLE_NAME"),
89+
resultSet.getString("COLUMN_NAME"),
90+
resultSet.getInt("DATA_TYPE"),
91+
resultSet.getString("TYPE_NAME"),
92+
resultSet.getInt("COLUMN_SIZE"),
93+
resultSet.getInt("DECIMAL_DIGITS"),
94+
resultSet.getInt("NUM_PREC_RADIX"),
95+
resultSet.getInt("NULLABLE"),
96+
resultSet.getString("REMARKS"),
97+
resultSet.getString("COLUMN_DEF"),
98+
resultSet.getInt("SQL_DATA_TYPE"),
99+
resultSet.getInt("SQL_DATETIME_SUB"),
100+
resultSet.getInt("CHAR_OCTET_LENGTH"),
101+
resultSet.getInt("ORDINAL_POSITION"),
102+
resultSet.getString("IS_NULLABLE"),
103+
resultSet.getString("SCOPE_CATALOG"),
104+
resultSet.getString("SCOPE_SCHEMA"),
105+
resultSet.getString("SCOPE_TABLE"),
106+
resultSet.getShort("SOURCE_DATA_TYPE"),
107+
resultSet.getString("IS_AUTOINCREMENT"),
108+
resultSet.getString("IS_GENERATEDCOLUMN")));
97109
}
98110
} catch (SQLException e) {
99111
throw new RuntimeException(e);

baremaps-core/src/main/java/org/apache/baremaps/storage/flatgeobuf/FlatGeoBufDataTable.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,12 @@ public FlatGeoBufDataTable(Path file) {
5757
this.schema = readSchema(file);
5858
}
5959

60-
60+
/**
61+
* Reads the schema from a flatgeobuf file.
62+
*
63+
* @param file the path to the flatgeobuf file
64+
* @return the schema of the table
65+
*/
6166
private static DataSchema readSchema(Path file) {
6267
try (var channel = FileChannel.open(file, StandardOpenOption.READ)) {
6368
// try to read the schema from the file

baremaps-core/src/main/java/org/apache/baremaps/storage/flatgeobuf/FlatGeoBufTypeConversion.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import java.util.*;
2727
import java.util.stream.Collectors;
2828
import org.apache.baremaps.data.storage.*;
29+
import org.apache.baremaps.data.storage.DataColumn.Cardinality;
2930
import org.apache.baremaps.data.storage.DataColumn.Type;
3031
import org.wololo.flatgeobuf.ColumnMeta;
3132
import org.wololo.flatgeobuf.GeometryConversions;
@@ -53,7 +54,10 @@ public class FlatGeoBufTypeConversion {
5354
public static DataSchema asSchema(HeaderMeta headerMeta) {
5455
var name = headerMeta.name;
5556
var columns = headerMeta.columns.stream()
56-
.map(column -> new DataColumnImpl(column.name, Type.fromBinding(column.getBinding())))
57+
.map(column -> new DataColumnFixed(
58+
column.name,
59+
column.nullable ? Cardinality.OPTIONAL : Cardinality.REQUIRED,
60+
Type.fromBinding(column.getBinding())))
5761
.map(DataColumn.class::cast)
5862
.toList();
5963
return new DataSchemaImpl(name, columns);

baremaps-core/src/main/java/org/apache/baremaps/storage/geopackage/GeoPackageDataTable.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import mil.nga.geopackage.features.user.FeatureResultSet;
2525
import mil.nga.geopackage.geom.GeoPackageGeometryData;
2626
import org.apache.baremaps.data.storage.*;
27+
import org.apache.baremaps.data.storage.DataColumn.Cardinality;
2728
import org.apache.baremaps.data.storage.DataColumn.Type;
2829
import org.locationtech.jts.geom.*;
2930

@@ -50,7 +51,9 @@ public GeoPackageDataTable(FeatureDao featureDao) {
5051
for (FeatureColumn column : featureDao.getColumns()) {
5152
var propertyName = column.getName();
5253
var propertyType = classType(column);
53-
columns.add(new DataColumnImpl(propertyName, propertyType));
54+
var propertyCardinality = column.isNotNull() ? Cardinality.REQUIRED : Cardinality.OPTIONAL;
55+
columns.add(new DataColumnFixed(
56+
propertyName, propertyCardinality, propertyType));
5457
}
5558
schema = new DataSchemaImpl(name, columns);
5659
geometryFactory = new GeometryFactory(new PrecisionModel(), (int) featureDao.getSrs().getId());

baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetTypeConversion.java

+55-20
Original file line numberDiff line numberDiff line change
@@ -18,38 +18,50 @@
1818
package org.apache.baremaps.storage.geoparquet;
1919

2020
import java.util.ArrayList;
21+
import java.util.HashMap;
2122
import java.util.List;
22-
import org.apache.baremaps.data.storage.DataColumn;
23+
import java.util.Map;
24+
import org.apache.baremaps.data.storage.*;
25+
import org.apache.baremaps.data.storage.DataColumn.Cardinality;
2326
import org.apache.baremaps.data.storage.DataColumn.Type;
24-
import org.apache.baremaps.data.storage.DataColumnImpl;
25-
import org.apache.baremaps.data.storage.DataSchema;
26-
import org.apache.baremaps.data.storage.DataSchemaImpl;
2727
import org.apache.baremaps.geoparquet.data.GeoParquetGroup;
2828
import org.apache.baremaps.geoparquet.data.GeoParquetGroup.Field;
29+
import org.apache.baremaps.geoparquet.data.GeoParquetGroup.GroupField;
2930
import org.apache.baremaps.geoparquet.data.GeoParquetGroup.Schema;
3031

3132
public class GeoParquetTypeConversion {
3233

3334
private GeoParquetTypeConversion() {}
3435

3536
public static DataSchema asSchema(String table, Schema schema) {
36-
List<DataColumn> columns = schema.fields().stream()
37-
.map(field -> (DataColumn) new DataColumnImpl(field.name(), asSchema(field.type())))
38-
.toList();
37+
List<DataColumn> columns = asDataColumns(schema);
3938
return new DataSchemaImpl(table, columns);
4039
}
4140

42-
public static Type asSchema(GeoParquetGroup.Type type) {
43-
return switch (type) {
44-
case BINARY -> Type.BYTE_ARRAY;
45-
case BOOLEAN -> Type.BOOLEAN;
46-
case INTEGER -> Type.INTEGER;
47-
case INT96, LONG -> Type.LONG;
48-
case FLOAT -> Type.FLOAT;
49-
case DOUBLE -> Type.DOUBLE;
50-
case STRING -> Type.STRING;
51-
case GEOMETRY -> Type.GEOMETRY;
52-
case GROUP -> null;
41+
private static List<DataColumn> asDataColumns(Schema field) {
42+
return field.fields().stream()
43+
.map(GeoParquetTypeConversion::asDataColumn)
44+
.toList();
45+
}
46+
47+
private static DataColumn asDataColumn(Field field) {
48+
Cardinality cardinality = switch (field.cardinality()) {
49+
case REQUIRED -> Cardinality.REQUIRED;
50+
case OPTIONAL -> Cardinality.OPTIONAL;
51+
case REPEATED -> Cardinality.REPEATED;
52+
};
53+
return switch (field.type()) {
54+
case BINARY -> new DataColumnFixed(field.name(), cardinality, Type.BINARY);
55+
case BOOLEAN -> new DataColumnFixed(field.name(), cardinality, Type.BOOLEAN);
56+
case INTEGER -> new DataColumnFixed(field.name(), cardinality, Type.INTEGER);
57+
case INT96, LONG -> new DataColumnFixed(field.name(), cardinality, Type.LONG);
58+
case FLOAT -> new DataColumnFixed(field.name(), cardinality, Type.FLOAT);
59+
case DOUBLE -> new DataColumnFixed(field.name(), cardinality, Type.DOUBLE);
60+
case STRING -> new DataColumnFixed(field.name(), cardinality, Type.STRING);
61+
case GEOMETRY -> new DataColumnFixed(field.name(), cardinality, Type.GEOMETRY);
62+
case ENVELOPE -> new DataColumnFixed(field.name(), cardinality, Type.ENVELOPE);
63+
case GROUP -> new DataColumnNested(field.name(), cardinality,
64+
asDataColumns(((GroupField) field).schema()));
5365
};
5466
}
5567

@@ -59,7 +71,6 @@ public static List<Object> asRowValues(GeoParquetGroup group) {
5971
List<Field> fields = schema.fields();
6072
for (int i = 0; i < fields.size(); i++) {
6173
Field field = fields.get(i);
62-
field.type();
6374
switch (field.type()) {
6475
case BINARY -> values.add(group.getBinaryValue(i).getBytes());
6576
case BOOLEAN -> values.add(group.getBooleanValue(i));
@@ -69,9 +80,33 @@ public static List<Object> asRowValues(GeoParquetGroup group) {
6980
case DOUBLE -> values.add(group.getDoubleValue(i));
7081
case STRING -> values.add(group.getStringValue(i));
7182
case GEOMETRY -> values.add(group.getGeometryValue(i));
72-
case GROUP -> values.add(null); // TODO: values.add(asDataRow(group.getGroupValue(i)));
83+
case ENVELOPE -> values.add(group.getEnvelopeValue(i));
84+
case GROUP -> values.add(asNested(group.getGroupValue(i)));
7385
}
7486
}
7587
return values;
7688
}
89+
90+
public static Map<String, Object> asNested(GeoParquetGroup group) {
91+
Map<String, Object> nested = new HashMap<>();
92+
Schema schema = group.getSchema();
93+
List<Field> fields = schema.fields();
94+
for (int i = 0; i < fields.size(); i++) {
95+
Field field = fields.get(i);
96+
nested.put(field.name(), switch (field.type()) {
97+
case BINARY -> group.getBinaryValue(i).getBytes();
98+
case BOOLEAN -> group.getBooleanValue(i);
99+
case INTEGER -> group.getIntegerValue(i);
100+
case INT96, LONG -> group.getLongValue(i);
101+
case FLOAT -> group.getFloatValue(i);
102+
case DOUBLE -> group.getDoubleValue(i);
103+
case STRING -> group.getStringValue(i);
104+
case GEOMETRY -> group.getGeometryValue(i);
105+
case ENVELOPE -> group.getEnvelopeValue(i);
106+
case GROUP -> asNested(group.getGroupValue(i));
107+
});
108+
}
109+
return nested;
110+
}
111+
77112
}

0 commit comments

Comments
 (0)