1
- """Handle GeoArrow tables with WKB-encoded geometry
2
- """
1
+ """Handle GeoArrow tables with WKB-encoded geometry"""
2
+
3
+ import json
3
4
from typing import Tuple
4
5
5
6
import pyarrow as pa
6
7
import shapely
7
8
8
- from lonboard ._constants import EXTENSION_NAME
9
+ from lonboard ._constants import EXTENSION_NAME , OGC_84
9
10
from lonboard ._geoarrow .crs import get_field_crs
10
11
from lonboard ._geoarrow .extension_types import construct_geometry_array
12
+ from lonboard ._utils import get_geometry_column_index
11
13
12
14
13
15
def parse_wkb_table (table : pa .Table ) -> pa .Table :
@@ -16,6 +18,8 @@ def parse_wkb_table(table: pa.Table) -> pa.Table:
16
18
If no columns are WKB-encoded, returns the input. Note that WKB columns must be
17
19
tagged with an extension name of `geoarrow.wkb` or `ogc.wkb`
18
20
"""
21
+ table = parse_geoparquet_table (table )
22
+
19
23
wkb_names = {EXTENSION_NAME .WKB , EXTENSION_NAME .OGC_WKB }
20
24
for field_idx in range (len (table .schema )):
21
25
field = table .field (field_idx )
@@ -32,6 +36,43 @@ def parse_wkb_table(table: pa.Table) -> pa.Table:
32
36
return table
33
37
34
38
39
def parse_geoparquet_table(table: pa.Table) -> pa.Table:
    """Tag the primary GeoParquet geometry column with GeoArrow WKB metadata.

    Reads the JSON stored under the ``b"geo"`` key of the table's schema
    metadata. When the primary column declared there is WKB-encoded, its field
    is given the ``ARROW:extension:name`` / ``ARROW:extension:metadata`` keys
    so that it is picked up as a WKB geometry column.

    Parsing is best-effort: the input table is returned unchanged when a
    geometry column is already tagged, when there is no ``geo`` metadata, when
    that metadata cannot be decoded as JSON, or when it lacks the expected
    ``primary_column`` / ``columns`` structure.

    Args:
        table: Table whose schema metadata may contain GeoParquet ``geo``
            metadata.

    Returns:
        The input table, or a new table whose primary geometry field carries
        the WKB extension metadata.

    Raises:
        ValueError: If the declared primary column does not match exactly one
            column of the table.
    """
    # If a column already has geoarrow metadata, don't parse from GeoParquet metadata
    if get_geometry_column_index(table.schema) is not None:
        return table

    schema_metadata = table.schema.metadata or {}
    raw_geo_metadata = schema_metadata.get(b"geo")
    if not raw_geo_metadata:
        return table

    try:
        geo_metadata = json.loads(raw_geo_metadata)
    except ValueError:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised for bytes that are not valid UTF-8.
        return table

    # Structurally malformed metadata is treated like undecodable metadata:
    # leave the table alone instead of failing with KeyError/TypeError.
    if not isinstance(geo_metadata, dict):
        return table

    primary_column = geo_metadata.get("primary_column")
    columns_meta = geo_metadata.get("columns")
    if not isinstance(primary_column, str) or not isinstance(columns_meta, dict):
        return table

    column_meta = columns_meta.get(primary_column)
    if not isinstance(column_meta, dict):
        return table

    # Raise explicitly rather than assert: assertions are removed under
    # `python -O`, which would let a missing or duplicated column slip through.
    column_names = table.column_names
    if column_names.count(primary_column) != 1:
        raise ValueError(f"Expected one column with name {primary_column}")
    column_idx = column_names.index(primary_column)

    if column_meta.get("encoding") != "WKB":
        return table

    existing_field = table.schema.field(column_idx)

    # Fall back to OGC_84 only when the "crs" key is absent; an explicit
    # null value is passed through as-is.
    if "crs" in column_meta:
        crs = column_meta["crs"]
    else:
        crs = OGC_84.to_json_dict()

    extension_metadata = {
        b"ARROW:extension:name": EXTENSION_NAME.WKB,
        b"ARROW:extension:metadata": json.dumps({"crs": crs}),
    }
    # with_metadata() replaces the field's metadata wholesale, so merge in any
    # metadata the field already carries to avoid discarding it.
    merged_metadata = {**(existing_field.metadata or {}), **extension_metadata}
    new_field = existing_field.with_metadata(merged_metadata)

    return table.set_column(column_idx, new_field, table.column(column_idx))
74
+
75
+
35
76
def parse_wkb_column (
36
77
field : pa .Field , column : pa .ChunkedArray
37
78
) -> Tuple [pa .Field , pa .ChunkedArray ]:
0 commit comments