Skip to content

Commit 9d17693

Browse files
committed
EP-3981 #114 add more DriverVectorCube tests
1 parent 1b0902e commit 9d17693

File tree

3 files changed

+128
-18
lines changed

3 files changed

+128
-18
lines changed

openeo_driver/datacube.py

+26-11
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,18 @@ class DriverVectorCube:
154154
These components are "joined" on the GeoPandas dataframe's index and DataArray first dimension
155155
"""
156156
DIM_GEOMETRIES = "geometries"
157-
158-
def __init__(self, geometries: gpd.GeoDataFrame, cube: Optional[xarray.DataArray] = None):
157+
FLATTEN_PREFIX = "vc"
158+
159+
def __init__(
160+
self, geometries: gpd.GeoDataFrame, cube: Optional[xarray.DataArray] = None,
161+
flatten_prefix: str = FLATTEN_PREFIX
162+
):
163+
"""
164+
165+
:param geometries:
166+
:param cube:
167+
:param flatten_prefix: prefix for column/field/property names when flattening the cube
168+
"""
159169
# TODO #114 EP-3981: lazy loading (like DelayedVector)?
160170
if cube is not None:
161171
if cube.dims[0] != self.DIM_GEOMETRIES:
@@ -166,11 +176,12 @@ def __init__(self, geometries: gpd.GeoDataFrame, cube: Optional[xarray.DataArray
166176
raise VectorCubeError("Incompatible vector cube components")
167177
self._geometries = geometries
168178
self._cube = cube
179+
self._flatten_prefix = flatten_prefix
169180

170-
def with_cube(self, cube: xarray.DataArray) -> "DriverVectorCube":
181+
def with_cube(self, cube: xarray.DataArray, flatten_prefix: str = FLATTEN_PREFIX) -> "DriverVectorCube":
171182
"""Create new vector cube with same geometries but new cube"""
172183
log.info(f"Creating vector cube with new cube {cube.name!r}")
173-
return DriverVectorCube(geometries=self._geometries, cube=cube)
184+
return DriverVectorCube(geometries=self._geometries, cube=cube, flatten_prefix=flatten_prefix)
174185

175186
@classmethod
176187
def from_fiona(cls, paths: List[str], driver: str, options: dict):
@@ -189,23 +200,24 @@ def _as_geopandas_df(self) -> gpd.GeoDataFrame:
189200
assert self._cube.dims[0] == self.DIM_GEOMETRIES
190201
# TODO: better way to combine cube with geometries
191202
# Flatten multiple (non-geometry) dimensions from cube to new properties in geopandas dataframe
192-
prefix = self._cube.attrs.get("prefix", "cube")
193203
if self._cube.dims[1:]:
194204
stacked = self._cube.stack(prop=self._cube.dims[1:])
195205
log.info(f"Flattened cube component of vector cube to {stacked.shape[1]} properties")
196206
for p in stacked.indexes["prop"]:
197-
name = "~".join(str(x) for x in [prefix] + list(p))
207+
name = "~".join(str(x) for x in [self._flatten_prefix] + list(p))
198208
# TODO: avoid column collisions?
199209
df[name] = stacked.sel(prop=p)
200210
else:
201-
df[prefix] = self._cube
211+
df[self._flatten_prefix] = self._cube
202212

203213
return df
204214

205215
def to_geojson(self):
206216
return shapely.geometry.mapping(self._as_geopandas_df())
207217

208-
def write_assets(self, directory: Union[str, Path], format: str, options: Optional[dict] = None) -> Dict[str, StacAsset]:
218+
def write_assets(
219+
self, directory: Union[str, Path], format: str, options: Optional[dict] = None
220+
) -> Dict[str, StacAsset]:
209221
directory = ensure_dir(directory)
210222
format_info = IOFORMATS.get(format)
211223
# TODO: check if format can be used for vector data?
@@ -243,13 +255,16 @@ def to_multipolygon(self) -> shapely.geometry.MultiPolygon:
243255
return shapely.ops.unary_union(self._geometries.geometry)
244256

245257
def get_bounding_box(self) -> Tuple[float, float, float, float]:
246-
return self._geometries.total_bounds
258+
return tuple(self._geometries.total_bounds)
247259

248260
def get_geometries(self) -> Sequence[shapely.geometry.base.BaseGeometry]:
249261
return self._geometries.geometry
250262

251-
def get_geometries_index(self) -> pd.Index:
252-
return self._geometries.index
263+
def get_xarray_cube_basics(self) -> Tuple[tuple, dict]:
264+
"""Get initial dims/coords for xarray DataArray construction"""
265+
dims = (self.DIM_GEOMETRIES,)
266+
coords = {self.DIM_GEOMETRIES: self._geometries.index.to_list()}
267+
return dims, coords
253268

254269

255270
class DriverMlModel:

openeo_driver/dummy/dummy_backend.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -222,9 +222,7 @@ def assert_polygon_sequence(geometries: Union[Sequence, BaseMultipartGeometry]):
222222
# TODO #114 EP-3981 normalize to vector cube and preserve original properties
223223
if isinstance(geometries, DriverVectorCube):
224224
# Build dummy aggregation data cube
225-
dims = (DriverVectorCube.DIM_GEOMETRIES,)
226-
# TODO: use something else than the geopandas dataframe's index?
227-
coords = {DriverVectorCube.DIM_GEOMETRIES: geometries.get_geometries_index().to_list()}
225+
dims, coords = geometries.get_xarray_cube_basics()
228226
if self.metadata.has_temporal_dimension():
229227
dims += (self.metadata.temporal_dimension.name,)
230228
coords[self.metadata.temporal_dimension.name] = ["2015-07-06T00:00:00", "2015-08-22T00:00:00"]
@@ -233,10 +231,8 @@ def assert_polygon_sequence(geometries: Union[Sequence, BaseMultipartGeometry]):
233231
coords[self.metadata.band_dimension.name] = self.metadata.band_names
234232
shape = [len(coords[d]) for d in dims]
235233
data = numpy.arange(numpy.prod(shape)).reshape(shape)
236-
cube = xarray.DataArray(
237-
data=data, dims=dims, coords=coords, name="aggregate_spatial", attrs={"prefix": "agg"}
238-
)
239-
return geometries.with_cube(cube=cube)
234+
cube = xarray.DataArray(data=data, dims=dims, coords=coords, name="aggregate_spatial")
235+
return geometries.with_cube(cube=cube, flatten_prefix="agg")
240236
elif isinstance(geometries, str):
241237
geometries = [geometry for geometry in DelayedVector(geometries).geometries]
242238
assert_polygon_sequence(geometries)

tests/test_vectorcube.py

+99
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import geopandas as gpd
2+
import pytest
3+
import xarray
4+
from shapely.geometry import Polygon, MultiPolygon
5+
6+
from openeo_driver.datacube import DriverVectorCube
7+
from openeo_driver.testing import DictSubSet
8+
from .data import get_path
9+
10+
11+
class TestDriverVectorCube:
    """Tests for `DriverVectorCube`, with and without an attached xarray cube.

    All tests build the vector cube from the two-polygon feature collection
    loaded by the `gdf` fixture.
    """

    @pytest.fixture
    def gdf(self) -> gpd.GeoDataFrame:
        """Fixture for a simple GeoPandas DataFrame from file"""
        path = str(get_path("geojson/FeatureCollection02.json"))
        df = gpd.read_file(path)
        return df

    def test_basic(self, gdf):
        """The bounding box compares equal to the expected (west, south, east, north) tuple."""
        vc = DriverVectorCube(gdf)
        assert vc.get_bounding_box() == (1, 1, 5, 4)

    def test_to_multipolygon(self, gdf):
        """`to_multipolygon` merges the fixture geometries into one two-part MultiPolygon."""
        vc = DriverVectorCube(gdf)
        mp = vc.to_multipolygon()
        assert isinstance(mp, MultiPolygon)
        # Count the parts through `.geoms`: calling `len()` directly on a multi-part
        # geometry is deprecated in Shapely 1.8 and raises TypeError in Shapely 2.0.
        assert len(mp.geoms) == 2
        assert mp.equals(MultiPolygon([
            Polygon([(1, 1), (2, 3), (3, 1), (1, 1)]),
            Polygon([(4, 2), (5, 4), (3, 4), (4, 2)]),
        ]))

    def test_get_geometries(self, gdf):
        """`get_geometries` yields the fixture polygons in their original order."""
        vc = DriverVectorCube(gdf)
        geometries = vc.get_geometries()
        assert len(geometries) == 2
        expected_geometries = [
            Polygon([(1, 1), (2, 3), (3, 1), (1, 1)]),
            Polygon([(4, 2), (5, 4), (3, 4), (4, 2)]),
        ]
        for geometry, expected in zip(geometries, expected_geometries):
            assert geometry.equals(expected)

    def test_to_geojson(self, gdf):
        """Without a cube, the GeoJSON export carries only the original feature properties."""
        vc = DriverVectorCube(gdf)
        assert vc.to_geojson() == DictSubSet({
            "type": "FeatureCollection",
            "features": [
                DictSubSet({
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": (((1, 1), (3, 1), (2, 3), (1, 1)),)},
                    "properties": {"id": "first", "pop": 1234},
                }),
                DictSubSet({
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": (((4, 2), (5, 4), (3, 4), (4, 2)),)},
                    "properties": {"id": "second", "pop": 5678},
                }),
            ]
        })

    def test_with_cube_to_geojson(self, gdf):
        """`with_cube` returns a new vector cube whose GeoJSON export includes the flattened cube.

        The export of the original vector cube (`vc1`) is checked as well, against
        the plain feature properties only.
        """
        vc1 = DriverVectorCube(gdf)
        dims, coords = vc1.get_xarray_cube_basics()
        # Extend the geometry dimension with a second "bands" dimension.
        dims += ("bands",)
        coords["bands"] = ["red", "green"]
        cube = xarray.DataArray(data=[[1, 2], [3, 4]], dims=dims, coords=coords)
        vc2 = vc1.with_cube(cube, flatten_prefix="bandz")
        assert vc1.to_geojson() == DictSubSet({
            "type": "FeatureCollection",
            "features": [
                DictSubSet({
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": (((1, 1), (3, 1), (2, 3), (1, 1)),)},
                    "properties": {"id": "first", "pop": 1234},
                }),
                DictSubSet({
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": (((4, 2), (5, 4), (3, 4), (4, 2)),)},
                    "properties": {"id": "second", "pop": 5678},
                }),
            ]
        })
        # Flattened properties are named "<flatten_prefix>~<band>".
        assert vc2.to_geojson() == DictSubSet({
            "type": "FeatureCollection",
            "features": [
                DictSubSet({
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": (((1, 1), (3, 1), (2, 3), (1, 1)),)},
                    "properties": {"id": "first", "pop": 1234, "bandz~red": 1, "bandz~green": 2},
                }),
                DictSubSet({
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": (((4, 2), (5, 4), (3, 4), (4, 2)),)},
                    "properties": {"id": "second", "pop": 5678, "bandz~red": 3, "bandz~green": 4},
                }),
            ]
        })

0 commit comments

Comments
 (0)