Skip to content

Commit ec146af

Browse files
committed
Tests and fleshing it all out
1 parent 1d0d153 commit ec146af

File tree

8 files changed

+124
-90
lines changed

8 files changed

+124
-90
lines changed

Dockerfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11

2-
# Create image with docker buildx build -t datacube-vectoriser .
2+
# Create image with docker buildx build -t dea-vectoriser .
33

44
# Thankyou https://uwekorn.com/2021/03/01/deploying-conda-environments-in-docker-how-to-do-it-right.html
55

@@ -21,4 +21,4 @@ RUN --mount=type=cache,target=/opt/conda/pkgs micromamba install -y -n base -f /
2121
find /opt/conda/ -name '__pycache__' -type d -exec rm -rf '{}' '+'
2222

2323

24-
COPY . .
24+
COPY . .

datacube_vectoriser/utils.py

+32-7
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
1-
import json
2-
import os
31
from concurrent import futures
42

53
import boto3
6-
from toolz import dicttoolz
4+
import json
5+
import os
6+
from pathlib import PurePosixPath
7+
from toolz import dicttoolz, get_in
8+
from typing import Tuple, Optional
9+
from urllib.parse import urlparse
710

811

912
def _stac_to_sns(sns_arn, stac):
1013
"""
11-
Publish our STAC document to an SNS
14+
Publish a STAC document to an SNS
1215
"""
1316
bbox = stac["bbox"]
1417

@@ -41,6 +44,7 @@ def _stac_to_sns(sns_arn, stac):
4144

4245

4346
def upload_directory(directory, bucket, prefix, boto3_session: boto3.Session = None):
47+
"""Recursively upload a directory to an s3 bucket"""
4448
if boto3_session is None:
4549
boto3_session = boto3.Session()
4650
s3 = boto3_session.client("s3")
@@ -71,7 +75,9 @@ def upload_file(filename):
7175
except Exception as e:
7276
print("Exception {} encountered while uploading file {}".format(e, upload_task[task]))
7377

78+
7479
def receive_messages(queue_url):
80+
"""Yield SQS Messages until the queue is empty"""
7581
sqs = boto3.resource('sqs')
7682
queue = sqs.Queue(queue_url)
7783

@@ -80,13 +86,32 @@ def receive_messages(queue_url):
8086

8187
while len(messages) > 0:
8288
for message in messages:
83-
# body = json.loads(message.body)
8489
yield message
8590

86-
# message.delete()
87-
8891
messages = queue.receive_messages(MaxNumberOfMessages=1, )
8992

93+
94+
def geotiff_url_from_stac(stac_document) -> Optional[str]:
    """Look up the href of the 'water' asset in a STAC document.

    Returns whatever ``get_in`` yields for the key path
    ``assets -> water -> href`` (``None`` when the path is missing).
    """
    water_href_keys = ['assets', 'water', 'href']
    return get_in(water_href_keys, stac_document)
96+
97+
98+
def output_name_from_url(src_url, file_suffix) -> Tuple[PurePosixPath, str]:
    """Derive the output directory structure and filename from the input URL.

    The relative directory is built from (up to) the five path components
    immediately preceding the file name, and the file name is the source
    file name with its suffix replaced by ``file_suffix``.

    For example, a source URL ending in
    ``.../097/075/1998/08/17/ga_ls_wo_3_097075_1998-08-17_final_water.tif``
    with ``file_suffix='.shp'`` gives
    ``(PurePosixPath('097/075/1998/08/17'),
    'ga_ls_wo_3_097075_1998-08-17_final_water.shp')``.

    Raises ``ValueError`` (from ``PurePosixPath.with_suffix``) when the URL
    path has no file name or ``file_suffix`` is not a valid suffix.
    """
    path = PurePosixPath(urlparse(src_url).path)

    # Drop the leading root ('/') so that a shallow URL can never yield an
    # absolute "relative" path, which would discard any prefix it is joined to.
    parts = path.parts[1:] if path.anchor else path.parts

    relative_path = PurePosixPath(*parts[-6:-1])
    filename = path.with_suffix(file_suffix).name

    return relative_path, filename
113+
114+
90115
def chain_funcs(arg, *funcs):
91116
result = arg
92117
for f in funcs:

datacube_vectoriser/vector_wos.py

+39-69
Original file line numberDiff line numberDiff line change
@@ -6,81 +6,83 @@
66
#   - C) conduct 1 pixel buffer of no-data class? (unsure if should be later in workflow)
77
# 3. vectorise
88
# 4. simplify shapes to remove complexity
9-
# 5. join both data types back together as one Geopandas Geodataframe (container for sapely objects with projection imformation)
9+
# 5. join both data types back together as one Geopandas Geodataframe (container for shapely objects with projection
10+
# information)
1011
# 6. export as a single shapefile with attributes intact.
1112

12-
import fiona
13-
# Derived from https://github.com/GeoscienceAustralia/dea-notebooks/blob/KooieCate/vector_WOs_draft4.py
1413
import geopandas
1514
import geopandas as gp
1615
import pandas as pd
1716
import rasterio.features
1817
import xarray as xr
1918
from fiona.crs import from_epsg
2019
from scipy import ndimage
21-
from shapely.geometry import shape, mapping
20+
from shapely.geometry import shape
21+
# Derived from https://github.com/GeoscienceAustralia/dea-notebooks/blob/KooieCate/vector_WOs_draft4.py
22+
from typing import Tuple
2223

2324

24-
def load_data(url):
25-
# Open geotiff and reformat to Xarray DataArray
26-
geotiff_wos = xr.open_rasterio(url) # 'ga_s2am_wo_0-0-1_49JGN_2021-02-08_nrt_water.tif')
25+
def load_wos_data(url) -> xr.Dataset:
26+
"""Open a GeoTIFF into an in-memory Dataset"""
27+
geotiff_wos = xr.open_rasterio(url)
2728
wos_dataset = geotiff_wos.to_dataset('band')
2829
wos_dataset = wos_dataset.rename({1: 'wo'})
2930
return wos_dataset
3031

3132

32-
def generate_raster_layers(wos_dataset):
33+
def generate_raster_layers(wos_dataset: xr.Dataset) -> Tuple[xr.DataArray, xr.DataArray]:
3334
# Defining the three 'classes':
3435
# a) Water: where water is observed. Bit value 128
35-
# b) unspoken 'dry'. this is not vectorised and is left and transparent layer. bit values: 1 (no data) 2 (Contiguity)
36-
# c) Not_analysed: every masking applied to the data except terrain shadow. bit values: composed of Everyting else,
36+
# b) unspoken 'dry'. this is not vectorised and is left as a transparent layer. bit values: 1 (no data) 2 (
37+
# Contiguity)
38+
# c) Not_analysed: every masking applied to the data except terrain shadow. bit values: composed of everything else,
3739
# 1 create binary arrays for two classes of interest
3840
water_vals = (wos_dataset.wo == 128) # water only has 128 water observations
39-
# here we used reversed logic to turn all pixles that should be 'not analysed' to a value of 3. is is easier to list the 4 classes that are passed to the unlabled 'dry' class
41+
# here we used reversed logic to turn all pixels that should be 'not analysed' to a value of 3. it is easier to
42+
# list the 4 classes that are passed to the unlabelled 'dry' class
4043
not_analysed = wos_dataset.wo.where(((wos_dataset.wo == 0) | (wos_dataset.wo == 1) | (wos_dataset.wo == 8)
4144
| (wos_dataset.wo == 2) | (wos_dataset.wo == 128) | (wos_dataset.wo == 130) | (
4245
wos_dataset.wo == 142)), 3)
43-
not_analysed = not_analysed.where((not_analysed == 3), 0) # now keep the 3 values and make everyting else 0
44-
# 2 conduct binary errosion and closing to remove single pixles
46+
not_analysed = not_analysed.where((not_analysed == 3), 0) # now keep the 3 values and make everything else 0
47+
# 2 conduct binary erosion and closing to remove single pixels
4548
erroded_water = xr.DataArray(ndimage.binary_erosion(water_vals, iterations=2).astype(water_vals.dtype),
4649
coords=water_vals.coords)
4750
erroded_not_analysed = xr.DataArray(ndimage.binary_erosion(not_analysed, iterations=2).astype(not_analysed.dtype),
4851
coords=not_analysed.coords)
49-
# dialating cloud 3 times after erroding 2, to create small overlap and iliminate gaps in data
52+
# dilating cloud 3 times after eroding 2, to create small overlap and eliminate gaps in data
5053
dilated_water = xr.DataArray(ndimage.binary_dilation(erroded_water, iterations=3).astype(water_vals.dtype),
5154
coords=water_vals.coords)
5255
dilated_not_analysed = xr.DataArray(
53-
ndimage.binary_dilation(erroded_not_analysed, iterations=(3)).astype(not_analysed.dtype),
56+
ndimage.binary_dilation(erroded_not_analysed, iterations=3).astype(not_analysed.dtype),
5457
coords=not_analysed.coords)
5558

5659
return dilated_water, dilated_not_analysed
5760

5861

59-
def vectorise_data(xarrayDataArray, transform, crs, label='Label'):
62+
def vectorise_data(data_array: xr.DataArray, transform, crs, label='Label'):
6063
"""this module takes an Xarray DataArray and vectorises it as shapely geometries in a Geopandas Geodataframe
6164
6265
Input
63-
xarrayDataArray: an Xarray DataArray with boolean values (1,0) with 1 or True equal to the areas that will be turned into vectors
64-
Label: default 'Label', String, the data label that will be added to each geometry in geodataframe
66+
data_array: a DataArray with boolean values (1,0) with 1 or True equal to the areas that will be turned
67+
into vectors
68+
label: default 'Label', String, the data label that will be added to each geometry in geodataframe
6569
6670
output
67-
Geodataframe containing shapely geometies with data type lable in a series called attribute"""
71+
Geodataframe containing shapely geometries with data type label in a series called attribute"""
6872

6973
vector = rasterio.features.shapes(
70-
xarrayDataArray.data.astype('float32'),
71-
mask=xarrayDataArray.data.astype('float32') == 1, # this defines which part of array becomes polygons
74+
data_array.data.astype('float32'),
75+
mask=data_array.data.astype('float32') == 1, # this defines which part of array becomes polygons
7276
transform=transform)
7377

74-
# rasterio.features.shapes outputs tupples. we only want the polygon coordinate portions of the tupples
75-
vectored_data = list(vector) # put tupple output in list
78+
# rasterio.features.shapes outputs tuples. we only want the polygon coordinate portions of the tuples
79+
vectored_data = list(vector) # put tuple output in list
7680

7781
# Extract the polygon coordinates from the list
7882
polygons = [polygon for polygon, value in vectored_data]
79-
# create empty list for lables
80-
labels = []
81-
# put in labels
82-
for i in polygons:
83-
labels.append(label) # create a list with the data label type
83+
84+
# create a list with the data label type
85+
labels = [label for _ in polygons]
8486

8587
# Convert polygon coordinates into polygon shapes
8688
polygons = [shape(polygon) for polygon in polygons]
@@ -93,7 +95,7 @@ def vectorise_data(xarrayDataArray, transform, crs, label='Label'):
9395

9496

9597
def vectorise_wos_from_url(url) -> geopandas.GeoDataFrame:
96-
raster = load_data(url)
98+
raster = load_wos_data(url)
9799

98100
dataset_crs = from_epsg(raster.crs[11:])
99101
dataset_transform = raster.transform
@@ -102,26 +104,25 @@ def vectorise_wos_from_url(url) -> geopandas.GeoDataFrame:
102104
dilated_water, dilated_not_analysed = generate_raster_layers(raster)
103105

104106
# vectorise the arrays
105-
106107
notAnalysedGPD = vectorise_data(dilated_not_analysed, dataset_transform, dataset_crs, label='Not_analysed')
107108

108109
WaterGPD = vectorise_data(dilated_water, dataset_transform, dataset_crs, label='Water')
109110

110111
# Simplify
111112

112-
# Run simplification with 15 tollerance
113-
simplifyed_water = WaterGPD.simplify(10)
113+
# Run simplification with a tolerance of 10 for water and 15 for not-analysed
114+
simplified_water = WaterGPD.simplify(10)
114115

115-
simplifyed_notAnalysed = notAnalysedGPD.simplify(15)
116+
simplified_not_analysed = notAnalysedGPD.simplify(15)
116117

117118
# Put simplified shapes in a dataframe
118-
simple_waterGPD = gp.GeoDataFrame(geometry=simplifyed_water,
119+
simple_waterGPD = gp.GeoDataFrame(geometry=simplified_water,
119120
crs=dataset_crs)
120121

121-
simple_notAnalysedGPD = gp.GeoDataFrame(geometry=simplifyed_notAnalysed,
122+
simple_notAnalysedGPD = gp.GeoDataFrame(geometry=simplified_not_analysed,
122123
crs=dataset_crs)
123124

124-
# add attribute lables back in
125+
# add attribute labels back in
125126
simple_waterGPD['attribute'] = WaterGPD['attribute']
126127

127128
simple_notAnalysedGPD['attribute'] = notAnalysedGPD['attribute']
@@ -132,38 +133,7 @@ def vectorise_wos_from_url(url) -> geopandas.GeoDataFrame:
132133

133134
# 6 Join together into a single GeoDataFrame
134135

135-
All_classes = gp.GeoDataFrame(pd.concat([simple_waterGPD, simple_notAnalysedGPD], ignore_index=True),
136+
all_classes = gp.GeoDataFrame(pd.concat([simple_waterGPD, simple_notAnalysedGPD], ignore_index=True),
136137
crs=simple_notAnalysedGPD.crs)
137138

138-
return All_classes
139-
140-
141-
def save_to_file(gpd_dataframe):
142-
# +
143-
# #define output file name to save vectors as
144-
outFile = 'Test_simplify/WO_vectors_test_clean'
145-
146-
# # Save the polygons to a shapefile
147-
schema = {
148-
'geometry': 'Polygon',
149-
'properties': {
150-
'attribute': 'str'
151-
}
152-
}
153-
154-
# # Generate our dynamic filename
155-
FileName = f'{outFile}.shp'
156-
157-
# #create file and save
158-
with fiona.open(FileName,
159-
"w",
160-
crs=from_epsg(3577),
161-
driver='ESRI Shapefile',
162-
schema=schema) as output:
163-
for ix, poly in gpd_dataframe.iterrows():
164-
output.write(({
165-
'properties': {
166-
'attribute': poly['attribute']
167-
},
168-
'geometry': mapping(shape(poly['geometry']))
169-
}))
139+
return all_classes

environment.yaml

+1-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ channels:
33
- conda-forge
44
dependencies:
55
- python=3.9
6-
- datacube
76
- boto3
87
- scipy
98
- rasterio
@@ -15,4 +14,4 @@ dependencies:
1514
- xarray
1615
- toolz
1716
- pip:
18-
- boto3_fixtures
17+
- boto3_fixtures

setup.cfg

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[flake8]
2+
max-line-length = 120

tests/conftest.py

+24-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,32 @@
1-
import fixtures
21
import boto3_fixtures as b3f
2+
import pytest
3+
from pathlib import Path
4+
from urllib.request import urlretrieve
5+
6+
import fixtures
37

48
aws = b3f.contrib.pytest.moto_fixture(
5-
services=["sqs", "s3", "sns"],
6-
scope="class",
9+
services=["sqs", "s3", "sns"],
10+
scope="class",
711
)
812

913
sqs = b3f.contrib.pytest.service_fixture("sqs", scope="class", queues=fixtures.SQS)
1014
s3 = b3f.contrib.pytest.service_fixture("s3", scope="class", buckets=fixtures.S3)
1115
sns = b3f.contrib.pytest.service_fixture("sns", scope="class", topics=fixtures.SNS)
16+
17+
SAMPLE_DATA = [
18+
("sample_raster.tif", "https://data.dea.ga.gov.au/derivative/ga_ls_wo_3/1-6-0/097/075/1998/08/17/"
19+
"ga_ls_wo_3_097075_1998-08-17_final_water.tif"),
20+
("sample_stac.json", "https://data.dea.ga.gov.au/derivative/ga_ls_wo_3/1-6-0/097/075/1998/08/17/"
21+
"ga_ls_wo_3_097075_1998-08-17_final.stac-item.json")
22+
]
23+
24+
25+
@pytest.fixture
def sample_data(pytestconfig):
    """Return a directory holding the sample raster and STAC document.

    Each ``(file name, URL)`` pair in ``SAMPLE_DATA`` is downloaded into the
    pytest cache directory ``vect_data`` if the file is not already there;
    files that already exist are reused without touching the network.
    """
    data_dir = Path(pytestconfig.cache.makedir('vect_data'))
    for dest, src in SAMPLE_DATA:
        destination_file = data_dir / dest
        if not destination_file.exists():
            # Download under a temporary name and rename on success, so an
            # interrupted download is never mistaken for a valid cached file.
            partial_file = destination_file.with_name(destination_file.name + '.part')
            urlretrieve(src, partial_file)
            partial_file.replace(destination_file)
    return data_dir

tests/fixtures.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@
1313
"DisplayName": "YourSystemIsOnFireTopic",
1414
},
1515
}
16-
]
16+
]

0 commit comments

Comments
 (0)