Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ repos:
# name: isort (python)
# args: [ "--profile", "black", "--filter-files" ]
- repo: https://github.com/psf/black
rev: 24.4.0
rev: 24.4.2
hooks:
- id: black
- repo: https://github.com/pre-commit/mirrors-pylint
rev: 'v3.0.0a5' # Use the sha / tag you want to point at
- repo: https://github.com/PyCQA/pylint
rev: 'v3.2.3' # Use the sha / tag you want to point at
hooks:
- id: pylint
- repo: https://github.com/PyCQA/flake8
Expand Down
7 changes: 4 additions & 3 deletions apps/cloud/odc/apps/cloud/azure_to_tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ def cli(
url_prefix = (account_url + "/" + container_name + "/")[len("https://") :]

# jam it all in a tar
tar_opts = dict(
name=outfile, mode="w" + tar_mode(gzip=True, xz=True, is_pipe=False)
)
tar_opts = {
"name": outfile,
"mode": "w" + tar_mode(gzip=True, xz=True, is_pipe=False),
}
with tarfile.open(**tar_opts) as tar:
for yaml in yamls:
add_txt_file(tar=tar, content=yaml[0], fname=url_prefix + yaml[1])
Expand Down
4 changes: 3 additions & 1 deletion apps/cloud/odc/apps/cloud/s3_find.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ def cli(uri, skip_check, no_sign_request=None, request_payer=False):
try:
stream = s3_find_glob(uri, skip_check=skip_check, s3=s3, **opts)
for i, o in enumerate(stream):
print(o.url, flush=(i % flush_freq == 0))
print(
o.url, flush=(i % flush_freq == 0)
) # pylint:disable=superfluous-parens
except ValueError as ve:
click.echo(str(ve), err=True)
sys.exit(1)
Expand Down
2 changes: 1 addition & 1 deletion apps/cloud/odc/apps/cloud/s3_to_tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def dump_to_tar(data_stream, tar):

fetcher = S3Fetcher(nconcurrent=nconnections, aws_unsigned=no_sign_request)
is_pipe = outfile == "-"
tar_opts = dict(mode="w" + tar_mode(gzip=gzip, xz=xz, is_pipe=is_pipe))
tar_opts = {"mode": "w" + tar_mode(gzip=gzip, xz=xz, is_pipe=is_pipe)}
if is_pipe:
if stdout.isatty():
click.echo("Will not write to a terminal", err=True)
Expand Down
7 changes: 4 additions & 3 deletions apps/cloud/odc/apps/cloud/thredds_to_tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,10 @@ def cli(thredds_catalogue, skips, select, workers, outfile):
yamls = download_yamls(urls, workers)

# jam it all in a tar
tar_opts = dict(
name=outfile, mode="w" + tar_mode(gzip=True, xz=True, is_pipe=False)
)
tar_opts = {
"name": outfile,
"mode": "w" + tar_mode(gzip=True, xz=True, is_pipe=False),
}
with tarfile.open(**tar_opts) as tar:
for yaml in yamls:
add_txt_file(tar=tar, content=yaml[0], fname=yaml[1])
Expand Down
61 changes: 29 additions & 32 deletions apps/dc_tools/odc/apps/dc_tools/_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from eodatasets3.serialise import from_doc
from eodatasets3.stac import to_stac_item
from toolz import get_in
from urlpath import URL
from urllib.parse import urlparse

from ._docs import odc_uuid

Expand All @@ -29,6 +29,7 @@
"sentinel_s2_l2a_cogs",
"sentinel-s2-l2a-cogs",
"sentinel-2-l2a",
"s2_l2a_c1",
]

# Mapping between EO3 field names and STAC properties object field names
Expand Down Expand Up @@ -118,7 +119,10 @@ def _stac_product_lookup(
dataset_id = properties.get("sentinel:product_id") or properties.get(
"s2:granule_id", dataset_id
)
product_name = "s2_l2a"
if collection == "s2_l2a_c1":
product_name = "s2_l2a_c1"
else:
product_name = "s2_l2a"
if region_code is None:
# Let's try two options, and throw an exception if we still don't get it
try:
Expand Down Expand Up @@ -179,10 +183,27 @@ def _find_self_href(item: Document) -> str:
return self_uri[0]


def _get_relative_path(asset_href, self_link):
if self_link is None:
return asset_href
self_parts = urlparse(self_link)
href_parts = urlparse(asset_href)

if self_parts.netloc.split(".")[0] != href_parts.netloc.split(".")[0]:
# files are not stored with same hostname (e.g. different buckets)
# therefore use the absolute path
return asset_href

try:
return str(Path(href_parts.path).relative_to(Path(self_parts.path).parent))
except ValueError:
# if it's not relative, keep as an absolute link
return asset_href


def _get_stac_bands(
item: Document,
default_grid: str,
relative: bool = False,
proj_shape: Optional[str] = None,
proj_transform: Optional[str] = None,
) -> Tuple[Document, Document, Document]:
Expand All @@ -195,35 +216,16 @@ def _get_stac_bands(

assets = item.get("assets", {})

def _get_path(asset, force_relative=False):
path = URL(asset["href"])
if relative:
try:
if self_link is None:
raise ValueError
path = path.relative_to(URL(self_link).parent)
# Value error is raised if the path is not relative to the parent
# or if the self link cannot be found.
except ValueError:
# If the path is not relative to the parent force_relative
# is still used for data assets, due to a historical assumption.
# TODO: Implement rewrite_assets (like in stac_to_dc) in all
# tools so that this is no longer necessary.
if force_relative:
path = path.name
else:
pass

return str(path)

for asset_name, asset in assets.items():
image_types = ["jp2", "geotiff"]
# If something's not in image_types, make it an accessory
# include thumbnails in accessories
if not any(
t in asset.get("type", []) for t in image_types
) or "thumbnail" in asset.get("roles", []):
accessories[asset_name] = {"path": _get_path(asset)}
accessories[asset_name] = {
"path": _get_relative_path(asset["href"], self_link)
}
continue

# If transform specified here in the asset it should override
Expand All @@ -240,7 +242,7 @@ def _get_path(asset, force_relative=False):
"transform": transform,
}

path = _get_path(asset, force_relative=True)
path = _get_relative_path(asset["href"], self_link)
band_index = asset.get("band", None)

band_info = {"path": path}
Expand Down Expand Up @@ -275,10 +277,6 @@ def round_coords(c1, c2):
return None


def stac_transform_absolute(input_stac):
return stac_transform(input_stac, relative=False)


def _convert_value_to_eo3_type(key: str, value):
"""
Convert return type as per EO3 specification.
Expand Down Expand Up @@ -332,7 +330,7 @@ def _check_valid_uuid(uuid_string: str) -> bool:
return False


def stac_transform(input_stac: Document, relative: bool = True) -> Document:
def stac_transform(input_stac: Document) -> Document:
"""Takes in a raw STAC 1.0 dictionary and returns an ODC dictionary"""
# pylint: disable=too-many-locals

Expand Down Expand Up @@ -371,7 +369,6 @@ def stac_transform(input_stac: Document, relative: bool = True) -> Document:
bands, grids, accessories = _get_stac_bands(
input_stac,
default_grid,
relative=relative,
proj_shape=proj_shape,
proj_transform=proj_transform,
)
Expand Down
2 changes: 1 addition & 1 deletion apps/dc_tools/odc/apps/dc_tools/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.2.16"
__version__ = "0.2.18"
2 changes: 1 addition & 1 deletion apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"{algo}/{year}/map/ESA_WorldCover_10m_{year}_{algo}_{ns}{ew}_Map.tif"
)

map_version = dict(algo="v100", year="2020")
map_version = {"algo": "v100", "year": "2020"}


def _unpack_bbox(bounding_box: str) -> Tuple[int, int, int, int]:
Expand Down
14 changes: 7 additions & 7 deletions apps/dc_tools/odc/apps/dc_tools/index_from_tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,13 @@ def cli(
if ignore_lineage:
auto_add_lineage = False

ds_resolve_args = dict(
products=product_names,
exclude_products=exclude_product_names,
fail_on_missing_lineage=not auto_add_lineage,
verify_lineage=verify_lineage,
skip_lineage=ignore_lineage,
)
ds_resolve_args = {
"products": product_names,
"exclude_products": exclude_product_names,
"fail_on_missing_lineage": not auto_add_lineage,
"verify_lineage": verify_lineage,
"skip_lineage": ignore_lineage,
}

allowed_changes = {(): allow_any}

Expand Down
20 changes: 7 additions & 13 deletions apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from datacube import Datacube
from datacube.index.hl import Doc2Dataset
from odc.apps.dc_tools._docs import parse_doc_stream
from odc.apps.dc_tools._stac import stac_transform, stac_transform_absolute
from odc.apps.dc_tools._stac import stac_transform
from odc.apps.dc_tools.utils import (
IndexingException,
SkippedException,
Expand All @@ -26,7 +26,6 @@
statsd_gauge_reporting,
statsd_setting,
transform_stac,
transform_stac_absolute,
update_flag,
update_if_exists_flag,
verify_lineage,
Expand Down Expand Up @@ -59,12 +58,15 @@ def dump_to_odc(
uris_docs = parse_doc_stream(
((doc.url, doc.data) for doc in document_stream),
on_error=doc_error,
transform=transform,
)

found_docs = False
for uri, metadata in uris_docs:
found_docs = True
stac_doc = None
if transform:
stac_doc = metadata
metadata = stac_transform(metadata)
try:
index_update_dataset(
metadata,
Expand All @@ -76,6 +78,7 @@ def dump_to_odc(
allow_unsafe=allow_unsafe,
archive_less_mature=archive_less_mature,
publish_action=publish_action,
stac_doc=stac_doc,
)
ds_added += 1
except IndexingException:
Expand Down Expand Up @@ -103,7 +106,6 @@ def dump_to_odc(
@fail_on_missing_lineage
@verify_lineage
@transform_stac
@transform_stac_absolute
@update_flag
@update_if_exists_flag
@allow_unsafe
Expand All @@ -121,7 +123,6 @@ def cli(
fail_on_missing_lineage,
verify_lineage,
stac,
absolute,
update,
update_if_exists,
allow_unsafe,
Expand Down Expand Up @@ -151,13 +152,6 @@ def cli(
datefmt="%m/%d/%Y %I:%M:%S",
)

transform = None
if stac:
if absolute:
transform = stac_transform_absolute
else:
transform = stac_transform

opts = {}
if request_payer:
opts["RequestPayer"] = "requester"
Expand Down Expand Up @@ -215,7 +209,7 @@ def cli(
skip_lineage=skip_lineage,
fail_on_missing_lineage=fail_on_missing_lineage,
verify_lineage=verify_lineage,
transform=transform,
transform=stac,
update=update,
update_if_exists=update_if_exists,
allow_unsafe=allow_unsafe,
Expand Down
Loading