release(v0.5.2)
 [0.5.2] - 2025-02-13
nkarasiak authored Feb 13, 2025
2 parents 2242b44 + f2b7d52 commit 8d81e6c
Showing 6 changed files with 76 additions and 32 deletions.
12 changes: 12 additions & 0 deletions docs/CHANGELOG.md
@@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.5.2] - 2025-02-13

### Added

- New default method for duplicate detection using `proj:transform` instead
  of bbox (which could differ by a huge gap between otherwise duplicate items).
  The previous behavior can be restored with
  `filter_duplicate_items(..., method='bbox')` (see the usage sketch below).

### Fixed

- Issues with rescaling when products share the same datetime.
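
A minimal usage sketch of the new duplicate-detection option, assuming `filter_duplicate_items` is imported from the `earthdaily.earthdatastore._filter_duplicate` module changed below and that `items` is an existing STAC ItemCollection:

```python
from datetime import timedelta

from earthdaily.earthdatastore._filter_duplicate import filter_duplicate_items

# New default in 0.5.2: compare items on their assets' proj:transform.
deduplicated = filter_duplicate_items(items, time_threshold=timedelta(minutes=5))

# Previous behavior: compare items on their (rounded) bbox instead.
deduplicated_bbox = filter_duplicate_items(items, method="bbox")
```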

## [0.5.1] - 2025-02-12

### Added
2 changes: 1 addition & 1 deletion earthdaily/__init__.py
@@ -6,7 +6,7 @@

__all__ = ["options"]

__version__ = "0.5.1"
__version__ = "0.5.2"


def EarthDataStore(
7 changes: 6 additions & 1 deletion earthdaily/earthdatastore/__init__.py
@@ -1118,6 +1118,7 @@ def datacube(
"eq": cross_calibration_collection
},
},
deduplicate_items=False,
)
except Warning:
raise Warning(
@@ -1166,7 +1167,10 @@
)
ds["time"] = ds.time.astype("M8[ns]")
if ds.time.size != ds_mask.time.size:
ds = _select_last_common_occurrences(ds, ds_mask)
raise NotImplementedError(
"Sensor and cloudmask don't havethe same time.\
{ds.time.size} for sensor and {ds_mask.time.size} for cloudmask."
)
ds_mask["time"] = ds["time"].time
ds_mask = cube_utils._match_xy_dims(ds_mask, ds)
ds = xr.merge((ds, ds_mask), compat="override")
@@ -1460,6 +1464,7 @@ def ag_cloud_mask_from_items(items):
collections=collections,
ids=ids_[items_start_idx : items_start_idx + step],
limit=step,
deduplicate_items=False,
**kwargs,
)
items_list.extend(list(items))
60 changes: 41 additions & 19 deletions earthdaily/earthdatastore/_filter_duplicate.py
@@ -15,28 +15,28 @@ class StacGroup:
----------
datetime : datetime
Reference datetime for the group
bbox : tuple[float, ...] | None
footprint : tuple[float, ...] | None
proj:transform values or bounding box coordinates as (west, south, east, north), or None
"""

datetime: datetime
bbox: Optional[Tuple[float, ...]]
footprint: Optional[Tuple[float, ...]]

def matches(
self,
other_dt: datetime,
other_bbox: Optional[Tuple[float, ...]],
other_footprint: Optional[Tuple[float, ...]],
threshold: timedelta,
) -> bool:
"""
Check if another datetime and bbox match this group within threshold.
Check if another datetime and footprint match this group within threshold.
Parameters
----------
other_dt : datetime
Datetime to compare against
other_bbox : tuple[float, ...] | None
Bounding box to compare against
other_footprint : tuple[float, ...] | None
proj:transform values or bounding box to compare against
threshold : timedelta
Maximum allowed time difference
@@ -45,7 +45,10 @@ def matches(
bool
True if the other datetime and footprint match within threshold
"""
return self.bbox == other_bbox and abs(self.datetime - other_dt) <= threshold
return (
self.footprint == other_footprint
and abs(self.datetime - other_dt) <= threshold
)
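
A small illustration of the rule above, with invented footprint values: two observations fall into the same group only when their footprints are identical and their datetimes differ by at most the threshold.

```python
from datetime import datetime, timedelta

footprint = (10.0, 0.0, 600000.0, 0.0, -10.0, 4900000.0)  # e.g. a proj:transform tuple
group = StacGroup(datetime=datetime(2025, 2, 13, 10, 0), footprint=footprint)

group.matches(datetime(2025, 2, 13, 10, 3), footprint, timedelta(minutes=5))   # True
group.matches(datetime(2025, 2, 13, 10, 30), footprint, timedelta(minutes=5))  # False: 30 min apart
```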


@lru_cache(maxsize=1024)
@@ -66,9 +69,11 @@ def _parse_datetime(dt_str: str) -> datetime:
return datetime.fromisoformat(dt_str.replace("Z", "+00:00"))


def _extract_item_metadata(item: Dict) -> Tuple[datetime, Optional[Tuple[float, ...]]]:
def _extract_item_metadata(
item: Dict, method="proj:transform"
) -> Tuple[datetime, Optional[Tuple[float, ...]]]:
"""
Extract datetime and bbox from a STAC item.
Extract datetime and footprint from a STAC item.
Parameters
----------
@@ -78,11 +83,18 @@ def _extract_item_metadata(item: Dict) -> Tuple[datetime, Optional[Tuple[float,
Returns
-------
tuple[datetime, tuple[float, ...] | None]
Tuple of (datetime, bbox)
Tuple of (datetime, footprint)
"""
dt = _parse_datetime(item["properties"]["datetime"])
bbox = tuple(item["bbox"]) if "bbox" in item else None
return dt, bbox
any_asset = item.get("assets", {}) != {}
if any_asset and method == "proj:transform":
_first_item = list(item["assets"].keys())[0]
footprint = tuple(
item["assets"][_first_item].get("proj:transform", item["bbox"])
)
else:
footprint = tuple(round(bbox, 6) for bbox in item["bbox"])
return dt, footprint
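
A hedged sketch of what each method yields for a minimal STAC-like item dict (values invented): with the default method the first asset's `proj:transform` becomes the footprint; `method='bbox'` (or an item without assets) falls back to the rounded bbox.

```python
item = {
    "properties": {"datetime": "2025-02-13T10:00:00Z"},
    "bbox": [5.0, 43.0, 5.1, 43.1],
    "assets": {"red": {"proj:transform": [10.0, 0.0, 600000.0, 0.0, -10.0, 4800000.0]}},
}

_extract_item_metadata(item)                 # footprint taken from assets["red"]["proj:transform"]
_extract_item_metadata(item, method="bbox")  # footprint == (5.0, 43.0, 5.1, 43.1)
```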


def _group_items(
@@ -106,12 +118,12 @@ def _group_items(
groups: Dict[StacGroup, List[Dict]] = {}

for item in items:
dt, bbox = _extract_item_metadata(item)
dt, footprint = _extract_item_metadata(item)

# Find matching group or create new one
matching_group = next(
(group for group in groups if group.matches(dt, bbox, time_threshold)),
StacGroup(datetime=dt, bbox=bbox),
(group for group in groups if group.matches(dt, footprint, time_threshold)),
StacGroup(datetime=dt, footprint=footprint),
)

groups.setdefault(matching_group, []).append(item)
@@ -148,7 +160,9 @@ def _select_latest_items(items: List[Dict]) -> List[Dict]:


def filter_duplicate_items(
items: ItemCollection, time_threshold: timedelta = timedelta(minutes=5)
items: ItemCollection,
time_threshold: timedelta = timedelta(minutes=5),
method="proj:transform",
) -> ItemCollection:
"""
Deduplicate STAC items based on spatial and temporal proximity.
@@ -164,12 +178,15 @@
ISO format acquisition timestamp
- properties.updated : str
ISO format update timestamp
- bbox : list[float], optional
Bounding box coordinates [west, south, east, north]
- footprint : list[float], optional
Bounding box coordinates [west, south, east, north] or proj:transform.
time_threshold : timedelta, optional
Maximum time difference to consider items as duplicates, by default 5 minutes
method : str
'proj:transform' (default) or 'bbox', the footprint used to compare items
Returns
-------
ItemCollection
@@ -216,5 +233,10 @@
for group_items in grouped_items.values()
for item in _select_latest_items(group_items)
]
logging.info(f"Deduplication removes {len(items)-len(deduplicated)} items.")
duplicates = len(items) - len(deduplicated)
if duplicates:
logging.info(f"Deduplication removes {duplicates} items.")

deduplicated = sorted(deduplicated, key=lambda x: x["properties"]["datetime"])

return ItemCollection(deduplicated)
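
An end-to-end sketch on plain item dicts (values invented), assuming `_group_items` takes the items and the time threshold and that `_select_latest_items` keeps the most recently updated item of each group, as its name suggests:

```python
from datetime import timedelta

older = {
    "properties": {"datetime": "2025-02-13T10:00:00Z", "updated": "2025-02-13T11:00:00Z"},
    "bbox": [5.0, 43.0, 5.1, 43.1],
    "assets": {},
}
reprocessed = {
    "properties": {"datetime": "2025-02-13T10:02:00Z", "updated": "2025-02-14T08:00:00Z"},
    "bbox": [5.0, 43.0, 5.1, 43.1],
    "assets": {},
}

# Both items share a footprint and are 2 minutes apart, so they form one group;
# only the most recently updated item is expected to survive.
groups = _group_items([older, reprocessed], timedelta(minutes=5))
kept = [item for group in groups.values() for item in _select_latest_items(group)]
```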
13 changes: 9 additions & 4 deletions earthdaily/earthdatastore/cube_utils/__init__.py
@@ -263,6 +263,12 @@ def _disable_known_datacube_warning():
message="invalid value encountered in cast",
module="dask.array.chunk",
)
warnings.filterwarnings(
"ignore", category=RuntimeWarning, message="All-NaN slice encountered"
)
warnings.filterwarnings(
"ignore", category=RuntimeWarning, message="Mean of empty slice"
)


def datacube(
@@ -385,7 +391,7 @@ def datacube(
json.dumps(ds.coords[coord].values[idx])
for idx in range(ds.coords[coord].size)
]

ds = ds.sortby(ds.time)
return ds


@@ -503,7 +509,7 @@ def rescale_assets_with_items(
# Track scaling parameters
scales.setdefault(ds_asset, {}).setdefault(scale, {}).setdefault(
offset, []
).append(time)
).append(idx)

# Apply rescaling
if scales:
Expand All @@ -512,8 +518,7 @@ def rescale_assets_with_items(
asset_scaled = []
for scale, offset_data in scale_data.items():
for offset, times in offset_data.items():
mask = np.isin(ds.time, times)
asset_scaled.append(ds[[asset]].isel(time=mask) * scale + offset)
asset_scaled.append(ds[[asset]].isel(time=times) * scale + offset)
scaled_assets.append(xr.concat(asset_scaled, dim="time"))

# Merge scaled assets
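The switch from `.append(time)` / `np.isin` to `.append(idx)` / `isel(time=times)` above is what addresses the changelog's "rescale when same datetime between products" fix: a datetime-based mask cannot tell two products acquired at the same instant apart, while positional indices can. A standalone sketch with invented data:

```python
import numpy as np
import xarray as xr

# Two products share the 2022-06-01 timestamp.
times = np.array(["2022-06-01", "2022-06-01", "2022-06-02"], dtype="M8[ns]")
ds = xr.Dataset({"red": ("time", [1.0, 2.0, 3.0])}, coords={"time": times})

# Datetime-based mask: selects *both* entries carrying that timestamp.
mask = np.isin(ds.time, times[:1])
print(ds.isel(time=mask).red.values)  # [1. 2.]

# Position-based selection: targets exactly the intended entry.
print(ds.isel(time=[0]).red.values)   # [1.]
```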
14 changes: 7 additions & 7 deletions examples/field_evolution.py
@@ -28,27 +28,27 @@
# Search for collection items for June and July 2022,
# where at least 50% of the field is clear according to the native cloudmask.

pivot_cube = eds.datacube(
datacube = eds.datacube(
"sentinel-2-l2a",
intersects=pivot,
datetime=["2022-06"],
datetime=["2022-06","2022-07"],
assets=["red", "green", "blue", "nir"],
mask_with="native",
clear_cover=50
)
pivot_cube.clear_percent.plot.scatter(x="time")
datacube.clear_percent.plot.scatter(x="time")

##############################################################################
# Add spectral indices using spyndex from earthdaily accessor
# ------------------------------------------------------------

pivot_cube = pivot_cube.ed.add_indices(['NDVI'])
datacube = datacube.ed.add_indices(['NDVI'])

##############################################################################
# Plot the cube masked with SCL, keeping dates with at least 50% clear data
# ----------------------------------------------------
pivot_cube = pivot_cube.load()
pivot_cube.ed.plot_rgb(col_wrap=4, vmin=0, vmax=.3)
datacube = datacube.load()
datacube.ed.plot_rgb(col_wrap=4, vmin=0, vmax=.3)
plt.title("Pivot evolution masked with native cloudmasks")
plt.show()

@@ -57,7 +57,7 @@
# Compute zonal stats for the pivot
# ----------------------------------------------------

zonal_stats = pivot_cube.ed.zonal_stats(pivot, ['mean','max','min'])
zonal_stats = datacube.ed.zonal_stats(pivot, ['mean','max','min'])

zonal_stats.isel(feature=0).to_array(dim="band").plot.line(
x="time", col="band", hue="zonal_statistics", col_wrap=3
