Skip to content

Recursive summary, add pydantic support to --recursive mode #265

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/)
## [Unreleased]

### Added
- Added detailed recursive validation summary showing validation counts by STAC object type (Catalog, Collection, etc.)
- Added validation duration timing that shows total processing time in a human-readable format
- Added support for pydantic validation in recursive mode with proper schema reporting

### Changed
- Standardized summary output formatting across all validation modes for consistency

## [v3.9.3] - 2025-06-28

Expand Down
157 changes: 122 additions & 35 deletions stac_validator/stac_validator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,52 @@
import json
import sys
import time
from typing import Any, Dict, List, Optional, Tuple

import click # type: ignore

from .validate import StacValidate


def _print_summary(
    title: str, valid_count: int, total_count: int, obj_type: str = "STAC objects"
) -> None:
    """Print a consistently formatted summary section for a validation run.

    Emits a blank line, the bold title followed by a colon, and then either a
    "passed" line with the valid/total counts and the pass percentage, or a
    notice that nothing was found when ``total_count`` is not positive.

    Args:
        title (str): Title of the summary section.
        valid_count (int): Number of objects that passed validation.
        total_count (int): Total number of objects that were validated.
        obj_type (str): Label for the objects being counted (e.g. 'items',
            'collections'). Only its first character is upper-cased for
            display, so acronyms such as 'STAC' keep their casing.

    Returns:
        None
    """
    click.secho()
    click.secho(f"{title}:", bold=True)

    if total_count <= 0:
        click.secho(f" No {obj_type} found to validate")
        return

    # str.capitalize() lower-cases everything after the first character
    # ("STAC objects" -> "Stac objects"), so only the first letter is touched.
    label = obj_type[:1].upper() + obj_type[1:]
    percentage = (valid_count / total_count) * 100
    click.secho(
        f" {label} passed: {valid_count}/{total_count} ({percentage:.1f}%)"
    )


def format_duration(seconds: float) -> str:
    """Format a duration in seconds as a human-readable string.

    Durations that display as less than one second are shown in milliseconds;
    longer ones are shown as seconds, prefixed by whole minutes when there is
    at least one. The value is rounded to the displayed precision *before* the
    unit is chosen and before minutes are split off, so boundary values never
    render as '1000.00ms', '60.00s' or '1m 60.00s'.

    Args:
        seconds (float): Duration in seconds.

    Returns:
        str: Formatted duration string (e.g., '1m 23.45s' or '456.78ms').
    """
    milliseconds = seconds * 1000
    # Compare the rounded value so 0.999999s falls through to "1.00s".
    if round(milliseconds, 2) < 1000:
        return f"{milliseconds:.2f}ms"

    # Round first so a carry from the fractional part reaches the minutes
    # (119.999s -> "2m 0.00s" rather than "1m 60.00s").
    minutes, remainder = divmod(round(seconds, 2), 60)
    if minutes > 0:
        return f"{int(minutes)}m {remainder:.2f}s"
    return f"{remainder:.2f}s"


def print_update_message(version: str) -> None:
"""Prints an update message for `stac-validator` based on the version of the
STAC file being validated.
Expand Down Expand Up @@ -36,33 +76,64 @@ def item_collection_summary(message: List[Dict[str, Any]]) -> None:
Returns:
None
"""
valid_count = 0
for item in message:
if "valid_stac" in item and item["valid_stac"] is True:
valid_count = valid_count + 1
click.secho()
click.secho("--item-collection summary", bold=True)
click.secho(f"items_validated: {len(message)}")
click.secho(f"valid_items: {valid_count}")
valid_count = sum(1 for item in message if item.get("valid_stac") is True)
_print_summary("-- Item Collection Summary", valid_count, len(message), "items")


def collections_summary(message: List[Dict[str, Any]]) -> None:
    """Prints a summary of the validation results for a collections response.

    A collection counts as valid only when its "valid_stac" entry is exactly
    ``True``; the total is the number of entries in ``message``.

    Args:
        message (List[Dict[str, Any]]): The validation results for the collections.

    Returns:
        None
    """
    valid_count = sum(1 for coll in message if coll.get("valid_stac") is True)
    _print_summary("-- Collections Summary", valid_count, len(message), "collections")


def recursive_validation_summary(message: List[Dict[str, Any]]) -> None:
    """Prints a summary of the recursive validation results.

    Prints an overall passed/total line and, when more than one object type
    was seen, a per-type breakdown sorted by type name. Entries are grouped by
    their lower-cased "asset_type"; a missing, ``None`` or empty value is
    grouped under "unknown". An entry counts as valid only when its
    "valid_stac" value is exactly ``True``. Entries that are not dicts are
    left out of the per-type counts but still count toward the overall total,
    which is ``len(message)``.

    Args:
        message (List[Dict[str, Any]]): The validation results from recursive validation.

    Returns:
        None
    """
    # Maps object type -> {"valid": passed count, "total": seen count}.
    type_counts: Dict[str, Dict[str, int]] = {}
    total_valid = 0

    for item in message:
        if not isinstance(item, dict):
            continue

        # `or` also covers a key that is present but None/empty, which the
        # default argument of dict.get() would not catch.
        obj_type = str(item.get("asset_type") or "unknown").lower()
        counts = type_counts.setdefault(obj_type, {"valid": 0, "total": 0})

        counts["total"] += 1
        if item.get("valid_stac") is True:
            counts["valid"] += 1
            total_valid += 1

    # Print overall summary
    _print_summary("-- Recursive Validation Summary", total_valid, len(message))

    # A breakdown is only informative when there is more than one type.
    if len(type_counts) > 1:
        click.secho("\n Breakdown by type:")
        for obj_type, counts in sorted(type_counts.items()):
            # Every recorded type has total >= 1, so the division is safe.
            percentage = (counts["valid"] / counts["total"]) * 100
            click.secho(
                f" {obj_type.capitalize()}: {counts['valid']}/{counts['total']} ({percentage:.1f}%)"
            )


@click.command()
Expand Down Expand Up @@ -182,15 +253,16 @@ def main(
log_file: str,
pydantic: bool,
verbose: bool = False,
) -> None:
):
"""Main function for the `stac-validator` command line tool. Validates a STAC file
against the STAC specification and prints the validation results to the console as JSON.

Args:
stac_file (str): Path to the STAC file to be validated.
collections (bool): Validate response from /collections endpoint.
item_collection (bool): Whether to validate item collection responses.
no_assets_urls (bool): Whether to open href links when validating assets (enabled by default).
no_assets_urls (bool): Whether to open href links when validating assets
(enabled by default).
headers (dict): HTTP headers to include in the requests.
pages (int): Maximum number of pages to validate via `item_collection`.
recursive (bool): Whether to recursively validate all related STAC objects.
Expand All @@ -215,11 +287,14 @@ def main(
SystemExit: Exits the program with a status code of 0 if the STAC file is valid,
or 1 if it is invalid.
"""
start_time = time.time()
valid = True

if schema_map == ():
schema_map_dict: Optional[Dict[str, str]] = None
else:
schema_map_dict = dict(schema_map)

stac = StacValidate(
stac_file=stac_file,
collections=collections,
Expand All @@ -241,25 +316,37 @@ def main(
pydantic=pydantic,
verbose=verbose,
)
if not item_collection and not collections:
valid = stac.run()
elif collections:
stac.validate_collections()
else:
stac.validate_item_collection()

message = stac.message
if "version" in message[0]:
print_update_message(message[0]["version"])
try:
if not item_collection and not collections:
valid = stac.run()
elif collections:
stac.validate_collections()
else:
stac.validate_item_collection()

message = stac.message
if "version" in message[0]:
print_update_message(message[0]["version"])

if no_output is False:
click.echo(json.dumps(message, indent=4))
if no_output is False:
click.echo(json.dumps(message, indent=4))

if item_collection:
item_collection_summary(message)
elif collections:
collections_summary(message)
# Print appropriate summary based on validation mode
if item_collection:
item_collection_summary(message)
elif collections:
collections_summary(message)
elif recursive:
recursive_validation_summary(message)

finally:
# Always print the duration, even if validation fails
duration = time.time() - start_time
click.secho(
f"\nValidation completed in {format_duration(duration)}", fg="green"
)
click.secho()
sys.exit(0 if valid else 1)


Expand Down
Loading