Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/scripts/resolve_run_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def _github_actions_run_id(env: Mapping[str, str]) -> str:
return build_run_id(
github_run_id=env.get("GITHUB_RUN_ID", ""),
github_run_attempt=env.get("GITHUB_RUN_ATTEMPT", "1"),
github_sha=env.get("GITHUB_SHA", ""),
)


Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/local_area_promote.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
workflow_dispatch:
inputs:
run_id:
description: 'Run ID to promote (e.g. usdata-gha123456-a1-abcdef12)'
description: 'Run ID to promote (e.g. usdata-gha123456-a1)'
required: true
type: string

Expand All @@ -22,7 +22,7 @@ jobs:
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
MODAL_ENVIRONMENT: main
US_DATA_MODAL_APP_PREFIX: policyengine-us-data-pub
US_DATA_MODAL_APP_PREFIX: us-data
US_DATA_RUN_ID: ${{ github.event.inputs.run_id }}

steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ jobs:
runs-on: ubuntu-latest
env:
MODAL_ENVIRONMENT: main
US_DATA_MODAL_APP_PREFIX: policyengine-us-data-pub
US_DATA_MODAL_APP_PREFIX: us-data
US_DATA_RUN_ID: ${{ inputs.run_id || '' }}
steps:
- uses: actions/checkout@v6
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
github_run_url: ${{ steps.run-context.outputs.github_run_url }}
env:
MODAL_ENVIRONMENT: main
US_DATA_MODAL_APP_PREFIX: policyengine-us-data-pub
US_DATA_MODAL_APP_PREFIX: us-data
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
Expand Down
1 change: 1 addition & 0 deletions changelog.d/981.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Shortened publication run IDs and default Modal app names while preserving candidate-scoped staging identity.
31 changes: 24 additions & 7 deletions modal_app/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,28 @@ def _write_validation_diagnostics(
# ── Orchestrator ─────────────────────────────────────────────────


def _new_run_metadata(
    *,
    run_id: str,
    branch: str,
    sha: str,
    candidate_version: str,
    release_version: str,
    run_context: RunContext,
) -> RunMetadata:
    """Create the ``RunMetadata`` record for a run that is just starting.

    All arguments are keyword-only.

    Args:
        run_id: Identifier recorded on the metadata.
        branch: Branch name recorded on the metadata.
        sha: Commit SHA recorded on the metadata.
        candidate_version: Stored as both ``version`` and
            ``candidate_version`` on the returned record.
        release_version: Stored as ``release_version``.
        run_context: Passed to ``_metadata_run_fields``; the mapping it
            returns is expanded into additional keyword arguments.

    Returns:
        A ``RunMetadata`` with ``status="running"`` and ``start_time`` set
        to the current UTC time in ISO 8601 format.
    """
    started_at = datetime.now(timezone.utc).isoformat()
    # Context-derived fields are supplied only through this expansion, so
    # none of them may also be passed explicitly below (a duplicate keyword
    # would raise TypeError).
    context_fields = _metadata_run_fields(run_context)
    return RunMetadata(
        run_id=run_id,
        branch=branch,
        sha=sha,
        version=candidate_version,
        candidate_version=candidate_version,
        release_version=release_version,
        start_time=started_at,
        status="running",
        **context_fields,
    )


@app.function(
image=image,
cpu=2,
Expand Down Expand Up @@ -1021,18 +1043,13 @@ def run_pipeline(
)
_apply_run_context_env(current_run_context)
run_id = current_run_context.run_id
meta = RunMetadata(
meta = _new_run_metadata(
run_id=run_id,
branch=branch,
sha=sha,
version=candidate_version,
candidate_version=candidate_version,
release_version=release_version,
base_release_version=current_run_context.base_release_version,
release_bump=current_run_context.release_bump,
start_time=datetime.now(timezone.utc).isoformat(),
status="running",
**_metadata_run_fields(current_run_context),
run_context=current_run_context,
)

# Create run directory
Expand Down
49 changes: 37 additions & 12 deletions policyengine_us_data/utils/run_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
DATA_PACKAGE_VERSION_ENV = "US_DATA_PACKAGE_VERSION"
MODAL_APP_NAME_ENV = "US_DATA_MODAL_APP_NAME"
MODAL_ENVIRONMENT_ENV = "US_DATA_MODAL_ENVIRONMENT"
DEFAULT_MODAL_APP_PREFIX = "policyengine-us-data-pub"
DEFAULT_MODAL_APP_PREFIX = "us-data"
DEFAULT_MODAL_ENVIRONMENT = "main"
DEFAULT_MAX_RESOURCE_NAME_LENGTH = 64
VALID_RELEASE_BUMPS = frozenset({"major", "minor", "patch"})
Expand Down Expand Up @@ -109,14 +109,12 @@ def build_run_id(
*,
github_run_id: str,
github_run_attempt: str,
github_sha: str,
) -> str:
"""Build a deterministic run ID from GitHub Actions identity."""
if not github_run_id:
raise ValueError("github_run_id is required")
attempt = github_run_attempt or "1"
sha = (github_sha or "unknown")[:8]
return sanitize_run_id(f"usdata-gha{github_run_id}-a{attempt}-{sha}")
return sanitize_run_id(f"usdata-gha{github_run_id}-a{attempt}")


def build_modal_resource_name(
Expand All @@ -132,22 +130,48 @@ def build_modal_resource_name(
)


def staging_prefix(
run_id: str = "",
def candidate_run_segment(
run_id: str,
candidate_version: str = "",
*,
version: str = "",
) -> str:
if not run_id:
return "staging"
"""Return the shared candidate/run segment for staging and app names."""
resolved_run_id = sanitize_run_id(run_id)
resolved_candidate_version = candidate_version or version
if not resolved_candidate_version:
return f"staging/{resolved_run_id}"
staging_scope = sanitize_staging_version(
return resolved_run_id
return sanitize_staging_version(
f"{sanitize_staging_version(resolved_candidate_version)}-{resolved_run_id}"
)
return f"staging/{staging_scope}"


def build_modal_app_name(
    run_id: str,
    candidate_version: str = "",
    *,
    prefix: str = DEFAULT_MODAL_APP_PREFIX,
    max_length: int = DEFAULT_MAX_RESOURCE_NAME_LENGTH,
) -> str:
    """Build a Modal app name from the candidate version and run ID.

    Args:
        run_id: Run identifier forwarded to ``candidate_run_segment``.
        candidate_version: Optional candidate version forwarded to
            ``candidate_run_segment``.
        prefix: Forwarded to ``build_modal_resource_name`` as ``prefix``.
        max_length: Forwarded to ``build_modal_resource_name`` as
            ``max_length``.

    Returns:
        The value returned by ``build_modal_resource_name`` for the
        candidate/run segment.
    """
    scoped_segment = candidate_run_segment(run_id, candidate_version)
    return build_modal_resource_name(
        scoped_segment,
        prefix=prefix,
        max_length=max_length,
    )


def staging_prefix(
    run_id: str = "",
    candidate_version: str = "",
    *,
    version: str = "",
) -> str:
    """Return the staging path prefix for a run.

    Args:
        run_id: Run identifier. When empty, the prefix is not run-scoped.
        candidate_version: Forwarded to ``candidate_run_segment``.
        version: Forwarded to ``candidate_run_segment`` as the ``version``
            keyword.

    Returns:
        ``"staging"`` when ``run_id`` is empty; otherwise ``"staging/"``
        followed by the segment from ``candidate_run_segment``.
    """
    if run_id:
        segment = candidate_run_segment(run_id, candidate_version, version=version)
        return f"staging/{segment}"
    return "staging"


def github_run_url(env: Mapping[str, str]) -> str:
Expand Down Expand Up @@ -368,8 +392,9 @@ def from_env(
or env.get(MODAL_APP_NAME_ENV, "")
or env.get("MODAL_APP_NAME", "")
or (
build_modal_resource_name(
build_modal_app_name(
resolved_run_id,
candidate_version=resolved_candidate_version,
prefix=modal_app_prefix,
)
if resolved_run_id
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/calibration/test_chunked_matrix_modal.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,13 @@ def from_name(app_name: str, function_name: str, **kwargs):
return object()

monkeypatch.setitem(sys.modules, "modal", SimpleNamespace(Function=_FakeFunction))
monkeypatch.setenv("US_DATA_MODAL_APP_NAME", "policyengine-us-data-pub-run")
monkeypatch.setenv("US_DATA_MODAL_APP_NAME", "us-data-run")
monkeypatch.setenv("US_DATA_MODAL_ENVIRONMENT", "main")

_lookup_worker_function()

assert captured == {
"app_name": "policyengine-us-data-pub-run",
"app_name": "us-data-run",
"function_name": "build_matrix_chunk_worker",
"kwargs": {"environment_name": "main"},
}
Expand Down
10 changes: 5 additions & 5 deletions tests/unit/calibration/test_compare_calibration_runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,25 @@


def test_run_comparison_paths_are_run_scoped():
paths = RunComparisonPaths("usdata-gha123-a1-abcdef12", version="1.73.0")
paths = RunComparisonPaths("usdata-gha123-a1", version="1.73.0")

assert (
paths.regional_diagnostics
== "hf://policyengine/policyengine-us-data/calibration/runs/"
"usdata-gha123-a1-abcdef12/diagnostics/unified_diagnostics.csv"
"usdata-gha123-a1/diagnostics/unified_diagnostics.csv"
)
assert (
paths.national_diagnostics
== "hf://policyengine/policyengine-us-data/calibration/runs/"
"usdata-gha123-a1-abcdef12/diagnostics/national_unified_diagnostics.csv"
"usdata-gha123-a1/diagnostics/national_unified_diagnostics.csv"
)
assert (
paths.candidate_h5 == "hf://policyengine/policyengine-us-data/staging/"
"1.73.0-usdata-gha123-a1-abcdef12/national/US.h5"
"1.73.0-usdata-gha123-a1/national/US.h5"
)
assert (
paths.legacy_h5 == "hf://policyengine/policyengine-us-data/staging/"
"1.73.0-usdata-gha123-a1-abcdef12/enhanced_cps_2024.h5"
"1.73.0-usdata-gha123-a1/enhanced_cps_2024.h5"
)


Expand Down
8 changes: 4 additions & 4 deletions tests/unit/test_modal_local_area.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,13 @@ def test_promote_scripts_can_defer_staging_cleanup_for_pipeline_promotion():

regional_script = local_area._build_promote_publish_script(
version="1.73.0",
run_id="usdata-gha123-a1-abcdef12",
run_id="usdata-gha123-a1",
rel_paths=["states/AL.h5"],
cleanup_staging=False,
)
national_script = local_area._build_promote_national_publish_script(
version="1.73.0",
run_id="usdata-gha123-a1-abcdef12",
run_id="usdata-gha123-a1",
rel_paths=["national/US.h5"],
cleanup_staging=False,
)
Expand All @@ -72,7 +72,7 @@ def test_promote_publish_falls_back_to_package_version_for_new_run_ids(
monkeypatch, tmp_path
):
local_area = load_local_area_module()
run_id = "usdata-gha123-a1-abcdef12"
run_id = "usdata-gha123-a1"
run_dir = tmp_path / run_id
run_dir.mkdir()
(run_dir / "manifest.json").write_text(
Expand Down Expand Up @@ -107,7 +107,7 @@ def test_promote_national_publish_falls_back_to_package_version_for_new_run_ids(
monkeypatch,
):
local_area = load_local_area_module()
run_id = "usdata-gha123-a1-abcdef12"
run_id = "usdata-gha123-a1"
captured = {}

monkeypatch.setattr(local_area, "setup_gcp_credentials", lambda: None)
Expand Down
33 changes: 33 additions & 0 deletions tests/unit/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
NATIONAL_FIT_LAMBDA_L0,
_build_diagnostics_upload_script,
_calibration_package_parameters,
_new_run_metadata,
_pipeline_error_summary,
_run_required_promotion_subprocess,
)
Expand All @@ -21,6 +22,7 @@
read_run_meta,
write_run_meta,
)
from policyengine_us_data.utils.run_context import RunContext # noqa: E402
from policyengine_us_data.utils.step_manifest import ArtifactReference # noqa: E402


Expand Down Expand Up @@ -102,6 +104,37 @@ def test_pipeline_error_summary_falls_back_to_bounded_traceback(monkeypatch):
assert "old traceback" not in summary


def test_new_run_metadata_accepts_release_context_fields_once():
    """Check that release fields from the run context reach the metadata.

    ``base_release_version`` and ``release_bump`` are supplied only through
    the ``RunContext``; the resulting metadata must expose them both as
    attributes and inside its ``run_context`` mapping.
    """
    base_release = "1.73.0"
    bump = "minor"
    app_name = "us-data-1-73-0-minor-run-123"
    staging = "staging/1.73.0-minor-run-123"

    ctx = RunContext.from_mapping(
        {
            "run_id": "run-123",
            "candidate_version": "1.73.0-minor",
            "release_version": "",
            "base_release_version": base_release,
            "release_bump": bump,
            "modal_app_name": app_name,
            "modal_environment": "main",
            "hf_staging_prefix": staging,
        }
    )

    metadata = _new_run_metadata(
        run_id=ctx.run_id,
        branch="main",
        sha="abc123",
        candidate_version=ctx.candidate_version,
        release_version=ctx.release_version,
        run_context=ctx,
    )

    assert metadata.base_release_version == base_release
    assert metadata.release_bump == bump
    assert metadata.modal_app_name == app_name
    assert metadata.hf_staging_prefix == staging
    assert metadata.run_context["base_release_version"] == base_release
    assert metadata.run_context["release_bump"] == bump


class TestRunMetadata:
def test_to_dict(self):
meta = RunMetadata(
Expand Down
18 changes: 9 additions & 9 deletions tests/unit/test_release_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,17 +211,17 @@ def test_build_release_manifest_records_run_context(tmp_path):
version="1.73.0",
repo_id="policyengine/policyengine-us-data",
run_context={
"run_id": "usdata-gha123-a1-abcdef12",
"modal_app_name": "policyengine-us-data-pub-usdata-gha123-a1-abcdef12",
"hf_staging_prefix": "staging/1.73.0-usdata-gha123-a1-abcdef12",
"run_id": "usdata-gha123-a1",
"modal_app_name": "us-data-1-73-0-usdata-gha123-a1",
"hf_staging_prefix": "staging/1.73.0-usdata-gha123-a1",
},
created_at="2026-04-10T12:00:00Z",
)

assert manifest["build"]["metadata"]["run_context"] == {
"run_id": "usdata-gha123-a1-abcdef12",
"modal_app_name": "policyengine-us-data-pub-usdata-gha123-a1-abcdef12",
"hf_staging_prefix": "staging/1.73.0-usdata-gha123-a1-abcdef12",
"run_id": "usdata-gha123-a1",
"modal_app_name": "us-data-1-73-0-usdata-gha123-a1",
"hf_staging_prefix": "staging/1.73.0-usdata-gha123-a1",
}


Expand All @@ -244,9 +244,9 @@ def test_build_release_manifest_validates_against_bundle_contract(tmp_path):
version="1.73.0",
repo_id="policyengine/policyengine-us-data",
run_context={
"run_id": "usdata-gha123-a1-abcdef12",
"modal_app_name": "policyengine-us-data-pub-usdata-gha123-a1-abcdef12",
"hf_staging_prefix": "staging/1.73.0-usdata-gha123-a1-abcdef12",
"run_id": "usdata-gha123-a1",
"modal_app_name": "us-data-1-73-0-usdata-gha123-a1",
"hf_staging_prefix": "staging/1.73.0-usdata-gha123-a1",
},
model_package_version=EXPECTED_MODEL_PACKAGE_VERSION,
model_package_git_sha="deadbeef",
Expand Down
9 changes: 3 additions & 6 deletions tests/unit/test_remote_calibration_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def fake_run_streaming(cmd, env=None, label=""):
workers=1,
n_clones=10,
run_id="bench-run",
modal_app_name="policyengine-us-data-pub-bench-run",
modal_app_name="us-data-bench-run",
modal_environment="main",
pipeline_volume_name="pipeline-artifacts-bench-run",
chunked_matrix=True,
Expand All @@ -220,11 +220,8 @@ def fake_run_streaming(cmd, env=None, label=""):
assert captured["cmd"][chunk_dir_idx] == str(artifacts_dir / "matrix_build")
assert captured["env"]["POLICYENGINE_US_DATA_RUN_ID"] == "bench-run"
assert captured["env"]["US_DATA_RUN_ID"] == "bench-run"
assert (
captured["env"]["US_DATA_MODAL_APP_NAME"]
== "policyengine-us-data-pub-bench-run"
)
assert captured["env"]["MODAL_APP_NAME"] == "policyengine-us-data-pub-bench-run"
assert captured["env"]["US_DATA_MODAL_APP_NAME"] == "us-data-bench-run"
assert captured["env"]["MODAL_APP_NAME"] == "us-data-bench-run"
assert captured["env"]["US_DATA_MODAL_ENVIRONMENT"] == "main"
assert captured["env"]["MODAL_ENVIRONMENT"] == "main"
assert (
Expand Down
Loading