Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 71 additions & 45 deletions .github/workflows/registry-backfill.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,54 @@ on: # yamllint disable-line rule:truthy
- staging
- live
default: staging
providers:
provider-versions:
description: >
Space-separated provider IDs
(e.g. 'amazon google databricks')
required: true
type: string
versions:
description: >
Space-separated versions to backfill
(e.g. '9.15.0 9.14.0'). Applied to ALL providers.
Space-separated provider/version pairs
(e.g. 'amazon/9.24.0 google/21.0.0 celery/3.17.2').
Multiple versions per provider are grouped into one job.
required: true
type: string

permissions:
contents: read
packages: read

jobs:
build-ci-image:
name: "Build CI image"
uses: ./.github/workflows/ci-image-build.yml
permissions:
contents: read
packages: write
if: >
contains(fromJSON('[
"ashb",
"bugraoz93",
"eladkal",
"ephraimbuddy",
"jedcunningham",
"jscheffl",
"kaxil",
"pierrejeambrun",
"shahar1",
"potiuk",
"utkarsharma2",
"vincbeck"
]'), github.event.sender.login)
with:
runners: '["ubuntu-22.04"]'
platform: "linux/amd64"
push-image: "false"
upload-image-artifact: "true"
upload-mount-cache-artifact: "false"
python-versions: '["3.12"]'
branch: "main"
constraints-branch: "constraints-main"
use-uv: "true"
upgrade-to-newer-dependencies: "false"
docker-cache: "registry"
disable-airflow-repo-cache: "false"

prepare:
runs-on: ubuntu-latest
outputs:
Expand All @@ -55,12 +86,19 @@ jobs:
- name: "Build provider matrix"
id: matrix
env:
PROVIDERS: ${{ inputs.providers }}
PROVIDER_VERSIONS: ${{ inputs.provider-versions }}
run: |
MATRIX=$(echo "${PROVIDERS}" \
| tr ' ' '\n' | jq -R . \
| jq -cs '{"provider": .}')
# Parse provider/version pairs, group by provider
# Input: "amazon/9.24.0 google/21.0.0 amazon/9.23.0"
# Output: {"include": [{"provider":"amazon","versions":"9.24.0 9.23.0"}, ...]}
MATRIX=$(echo "${PROVIDER_VERSIONS}" | tr ' ' '\n' | grep '/' | \
jq -R 'split("/") | {provider: .[0], version: .[1]}' | \
jq -cs 'group_by(.provider) | map({
provider: .[0].provider,
versions: (map(.version) | join(" "))
}) | {include: .}')
echo "matrix=${MATRIX}" >> "${GITHUB_OUTPUT}"
echo "Matrix: ${MATRIX}"

- name: "Determine S3 destination"
id: destination
Expand All @@ -76,28 +114,16 @@ jobs:
>> "${GITHUB_OUTPUT}"

backfill:
needs: prepare
needs: [prepare, build-ci-image]
runs-on: ubuntu-latest
timeout-minutes: 60
strategy:
fail-fast: false
matrix: ${{ fromJSON(needs.prepare.outputs.matrix) }}
name: "Backfill ${{ matrix.provider }}"
if: >
contains(fromJSON('[
"ashb",
"bugraoz93",
"eladkal",
"ephraimbuddy",
"jedcunningham",
"jscheffl",
"kaxil",
"pierrejeambrun",
"shahar1",
"potiuk",
"utkarsharma2",
"vincbeck"
]'), github.event.sender.login)
name: "Backfill ${{ matrix.provider }} (${{ matrix.versions }})"
permissions:
contents: read
packages: read
steps:
- name: "Checkout repository"
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
Expand All @@ -107,23 +133,23 @@ jobs:

- name: "Fetch provider tags"
env:
VERSIONS: ${{ inputs.versions }}
VERSIONS: ${{ matrix.versions }}
PROVIDER: ${{ matrix.provider }}
run: |
for VERSION in ${VERSIONS}; do
TAG="providers-${PROVIDER}/${VERSION}"
echo "Fetching tag: ${TAG}"
git fetch origin tag "${TAG}" \
2>/dev/null || echo "Tag not found"
2>/dev/null || echo "Tag not found: ${TAG}"
done

- name: "Install uv"
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0

- name: "Install Breeze"
uses: ./.github/actions/breeze
- name: "Prepare breeze & CI image"
uses: ./.github/actions/prepare_breeze_and_image
with:
python-version: "3.12"
python: "3.12"
platform: "linux/amd64"
use-uv: "true"
make-mnt-writeable-and-cleanup: "true"

- name: "Install AWS CLI v2"
run: |
Expand Down Expand Up @@ -152,7 +178,7 @@ jobs:

- name: "Extract version metadata from git tags"
env:
VERSIONS: ${{ inputs.versions }}
VERSIONS: ${{ matrix.versions }}
PROVIDER: ${{ matrix.provider }}
run: |
VERSION_ARGS=""
Expand All @@ -164,15 +190,15 @@ jobs:

- name: "Run breeze registry backfill"
env:
VERSIONS: ${{ inputs.versions }}
VERSIONS: ${{ matrix.versions }}
PROVIDER: ${{ matrix.provider }}
run: |
VERSION_ARGS=""
for VERSION in ${VERSIONS}; do
VERSION_ARGS="${VERSION_ARGS} --version ${VERSION}"
done
breeze registry backfill \
--provider "${PROVIDER}" ${VERSION_ARGS}
--provider "${PROVIDER}" --python 3.12 ${VERSION_ARGS}

- name: "Download data files from S3 for build"
env:
Expand All @@ -186,12 +212,12 @@ jobs:
registry/src/_data/modules.json

- name: "Setup pnpm"
uses: pnpm/action-setup@8912a9102ac27614460f54aedde9e1e7f9aec20d # v6.0.5
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
with:
version: 9
version: 10

- name: "Setup Node.js"
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
with:
node-version: 24
cache: 'pnpm'
Expand All @@ -211,7 +237,7 @@ jobs:
env:
S3_BUCKET: ${{ needs.prepare.outputs.bucket }}
CACHE_CONTROL: "public, max-age=300"
VERSIONS: ${{ inputs.versions }}
VERSIONS: ${{ matrix.versions }}
PROVIDER: ${{ matrix.provider }}
run: |
for VERSION in ${VERSIONS}; do
Expand Down
8 changes: 4 additions & 4 deletions dev/breeze/doc/images/output_registry.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion dev/breeze/doc/images/output_registry.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
8c9be6264d33af7facd1fbdf435697b7
27b4df2c81ed8e0d4c566e552e13bb6a
Loading
Loading