# Collect Traffic Data (#8)
# NOTE: this file was recovered from a web export; the viewer's "hidden or
# bidirectional Unicode" notice applied to mis-encoded emoji in message strings.
name: Collect Traffic Data

on:
  schedule:
    # Runs daily at 06:00 UTC to capture traffic before the 14-day window expires
    - cron: "0 6 * * *"
  workflow_dispatch: # Allow manual runs

# The job commits collected data back to the repository, so it needs write
# access to repo contents.
permissions:
  contents: write

# Serialize runs: two overlapping collections would race on the same data
# files and the final push. Never cancel a run that is already persisting data.
concurrency:
  group: traffic-data-collection
  cancel-in-progress: false

jobs:
  collect-traffic:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
| - name: Collect and persist traffic data | |
| env: | |
| GH_TOKEN: ${{ secrets.TRAFFIC_TOKEN }} | |
| run: | | |
| set -euo pipefail | |
| REPO="microsoft/FastTrack" | |
| REPO_OWNER="${REPO%/*}" | |
| REPO_NAME="${REPO#*/}" | |
| DATA_DIR="traffic-data" | |
| DATE=$(date -u +"%Y-%m-%d") | |
| gh_api_retry() { | |
| local max_attempts=3 | |
| local delay=5 | |
| local attempt=1 | |
| local output | |
| while [ "$attempt" -le "$max_attempts" ]; do | |
| if output=$(gh api "$@" 2>/dev/null); then | |
| printf '%s' "$output" | |
| return 0 | |
| fi | |
| echo "::warning::gh api attempt $attempt/$max_attempts failed, retrying in ${delay}s..." >&2 | |
| sleep "$delay" | |
| delay=$((delay * 2)) | |
| attempt=$((attempt + 1)) | |
| done | |
| return 1 | |
| } | |
| fetch_required_json() { | |
| local label="$1" | |
| local endpoint="$2" | |
| local response | |
| if ! response=$(gh_api_retry "$endpoint"); then | |
| echo "::error::Failed to fetch $label after retries" | |
| exit 1 | |
| fi | |
| if ! printf '%s' "$response" | jq -e . >/dev/null 2>&1; then | |
| echo "::error::Invalid JSON returned for $label" | |
| exit 1 | |
| fi | |
| printf '%s' "$response" | |
| } | |
| mkdir -p "$DATA_DIR" | |
| echo "π Fetching traffic data for $REPO on $DATE..." | |
| # Fetch all four traffic endpoints | |
| views=$(fetch_required_json "views" "repos/$REPO/traffic/views") | |
| clones=$(fetch_required_json "clones" "repos/$REPO/traffic/clones") | |
| referrers=$(fetch_required_json "referrers" "repos/$REPO/traffic/popular/referrers") | |
| paths=$(fetch_required_json "paths" "repos/$REPO/traffic/popular/paths") | |
| # Build a combined JSON snapshot for today | |
| # Fetch star/fork metadata with graceful fallback | |
| echo "β Fetching repository metadata..." | |
| star_count=0 | |
| forks_count=0 | |
| repo_metadata="{}" | |
| if repo_response=$(gh_api_retry "repos/$REPO"); then | |
| if printf '%s' "$repo_response" | jq -e . >/dev/null 2>&1; then | |
| repo_metadata="$repo_response" | |
| star_count=$(printf '%s' "$repo_metadata" | jq -r '.stargazers_count // 0' 2>/dev/null || printf '0') | |
| forks_count=$(printf '%s' "$repo_metadata" | jq -r '.forks_count // 0' 2>/dev/null || printf '0') | |
| else | |
| echo "::warning::Invalid JSON returned for repository metadata. Using fallback values." | |
| fi | |
| else | |
| echo "::warning::Failed to fetch repository metadata. Using fallback values." | |
| fi | |
| # Fetch stargazer timeline (with starred_at dates) | |
| echo "π Fetching stargazer timeline..." | |
| star_timeline="[]" | |
| if star_timeline_response=$(gh_api_retry --paginate --slurp \ | |
| -H "Accept: application/vnd.github.star+json" \ | |
| "repos/$REPO/stargazers"); then | |
| if star_timeline_json=$(printf '%s' "$star_timeline_response" | jq -ce '[.[].[]? | .starred_at | select(type == "string" and length > 0)] | sort' 2>/dev/null); then | |
| star_timeline="$star_timeline_json" | |
| else | |
| echo "::warning::Failed to parse stargazer timeline. Using empty array." | |
| fi | |
| else | |
| echo "::warning::Failed to fetch stargazer timeline. Using empty array." | |
| fi | |
| # Fetch a sample of stargazer locations (last 30 stargazers) in one GraphQL call | |
| echo "π Fetching stargazer locations..." | |
| locations="[]" | |
| location_query='query($owner: String!, $name: String!) { | |
| repository(owner: $owner, name: $name) { | |
| stargazers(last: 30) { | |
| nodes { | |
| ... on User { | |
| location | |
| } | |
| } | |
| } | |
| } | |
| }' | |
| if location_response=$(gh_api_retry graphql \ | |
| -f query="$location_query" \ | |
| -F owner="$REPO_OWNER" \ | |
| -F name="$REPO_NAME"); then | |
| if location_json=$(printf '%s' "$location_response" | jq -ce ' | |
| if ((.errors // []) | length) > 0 then | |
| error("graphql errors") | |
| else | |
| [.data.repository.stargazers.nodes[]?.location | select(type == "string" and length > 0)] | |
| end | |
| ' 2>/dev/null); then | |
| locations="$location_json" | |
| else | |
| echo "::warning::Failed to parse stargazer locations. Using empty array." | |
| fi | |
| else | |
| echo "::warning::Failed to fetch stargazer locations. Using empty array." | |
| fi | |
| # Build combined JSON snapshot | |
| jq -n \ | |
| --arg date "$DATE" \ | |
| --argjson views "$views" \ | |
| --argjson clones "$clones" \ | |
| --argjson referrers "$referrers" \ | |
| --argjson paths "$paths" \ | |
| --argjson star_count "$star_count" \ | |
| --argjson forks_count "$forks_count" \ | |
| --argjson star_timeline "$star_timeline" \ | |
| --argjson stargazer_locations "$locations" \ | |
| '{ | |
| collected_at: $date, | |
| views: $views, | |
| clones: $clones, | |
| referrers: $referrers, | |
| paths: $paths, | |
| stars: $star_count, | |
| forks: $forks_count, | |
| star_timeline: $star_timeline, | |
| stargazer_locations: $stargazer_locations | |
| }' > "$DATA_DIR/$DATE.json" | |
| echo "β Saved $DATA_DIR/$DATE.json" | |
| # Also maintain a rolling summary file with unique daily totals | |
| SUMMARY_FILE="$DATA_DIR/summary.json" | |
| # Extract today's totals from the 14-day data | |
| today_views=$(printf '%s' "$views" | jq "[.views[]? | select(.timestamp | startswith(\"$DATE\"))] | .[0] // {count: 0, uniques: 0}") | |
| today_clones=$(printf '%s' "$clones" | jq "[.clones[]? | select(.timestamp | startswith(\"$DATE\"))] | .[0] // {count: 0, uniques: 0}") | |
| new_entry=$(jq -n \ | |
| --arg date "$DATE" \ | |
| --argjson views "$today_views" \ | |
| --argjson clones "$today_clones" \ | |
| '{ | |
| date: $date, | |
| views_count: ($views.count // 0), | |
| views_uniques: ($views.uniques // 0), | |
| clones_count: ($clones.count // 0), | |
| clones_uniques: ($clones.uniques // 0) | |
| }') | |
| if [ -f "$SUMMARY_FILE" ]; then | |
| # Append if date doesn't already exist, otherwise update | |
| jq --argjson entry "$new_entry" ' | |
| if any(.[]; .date == $entry.date) | |
| then map(if .date == $entry.date then $entry else . end) | |
| else . + [$entry] | |
| end | |
| ' "$SUMMARY_FILE" > "${SUMMARY_FILE}.tmp" && mv "${SUMMARY_FILE}.tmp" "$SUMMARY_FILE" | |
| else | |
| echo "[$new_entry]" | jq '.' > "$SUMMARY_FILE" | |
| fi | |
| echo "β Updated $SUMMARY_FILE" | |
| - name: Commit and push | |
| run: | | |
| git config user.name "github-actions[bot]" | |
| git config user.email "github-actions[bot]@users.noreply.github.com" | |
| git add traffic-data/ | |
| if git diff --cached --quiet; then | |
| echo "No changes to commit" | |
| else | |
| git commit -m "π Traffic data for $(date -u +%Y-%m-%d)" | |
| git push | |
| fi |