# Collect Traffic Data — workflow file (captured from workflow run #8)
name: Collect Traffic Data

on:
  schedule:
    # Runs daily at 06:00 UTC to capture traffic before the 14-day window expires
    - cron: "0 6 * * *"
  workflow_dispatch: # Allow manual runs

permissions:
  contents: write # required to commit the collected data back to the repository

concurrency:
  group: traffic-data-collection
  cancel-in-progress: false # let an in-flight collection finish rather than cancelling it

jobs:
  collect-traffic:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Collect and persist traffic data
        env:
          # NOTE(review): presumably a PAT with push/traffic access on the target
          # repo — confirm the secret's scope, since the target repo is hard-coded.
          GH_TOKEN: ${{ secrets.TRAFFIC_TOKEN }}
        run: |
          set -euo pipefail

          REPO="microsoft/FastTrack"
          REPO_OWNER="${REPO%/*}"
          REPO_NAME="${REPO#*/}"
          DATA_DIR="traffic-data"
          DATE=$(date -u +"%Y-%m-%d")

          # Run `gh api` with up to 3 attempts and exponential backoff (5s, 10s).
          # Prints the response body on success; returns 1 after the last failure.
          gh_api_retry() {
            local max_attempts=3
            local delay=5
            local attempt=1
            local output
            while [ "$attempt" -le "$max_attempts" ]; do
              if output=$(gh api "$@" 2>/dev/null); then
                printf '%s' "$output"
                return 0
              fi
              # Skip the backoff (and the misleading "retrying" warning) once
              # the final attempt has failed — there is nothing left to retry.
              if [ "$attempt" -lt "$max_attempts" ]; then
                echo "::warning::gh api attempt $attempt/$max_attempts failed, retrying in ${delay}s..." >&2
                sleep "$delay"
                delay=$((delay * 2))
              fi
              attempt=$((attempt + 1))
            done
            return 1
          }

          # Fetch an endpoint whose data is mandatory: fail the job on HTTP
          # errors (after retries) or on a non-JSON response body.
          fetch_required_json() {
            local label="$1"
            local endpoint="$2"
            local response
            if ! response=$(gh_api_retry "$endpoint"); then
              echo "::error::Failed to fetch $label after retries"
              exit 1
            fi
            if ! printf '%s' "$response" | jq -e . >/dev/null 2>&1; then
              echo "::error::Invalid JSON returned for $label"
              exit 1
            fi
            printf '%s' "$response"
          }

          mkdir -p "$DATA_DIR"
          echo "📊 Fetching traffic data for $REPO on $DATE..."

          # Fetch all four traffic endpoints (each covers the trailing 14 days)
          views=$(fetch_required_json "views" "repos/$REPO/traffic/views")
          clones=$(fetch_required_json "clones" "repos/$REPO/traffic/clones")
          referrers=$(fetch_required_json "referrers" "repos/$REPO/traffic/popular/referrers")
          paths=$(fetch_required_json "paths" "repos/$REPO/traffic/popular/paths")

          # Fetch star/fork metadata with graceful fallback
          echo "⭐ Fetching repository metadata..."
          star_count=0
          forks_count=0
          repo_metadata="{}"
          if repo_response=$(gh_api_retry "repos/$REPO"); then
            if printf '%s' "$repo_response" | jq -e . >/dev/null 2>&1; then
              repo_metadata="$repo_response"
              star_count=$(printf '%s' "$repo_metadata" | jq -r '.stargazers_count // 0' 2>/dev/null || printf '0')
              forks_count=$(printf '%s' "$repo_metadata" | jq -r '.forks_count // 0' 2>/dev/null || printf '0')
            else
              echo "::warning::Invalid JSON returned for repository metadata. Using fallback values."
            fi
          else
            echo "::warning::Failed to fetch repository metadata. Using fallback values."
          fi

          # Fetch stargazer timeline (with starred_at dates).
          # --paginate --slurp yields an array of pages (each a JSON array),
          # hence the double iteration `.[].[]?` in the jq filter.
          echo "📅 Fetching stargazer timeline..."
          star_timeline="[]"
          if star_timeline_response=$(gh_api_retry --paginate --slurp \
              -H "Accept: application/vnd.github.star+json" \
              "repos/$REPO/stargazers"); then
            if star_timeline_json=$(printf '%s' "$star_timeline_response" | jq -ce '[.[].[]? | .starred_at | select(type == "string" and length > 0)] | sort' 2>/dev/null); then
              star_timeline="$star_timeline_json"
            else
              echo "::warning::Failed to parse stargazer timeline. Using empty array."
            fi
          else
            echo "::warning::Failed to fetch stargazer timeline. Using empty array."
          fi

          # Fetch a sample of stargazer locations (last 30 stargazers) in one GraphQL call
          echo "🌍 Fetching stargazer locations..."
          locations="[]"
          location_query='query($owner: String!, $name: String!) {
            repository(owner: $owner, name: $name) {
              stargazers(last: 30) {
                nodes {
                  ... on User {
                    location
                  }
                }
              }
            }
          }'
          if location_response=$(gh_api_retry graphql \
              -f query="$location_query" \
              -F owner="$REPO_OWNER" \
              -F name="$REPO_NAME"); then
            # Treat a response carrying a GraphQL "errors" array as a failure
            # even though the HTTP call itself succeeded.
            if location_json=$(printf '%s' "$location_response" | jq -ce '
              if ((.errors // []) | length) > 0 then
                error("graphql errors")
              else
                [.data.repository.stargazers.nodes[]?.location | select(type == "string" and length > 0)]
              end
            ' 2>/dev/null); then
              locations="$location_json"
            else
              echo "::warning::Failed to parse stargazer locations. Using empty array."
            fi
          else
            echo "::warning::Failed to fetch stargazer locations. Using empty array."
          fi

          # Build a combined JSON snapshot for today
          jq -n \
            --arg date "$DATE" \
            --argjson views "$views" \
            --argjson clones "$clones" \
            --argjson referrers "$referrers" \
            --argjson paths "$paths" \
            --argjson star_count "$star_count" \
            --argjson forks_count "$forks_count" \
            --argjson star_timeline "$star_timeline" \
            --argjson stargazer_locations "$locations" \
            '{
              collected_at: $date,
              views: $views,
              clones: $clones,
              referrers: $referrers,
              paths: $paths,
              stars: $star_count,
              forks: $forks_count,
              star_timeline: $star_timeline,
              stargazer_locations: $stargazer_locations
            }' > "$DATA_DIR/$DATE.json"
          echo "✅ Saved $DATA_DIR/$DATE.json"

          # Also maintain a rolling summary file with unique daily totals
          SUMMARY_FILE="$DATA_DIR/summary.json"

          # Extract today's totals from the 14-day data. Pass $DATE via --arg
          # instead of interpolating it into the jq program text.
          today_views=$(printf '%s' "$views" | jq --arg date "$DATE" '[.views[]? | select(.timestamp | startswith($date))] | .[0] // {count: 0, uniques: 0}')
          today_clones=$(printf '%s' "$clones" | jq --arg date "$DATE" '[.clones[]? | select(.timestamp | startswith($date))] | .[0] // {count: 0, uniques: 0}')
          new_entry=$(jq -n \
            --arg date "$DATE" \
            --argjson views "$today_views" \
            --argjson clones "$today_clones" \
            '{
              date: $date,
              views_count: ($views.count // 0),
              views_uniques: ($views.uniques // 0),
              clones_count: ($clones.count // 0),
              clones_uniques: ($clones.uniques // 0)
            }')
          if [ -f "$SUMMARY_FILE" ]; then
            # Append if date doesn't already exist, otherwise update
            jq --argjson entry "$new_entry" '
              if any(.[]; .date == $entry.date)
              then map(if .date == $entry.date then $entry else . end)
              else . + [$entry]
              end
            ' "$SUMMARY_FILE" > "${SUMMARY_FILE}.tmp" && mv "${SUMMARY_FILE}.tmp" "$SUMMARY_FILE"
          else
            # Seed the summary with today's entry; build the array inside jq
            # rather than via echo so the shell never touches the JSON text.
            jq -n --argjson entry "$new_entry" '[$entry]' > "$SUMMARY_FILE"
          fi
          echo "✅ Updated $SUMMARY_FILE"

      - name: Commit and push
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add traffic-data/
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            git commit -m "📊 Traffic data for $(date -u +%Y-%m-%d)"
            git push
          fi