Skip to content

Collect Traffic Data #50

Collect Traffic Data

Collect Traffic Data #50

Workflow file for this run

name: Collect Traffic Data

on:
  schedule:
    # Every day at 06:00 UTC, so traffic is captured before the 14-day window expires.
    - cron: '0 6 * * *'
  # Manual runs are allowed as well.
  workflow_dispatch:

permissions:
  contents: write
  pull-requests: write

# All runs share one concurrency group; cancel-in-progress is false, so a
# newer run does not cancel one that is already running.
concurrency:
  group: traffic-data-collection
  cancel-in-progress: false
jobs:
  # Collects a daily traffic snapshot into traffic-data/<date>.json and
  # proposes it through an auto-merging pull request.
  collect-traffic:
    # Skip the job anywhere other than microsoft/FastTrack.
    if: github.repository == 'microsoft/FastTrack'
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Collect and persist traffic data
        env:
          GH_TOKEN: ${{ secrets.TRAFFIC_TOKEN }}
        run: |
          set -euo pipefail

          REPO="microsoft/FastTrack"
          REPO_OWNER="${REPO%/*}"
          REPO_NAME="${REPO#*/}"
          DATA_DIR="traffic-data"
          DATE=$(date -u +"%Y-%m-%d")
          # Share the snapshot date with later steps so the file name, branch
          # and PR title agree even if the job crosses midnight UTC.
          echo "TRAFFIC_DATE=$DATE" >> "$GITHUB_ENV"

          # gh_api_retry ARGS...
          #   Runs `gh api ARGS...` up to 3 times, pausing 5s and then 10s
          #   between attempts. On success prints the response on stdout and
          #   returns 0; returns 1 once every attempt has failed.
          #   gh's own stderr is left untouched so the failure reason shows up
          #   in the step log; all warnings go to stderr because callers
          #   capture stdout.
          gh_api_retry() {
            local max_attempts=3
            local delay=5
            local attempt=1
            local output
            while [ "$attempt" -le "$max_attempts" ]; do
              if output=$(gh api "$@"); then
                printf '%s' "$output"
                return 0
              fi
              if [ "$attempt" -lt "$max_attempts" ]; then
                echo "::warning::gh api attempt $attempt/$max_attempts failed, retrying in ${delay}s..." >&2
                sleep "$delay"
                delay=$((delay * 2))
              else
                # No point sleeping when there is no further attempt.
                echo "::warning::gh api attempt $attempt/$max_attempts failed, giving up." >&2
              fi
              attempt=$((attempt + 1))
            done
            return 1
          }

          # fetch_required_json LABEL ENDPOINT
          #   Fetches ENDPOINT via gh_api_retry and prints it on stdout after
          #   checking that it parses as JSON. On any failure emits an
          #   ::error:: annotation and exits 1. It is called inside $(...), so
          #   the annotation must go to stderr (stdout is captured by the
          #   caller) and the exit ends the subshell; `set -e` then aborts the
          #   script on the failed assignment.
          fetch_required_json() {
            local label="$1"
            local endpoint="$2"
            local response
            if ! response=$(gh_api_retry "$endpoint"); then
              echo "::error::Failed to fetch $label after retries" >&2
              exit 1
            fi
            if ! printf '%s' "$response" | jq -e . >/dev/null 2>&1; then
              echo "::error::Invalid JSON returned for $label" >&2
              exit 1
            fi
            printf '%s' "$response"
          }

          mkdir -p "$DATA_DIR"
          echo "📊 Fetching traffic data for $REPO on $DATE..."

          # Fetch all four traffic endpoints (required: any failure aborts the step)
          views=$(fetch_required_json "views" "repos/$REPO/traffic/views")
          clones=$(fetch_required_json "clones" "repos/$REPO/traffic/clones")
          referrers=$(fetch_required_json "referrers" "repos/$REPO/traffic/popular/referrers")
          paths=$(fetch_required_json "paths" "repos/$REPO/traffic/popular/paths")

          # Fetch star/fork metadata with graceful fallback to 0
          echo "⭐ Fetching repository metadata..."
          star_count=0
          forks_count=0
          if repo_response=$(gh_api_retry "repos/$REPO"); then
            if printf '%s' "$repo_response" | jq -e . >/dev/null 2>&1; then
              star_count=$(printf '%s' "$repo_response" | jq -r '.stargazers_count // 0' 2>/dev/null || printf '0')
              forks_count=$(printf '%s' "$repo_response" | jq -r '.forks_count // 0' 2>/dev/null || printf '0')
            else
              echo "::warning::Invalid JSON returned for repository metadata. Using fallback values."
            fi
          else
            echo "::warning::Failed to fetch repository metadata. Using fallback values."
          fi

          # Fetch stargazer timeline (with starred_at dates)
          echo "📅 Fetching stargazer timeline..."
          star_timeline="[]"
          if star_timeline_response=$(gh_api_retry --paginate --slurp \
            -H "Accept: application/vnd.github.star+json" \
            "repos/$REPO/stargazers"); then
            # --slurp wraps the pages in an outer array, hence the double
            # iteration. `.[][]?` is used instead of `.[].[]?` because the
            # dotted form is only accepted by newer jq releases.
            if star_timeline_json=$(printf '%s' "$star_timeline_response" | jq -ce '[.[][]? | .starred_at | select(type == "string" and length > 0)] | sort' 2>/dev/null); then
              star_timeline="$star_timeline_json"
            else
              echo "::warning::Failed to parse stargazer timeline. Using empty array."
            fi
          else
            echo "::warning::Failed to fetch stargazer timeline. Using empty array."
          fi

          # Fetch a sample of stargazer locations (last 30 stargazers) in one GraphQL call
          echo "🌍 Fetching stargazer locations..."
          locations="[]"
          location_query='query($owner: String!, $name: String!) {
            repository(owner: $owner, name: $name) {
              stargazers(last: 30) {
                nodes {
                  ... on User {
                    location
                  }
                }
              }
            }
          }'
          # -f (raw string) rather than -F: both variables are declared
          # String!, and -F would convert values such as "true" or "123".
          if location_response=$(gh_api_retry graphql \
            -f query="$location_query" \
            -f owner="$REPO_OWNER" \
            -f name="$REPO_NAME"); then
            if location_json=$(printf '%s' "$location_response" | jq -ce '
              if ((.errors // []) | length) > 0 then
                error("graphql errors")
              else
                [.data.repository.stargazers.nodes[]?.location | select(type == "string" and length > 0)]
              end
            ' 2>/dev/null); then
              locations="$location_json"
            else
              echo "::warning::Failed to parse stargazer locations. Using empty array."
            fi
          else
            echo "::warning::Failed to fetch stargazer locations. Using empty array."
          fi

          # Build combined JSON snapshot
          jq -n \
            --arg date "$DATE" \
            --argjson views "$views" \
            --argjson clones "$clones" \
            --argjson referrers "$referrers" \
            --argjson paths "$paths" \
            --argjson star_count "$star_count" \
            --argjson forks_count "$forks_count" \
            --argjson star_timeline "$star_timeline" \
            --argjson stargazer_locations "$locations" \
            '{
              collected_at: $date,
              views: $views,
              clones: $clones,
              referrers: $referrers,
              paths: $paths,
              stars: $star_count,
              forks: $forks_count,
              star_timeline: $star_timeline,
              stargazer_locations: $stargazer_locations
            }' > "$DATA_DIR/$DATE.json"
          echo "✅ Saved $DATA_DIR/$DATE.json"

      - name: Commit and open PR
        env:
          GH_TOKEN: ${{ secrets.TRAFFIC_TOKEN }}
        run: |
          set -euo pipefail

          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add traffic-data/
          if git diff --cached --quiet; then
            echo "No changes to commit"
            exit 0
          fi

          # Reuse the date exported by the collection step; fall back to
          # "today" if it is not set.
          DATE="${TRAFFIC_DATE:-$(date -u +%Y-%m-%d)}"
          BRANCH="traffic-data/$DATE"

          # Prints the number of the open PR whose head is $BRANCH, or nothing.
          open_pr_number() {
            gh pr list --head "$BRANCH" --state open --json number --jq '.[0].number // empty'
          }

          # Create a fresh branch and push. The branch is rebuilt from the
          # checked-out commit on every run, so overwrite a same-named branch
          # left on the remote by an earlier run; a plain push would be
          # rejected as non-fast-forward.
          git checkout -b "$BRANCH"
          git commit -m "📊 Traffic data for $DATE"
          git push --force --set-upstream origin "$BRANCH"

          # Create the PR unless one is already open for this branch
          existing_pr=$(open_pr_number)
          if [ -n "$existing_pr" ]; then
            echo "PR #$existing_pr already exists for $BRANCH"
          else
            gh pr create \
              --title "📊 Traffic data for $DATE" \
              --body "Automated daily traffic data collection." \
              --head "$BRANCH" \
              --base master
            echo "✅ PR created"
          fi

          # Enable auto-merge (requires repo setting "Allow auto-merge" to be on).
          # Failure here is deliberately non-fatal: the PR stays open for a manual merge.
          pr_number=$(open_pr_number)
          if [ -n "$pr_number" ]; then
            if gh pr merge "$pr_number" --auto --squash; then
              echo "✅ Auto-merge enabled for PR #$pr_number"
            else
              echo "::warning::Could not enable auto-merge — check repo settings"
            fi
          fi