Skip to content

Commit e9be7d7

Browse files
Add incremental PR sync script and fix timezone bug
- Add scripts/sync-new-prs.sh for scheduled incremental PR analysis
- Fix naive vs. aware datetime comparison in get_max_merged_at_from_csv
- Add logs/ to .gitignore

Made-with: Cursor
1 parent 65c28f7 commit e9be7d7

3 files changed

Lines changed: 106 additions & 0 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ Thumbs.db
5353
*.png
5454
results/
5555

56+
# Sync logs
57+
logs/
58+
5659
# Fetch cache (repos list, PR URLs)
5760
cache/
5861

cli/batch.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,7 @@ def get_max_merged_at_from_csv(csv_path: Optional[Path]) -> Optional[datetime]:
539539
if val.endswith("Z"):
540540
val = val[:-1] + "+00:00"
541541
dt = datetime.fromisoformat(val.replace("Z", "+00:00"))
542+
dt = dt.replace(tzinfo=None)
542543
if max_dt is None or dt > max_dt:
543544
max_dt = dt
544545
except (ValueError, TypeError):

scripts/sync-new-prs.sh

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
#!/bin/bash
#
# Incremental PR complexity sync
# ─────────────────────────────────
# Finds PRs merged since the latest entry in complexity-report.csv,
# scores them with an LLM, labels them on GitHub (complexity:N),
# and appends them to the CSV.
#
# Designed to run on a recurring schedule (e.g. daily cron / launchd).
#
# Usage:
#   ./scripts/sync-new-prs.sh                # default: 14-day search window, 3 workers
#   ./scripts/sync-new-prs.sh --days 7       # override search window
#   ./scripts/sync-new-prs.sh --workers 5    # override parallelism
#   DRY_RUN=1 ./scripts/sync-new-prs.sh      # fetch-only, no analysis or labeling
#
# Exit status:
#   0  success (or dry run completed)
#   1  bad command-line option or missing repos.txt
#   N  exit status of complexity-cli when the analysis run fails

set -euo pipefail

# Always operate from the project root (the parent of this script's directory)
# so the relative paths below resolve no matter where the script is invoked.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$PROJECT_DIR"

CSV_FILE="complexity-report.csv"
REPOS_FILE="repos.txt"
LOG_FILE="logs/sync-$(date +%Y%m%d-%H%M%S).log"

DAYS=14
WORKERS=3

# Print a message to stdout and append it to this run's log file.
log() {
    printf '%s\n' "$*" | tee -a "$LOG_FILE"
}

# Print the number of CSV lines minus one, clamped at 0 so that a missing or
# empty file reports 0 rather than -1.
# NOTE(review): the "minus one" assumes the first line is a header — confirm.
count_rows() {
    local lines=0
    if [[ -f "$CSV_FILE" ]]; then
        lines=$(wc -l < "$CSV_FILE" | tr -d ' ')
    fi
    echo $(( lines > 0 ? lines - 1 : 0 ))
}

# Print the 6th comma-separated field of the last CSV line, or "none" when the
# file is missing/empty or that field is blank. Informational only.
# NOTE(review): this is a naive comma split — a quoted field containing a comma
# earlier in the row would shift the columns.
latest_merged() {
    local value=""
    if [[ -s "$CSV_FILE" ]]; then
        value=$(tail -n 1 "$CSV_FILE" | awk -F',' '{print $6}') || value=""
    fi
    echo "${value:-none}"
}

while [[ $# -gt 0 ]]; do
    case $1 in
        --days|--workers)
            # Check explicitly for the value; otherwise `set -u` aborts with an
            # opaque "unbound variable" message.
            if [[ $# -lt 2 ]]; then
                echo "Error: $1 requires a value" >&2
                exit 1
            fi
            if [[ "$1" == "--days" ]]; then
                DAYS="$2"
            else
                WORKERS="$2"
            fi
            shift 2
            ;;
        *)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
    esac
done

mkdir -p "$(dirname "$LOG_FILE")"

log "=== PR Complexity Sync ==="
log "Started: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
log "Search window: last $DAYS days"
log "Workers: $WORKERS"

if [[ ! -f "$CSV_FILE" ]]; then
    log "Warning: $CSV_FILE not found — will create a fresh one"
fi

if [[ ! -f "$REPOS_FILE" ]]; then
    log "Error: $REPOS_FILE not found. Create it with one owner/repo per line."
    exit 1
fi

log "Latest merged_at in CSV: $(latest_merged)"

ROWS_BEFORE=$(count_rows)
log "Rows before: $ROWS_BEFORE"

if [[ "${DRY_RUN:-0}" == "1" ]]; then
    log "DRY_RUN=1 — fetching PR list only, no analysis or labeling"
    complexity-cli batch-analyze \
        --repos-file "$REPOS_FILE" \
        --days "$DAYS" \
        --output "$CSV_FILE" \
        --fetch-only \
        --cache "cache/sync-dryrun-$(date +%Y%m%d).txt" \
        2>&1 | tee -a "$LOG_FILE"
    log "Done (dry run). Check the cache file for the PR list."
    exit 0
fi

# Capture the exit status instead of letting `set -e` abort here: on failure the
# CLI output must still reach the log, otherwise a failed scheduled run leaves
# no diagnostics behind.
CLI_STATUS=0
CLI_OUTPUT=$(complexity-cli batch-analyze \
    --repos-file "$REPOS_FILE" \
    --days "$DAYS" \
    --output "$CSV_FILE" \
    --label \
    --workers "$WORKERS" \
    --resume \
    2>&1) || CLI_STATUS=$?

log "$CLI_OUTPUT"

if [[ "$CLI_STATUS" -ne 0 ]]; then
    log "Error: complexity-cli exited with status $CLI_STATUS"
fi

# Extract N from the first "Found N PRs" in the output. A bash regex avoids a
# `grep | head` pipeline, which under pipefail can report failure (SIGPIPE) even
# when a match was found.
FOUND=0
if [[ "$CLI_OUTPUT" =~ Found\ ([0-9]+)\ PRs ]]; then
    FOUND="${BASH_REMATCH[1]}"
fi

ROWS_AFTER=$(count_rows)
UPDATED=$(( ROWS_AFTER - ROWS_BEFORE ))

log "──────────────────────────"
log "Finished: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
log "New latest merged_at: $(latest_merged)"
log "Total CSV rows: $ROWS_AFTER"
log "METRICS: found=$FOUND labeled=$UPDATED total=$ROWS_AFTER"
echo "Log: $LOG_FILE"

# Propagate the CLI's failure so cron/launchd see a non-zero exit.
exit "$CLI_STATUS"

0 commit comments

Comments
 (0)