push

OhadPerryBoomi · OhadPerryBoomi · commit 52ec1d40bd8f · 2026-02-24T08:21:19.000+02:00
diff --git a/cli/main.py b/cli/main.py
@@ -596,6 +596,9 @@ def batch_analyze(
     overwrite: bool = typer.Option(
         False, "--overwrite", "--full-sync", help="Ignore existing CSV; fetch full date range (default: incremental fetch from latest CSV data)"
     ),
+    fetch_only: bool = typer.Option(
+        False, "--fetch-only", help="Only fetch PR URLs to cache; skip analysis and labeling"
+    ),
 ):
     """
     Batch analyze multiple PRs from a file or date range.
@@ -647,26 +650,26 @@ def batch_analyze(
             )
             raise typer.Exit(1)
 
-        # Require output_file unless --label is used
-        if not label and not output_file:
-            typer.echo("Error: --output is required unless --label is used", err=True)
+        # Require output_file unless --label or --fetch-only is used
+        if not label and not output_file and not fetch_only:
+            typer.echo("Error: --output is required unless --label or --fetch-only is used", err=True)
             raise typer.Exit(1)
 
         # When labeling, default to writing CSV as well (override with --output)
         if label and not output_file:
             output_file = Path("complexity-report.csv")
 
-        # Get credentials
-        openai_key = get_openai_api_key()
+        # Get credentials (skip LLM keys when fetch-only)
+        openai_key = get_openai_api_key() if not fetch_only else None
 
-        if provider == "openai" and not openai_key:
+        if not fetch_only and provider == "openai" and not openai_key:
             typer.echo("Error: OPENAI_API_KEY environment variable is required for openai provider", err=True)
             typer.echo("Set it with: export OPENAI_API_KEY='your-key'", err=True)
             raise typer.Exit(1)
-        if provider == "anthropic" and not get_anthropic_api_key():
+        if not fetch_only and provider == "anthropic" and not get_anthropic_api_key():
             typer.echo("Error: ANTHROPIC_API_KEY is required for anthropic provider", err=True)
             raise typer.Exit(1)
-        if provider == "bedrock":
+        if not fetch_only and provider == "bedrock":
             typer.echo("Using Bedrock provider. Ensure AWS_PROFILE and AWS_REGION are set.", err=True)
 
         # Get GitHub tokens - CLI option takes precedence over environment
@@ -711,12 +714,15 @@ def batch_analyze(
                 err=True,
             )
 
-        # Load prompt
-        try:
-            prompt_text = load_prompt(prompt_file)
-        except FileNotFoundError as e:
-            typer.echo(f"Error: {e}", err=True)
-            raise typer.Exit(1)
+        # Load prompt (skip when fetch-only)
+        if not fetch_only:
+            try:
+                prompt_text = load_prompt(prompt_file)
+            except FileNotFoundError as e:
+                typer.echo(f"Error: {e}", err=True)
+                raise typer.Exit(1)
+        else:
+            prompt_text = None
 
         # Get PR URLs
         if input_file:
@@ -785,6 +791,14 @@ def batch_analyze(
                     since_override=since_override,
                 )
 
+        # Fetch-only mode: save to cache and exit
+        if fetch_only:
+            if not cache_file:
+                typer.echo("Error: --cache is required with --fetch-only", err=True)
+                raise typer.Exit(1)
+            typer.echo(f"✓ Fetched {len(pr_urls)} PR URLs to cache: {cache_file}", err=True)
+            return
+
         # Create analyzer function with progress callback
         def progress_msg(msg: str) -> None:
             """Display progress messages."""
diff --git a/reports/advanced/__init__.py b/reports/advanced/__init__.py
@@ -1,7 +1,6 @@
-"""Advanced reports (13, 15, 16)."""
+"""Advanced reports (15, 16)."""
 
 from .reports import (
     report_complexity_trend_by_team,
-    report_complexity_weighted_velocity,
     report_cumulative_complexity,
 )
diff --git a/reports/advanced/reports.py b/reports/advanced/reports.py
@@ -19,36 +19,6 @@ def _ensure_date(df: pd.DataFrame) -> pd.DataFrame:
     return df
 
 
-def report_complexity_weighted_velocity(df: pd.DataFrame, output_dir: Path) -> Optional[str]:
-    """Report 13: Complexity Weighted Velocity - per sprint, total_complexity / #developers."""
-    df = _ensure_date(df)
-    if df.empty:
-        return None
-    df = df.copy()
-    df["sprint"] = pd.to_datetime(df["date"]).dt.to_period("2W").dt.start_time
-    sprint_total = df.groupby("sprint")["complexity"].sum()
-    dev_col = "developer" if "developer" in df.columns else "author"
-    sprint_devs = df.groupby("sprint")[dev_col].nunique().replace(0, 1)
-    velocity = (sprint_total / sprint_devs).sort_index()
-    if not has_plottable_series(velocity):
-        return None
-    fig, ax = plt.subplots(figsize=(12, 6))
-    velocity.index = pd.to_datetime(velocity.index).strftime("%Y-%m-%d")
-    velocity.plot(kind="bar", ax=ax, color="green", alpha=0.8)
-    ax.set_title(
-        "Complexity Weighted Velocity (per Sprint, per Developer)\n"
-        "What: Output per sprint normalized by headcount. When: Sprint reviews. How: Compare bars for velocity trends."
-    )
-    ax.set_ylabel("Complexity / #developers")
-    ax.set_xlabel("Sprint")
-    ax.tick_params(axis="x", rotation=45)
-    fig.tight_layout()
-    out = output_dir / "13-complexity-weighted-velocity.png"
-    fig.savefig(out, dpi=150, bbox_inches="tight")
-    plt.close(fig)
-    return str(out) if validate_png_has_content(out) else None
-
-
 def report_complexity_trend_by_team(df: pd.DataFrame, output_dir: Path) -> Optional[str]:
     """Report 15: Complexity Trend by Team - rolling median."""
     df = _ensure_date(df)
diff --git a/reports/runner.py b/reports/runner.py
@@ -82,7 +82,6 @@ def run_reports(
         from reports.risk import report_complexity_histogram
         from reports.fairness import report_pr_size_vs_complexity
         from reports.fairness import report_pr_count_vs_avg_complexity
-        from reports.advanced import report_complexity_weighted_velocity
         from reports.advanced import report_complexity_trend_by_team
         from reports.advanced import report_cumulative_complexity
 
@@ -104,7 +103,6 @@ def run_reports(
             (report_complexity_histogram, "risk"),
             (report_pr_size_vs_complexity, "fairness"),
             (report_pr_count_vs_avg_complexity, "fairness"),
-            (report_complexity_weighted_velocity, "advanced"),
             (report_complexity_trend_by_team, "advanced"),
             (report_cumulative_complexity, "advanced"),
         ]
diff --git a/reports/team/reports.py b/reports/team/reports.py
@@ -63,7 +63,7 @@ def report_complexity_distribution_by_team(df: pd.DataFrame, output_dir: Path) -
 
 
 def report_developer_contribution(df: pd.DataFrame, output_dir: Path) -> Optional[Union[str, List[str]]]:
-    """Report 5: Developer Complexity Contribution - stacked by sprint, one per team."""
+    """Report 5: Developer Complexity Contribution - stacked by week, one per team."""
     mapping = load_team_mapping()
     if not mapping:
         return None
@@ -78,12 +78,12 @@ def report_developer_contribution(df: pd.DataFrame, output_dir: Path) -> Optiona
     df = df[df["developer"] != ""]
     if df.empty:
         return None
-    df["sprint"] = pd.to_datetime(df["date"]).dt.to_period("2W").dt.start_time
+    df["week"] = pd.to_datetime(df["date"]).dt.to_period("W").dt.start_time
     generated = []
     for team in df["team"].unique():
         tdf = df[df["team"] == team]
         pivot = tdf.pivot_table(
-            index="sprint", columns="developer", values="complexity", aggfunc="sum", fill_value=0
+            index="week", columns="developer", values="complexity", aggfunc="sum", fill_value=0
         )
         pivot = pivot.reindex(pivot.sum().sort_values(ascending=False).index, axis=1)
         if not has_plottable_agg(pivot):
@@ -92,11 +92,11 @@ def report_developer_contribution(df: pd.DataFrame, output_dir: Path) -> Optiona
         fig, ax = plt.subplots(figsize=(12, 6))
         pivot.plot(kind="bar", stacked=True, ax=ax, width=0.8, legend=True)
         ax.set_title(
-            f"Developer Complexity Contribution — {team} (per Sprint)\n"
-            "What: Who delivered what per sprint. When: Sprint reviews. How: Compare stacked bars."
+            f"Developer Complexity Contribution — {team} (per Week)\n"
+            "What: Who delivered what per week. When: Weekly reviews. How: Compare stacked bars."
         )
         ax.set_ylabel("Complexity")
-        ax.set_xlabel("Sprint")
+        ax.set_xlabel("Week")
         ax.tick_params(axis="x", rotation=45)
         ax.legend(bbox_to_anchor=(1.02, 1), ncol=2)
         fig.tight_layout()
diff --git a/scripts/fetch-jan-feb-2026.sh b/scripts/fetch-jan-feb-2026.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Fetch January–February 2026 PR URLs to cache (no analysis or labeling).
+# Run with --fetch-only first, then later run without it to analyze and label.
+
+cd "$(dirname "$0")/.."
+
+complexity-cli batch-analyze \
+  --all-repos \
+  --since 2026-01-01 \
+  --until 2026-02-28 \
+  --overwrite \
+  --fetch-only \
+  --cache cache/jan-feb-2026-prs.txt
+
+echo "PR URLs cached to: cache/jan-feb-2026-prs.txt"
diff --git a/scripts/fetch-oct-dec-2025.sh b/scripts/fetch-oct-dec-2025.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Fetch October–December 2025 PR URLs to cache (no analysis or labeling).
+# Run with --fetch-only first, then later run without it to analyze and label.
+
+cd "$(dirname "$0")/.."
+
+complexity-cli batch-analyze \
+  --all-repos \
+  --since 2025-10-01 \
+  --until 2025-12-31 \
+  --overwrite \
+  --fetch-only \
+  --cache cache/oct-dec-2025-prs.txt
+
+echo "PR URLs cached to: cache/oct-dec-2025-prs.txt"

Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,6 @@`
`1`		`-"""Advanced reports (13, 15, 16)."""`
	`1`	`+"""Advanced reports (15, 16)."""`
`2`	`2`
`3`	`3`	`from .reports import (`
`4`	`4`	`    report_complexity_trend_by_team,`
`5`		`-    report_complexity_weighted_velocity,`
`6`	`5`	`    report_cumulative_complexity,`
`7`	`6`	`)`