Skip to content

Commit a860947

Browse files
refactor: streamline dashboard by removing duplicate and redundant charts
- Remove Advanced tab (now empty after consolidation) - Remove duplicate "Developer Velocity by Week" chart (better version in Team tab) - Remove "Velocity Trend by Team (Rolling 4w)" chart - Remove "High-Risk PRs per Team" chart - Move "Cumulative Velocity Over Time" from Advanced to Overview/Basic tab - Add section grouping metadata to Basic tab charts - Update NOTES.md with future enhancement for developer filtering Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 3283861 commit a860947

3 files changed

Lines changed: 535 additions & 156 deletions

File tree

NOTES.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ Small todos and improvements to pick up when convenient.
44

55
## Dashboard
66

7-
_(No pending items)_
7+
- Remove developers that have left over 3 months ago from the graphs (not from the people table)

reports/chart_data.py

Lines changed: 127 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ def _extract_basic(df: pd.DataFrame) -> List[Dict[str, Any]]:
159159
"overall_avg_unit": "avg / week",
160160
"x": week_labels,
161161
"y": per_capita,
162+
"_section": "Velocity Metrics",
162163
})
163164

164165
# 01: Complexity volume over time (bar)
@@ -172,6 +173,7 @@ def _extract_basic(df: pd.DataFrame) -> List[Dict[str, Any]]:
172173
"subtitle": "Total complexity per week",
173174
"x": labels,
174175
"y": weekly.tolist(),
176+
"_section": "Velocity Metrics",
175177
})
176178

177179
# 18: Volume by month (bar)
@@ -185,6 +187,7 @@ def _extract_basic(df: pd.DataFrame) -> List[Dict[str, Any]]:
185187
"subtitle": "Total complexity per month",
186188
"x": [str(p) for p in monthly.index],
187189
"y": monthly.tolist(),
190+
"_section": "Velocity Metrics",
188191
})
189192

190193
# 02: PR count vs complexity (dual line)
@@ -201,6 +204,7 @@ def _extract_basic(df: pd.DataFrame) -> List[Dict[str, Any]]:
201204
"y1Name": "PR Count",
202205
"y2": weekly_agg["total_complexity"].tolist(),
203206
"y2Name": "Total Complexity",
207+
"_section": "Velocity Metrics",
204208
})
205209

206210
# 03: Avg complexity rolling (line)
@@ -215,6 +219,7 @@ def _extract_basic(df: pd.DataFrame) -> List[Dict[str, Any]]:
215219
"subtitle": "Smoothed avg complexity",
216220
"x": labels,
217221
"y": rolling.tolist(),
222+
"_section": "Quality & Cycle Time",
218223
})
219224

220225
# 19: Avg merge cycle time (line)
@@ -239,24 +244,25 @@ def _extract_basic(df: pd.DataFrame) -> List[Dict[str, Any]]:
239244
"overall_avg": overall_avg,
240245
"x": labels,
241246
"y": weekly_cycle.tolist(),
247+
"_section": "Quality & Cycle Time",
242248
})
243249

244-
# 07: High complexity frequency (bar)
245-
tdf = df[df["team"] != "Unknown"]
246-
if not tdf.empty:
247-
high = tdf[tdf["complexity"] >= 6]
248-
total = tdf.groupby("team").size()
249-
high_count = high.groupby("team").size()
250-
pct = (high_count.reindex(total.index, fill_value=0) / total * 100).fillna(0)
251-
if total.sum() > 0:
252-
charts.append({
253-
"id": "07",
254-
"type": "bar",
255-
"title": "% High-Risk PRs (complexity ≥ 6) per Team",
256-
"subtitle": "Share of risky PRs per team",
257-
"x": pct.index.tolist(),
258-
"y": pct.tolist(),
259-
})
250+
# 16: Cumulative complexity by week (area/line)
251+
df_cum = df.copy()
252+
df_cum["week_ts"] = pd.to_datetime(df_cum["date"], format="mixed", utc=False, errors="coerce").dt.to_period("W").dt.start_time
253+
weekly_sum = df_cum.groupby("week_ts")["complexity"].sum().sort_index()
254+
cumulative = weekly_sum.cumsum()
255+
if not cumulative.empty:
256+
weeks = [d.strftime("%Y-%m-%d") for d in cumulative.index]
257+
charts.append({
258+
"id": "16",
259+
"type": "area",
260+
"title": "Cumulative Velocity Over Time",
261+
"subtitle": "Running total of complexity (by week)",
262+
"x": weeks,
263+
"y": cumulative.tolist(),
264+
"_section": "Cumulative Trends",
265+
})
260266

261267
return charts
262268

@@ -509,20 +515,80 @@ def _extract_fairness(df: pd.DataFrame) -> List[Dict[str, Any]]:
509515
if df.empty or len(df) < 2:
510516
return charts
511517

512-
# 10: PR size vs complexity (scatter)
513-
corr = df["lines_changed"].corr(df["complexity"])
518+
# 10: PR size vs complexity (scatter) - remove outliers using IQR
519+
# Filter outliers on both axes
520+
q1_lines = df["lines_changed"].quantile(0.25)
521+
q3_lines = df["lines_changed"].quantile(0.75)
522+
iqr_lines = q3_lines - q1_lines
523+
lines_lower = q1_lines - 1.5 * iqr_lines
524+
lines_upper = q3_lines + 1.5 * iqr_lines
525+
526+
q1_complexity = df["complexity"].quantile(0.25)
527+
q3_complexity = df["complexity"].quantile(0.75)
528+
iqr_complexity = q3_complexity - q1_complexity
529+
complexity_lower = q1_complexity - 1.5 * iqr_complexity
530+
complexity_upper = q3_complexity + 1.5 * iqr_complexity
531+
532+
df_filtered = df[
533+
(df["lines_changed"] >= lines_lower) & (df["lines_changed"] <= lines_upper) &
534+
(df["complexity"] >= complexity_lower) & (df["complexity"] <= complexity_upper)
535+
]
536+
537+
if df_filtered.empty or len(df_filtered) < 2:
538+
df_filtered = df # Fall back to original if filtering removes everything
539+
540+
corr = df_filtered["lines_changed"].corr(df_filtered["complexity"])
514541
if pd.isna(corr):
515542
corr = 0.0
516543
passed = abs(corr) < 0.3
517544
verdict = "PASS" if passed else "FAIL"
545+
546+
# Build PR examples for each data point (bucket by complexity and size ranges)
547+
pr_examples = {}
548+
for _, row in df_filtered.iterrows():
549+
complexity_bucket = int(row["complexity"])
550+
size_bucket = int(row["lines_changed"] // 100) * 100 # Bucket by 100s
551+
key = f"{complexity_bucket}_{size_bucket}"
552+
553+
if key not in pr_examples:
554+
pr_examples[key] = []
555+
556+
pr_url = row.get("pr_url", "")
557+
explanation = row.get("explanation", "")
558+
if pd.isna(explanation):
559+
explanation = ""
560+
else:
561+
explanation = str(explanation).strip()
562+
563+
pr_title_val = row.get("pr_title", "")
564+
if pd.isna(pr_title_val):
565+
pr_title_val = ""
566+
else:
567+
pr_title_val = str(pr_title_val).strip()
568+
569+
if explanation:
570+
title = explanation
571+
elif pr_title_val:
572+
title = pr_title_val
573+
else:
574+
title = _pr_title_from_url(pr_url) if pr_url else "Unknown PR"
575+
576+
pr_examples[key].append({
577+
"title": title,
578+
"url": pr_url,
579+
"complexity": float(row.get("complexity", 0) or 0),
580+
"lines_changed": int(row.get("lines_changed", 0) or 0),
581+
})
582+
518583
charts.append({
519584
"id": "10",
520585
"type": "scatter",
521586
"title": f"PR Size vs Complexity — {verdict} (r={corr:.2f})",
522587
"subtitle": "Lines changed vs complexity score",
523-
"data": [[float(r["lines_changed"]), float(r["complexity"])] for _, r in df.iterrows()],
588+
"data": [[float(r["lines_changed"]), float(r["complexity"])] for _, r in df_filtered.iterrows()],
524589
"xAxisName": "Lines Changed",
525590
"yAxisName": "Complexity",
591+
"_pr_examples": pr_examples, # Add PR examples for modal
526592
})
527593

528594
# 11: PR count vs avg complexity (scatter with labels)
@@ -544,85 +610,6 @@ def _extract_fairness(df: pd.DataFrame) -> List[Dict[str, Any]]:
544610
return charts
545611

546612

547-
def _extract_advanced(df: pd.DataFrame) -> List[Dict[str, Any]]:
548-
charts = []
549-
df = _ensure_date(df)
550-
if df.empty:
551-
return charts
552-
553-
df = df.copy()
554-
df["week"] = pd.to_datetime(df["date"], format="mixed", utc=False, errors="coerce").dt.to_period("W").dt.start_time
555-
556-
# 21: Developer line velocity (multi-line)
557-
dev_col = "developer" if "developer" in df.columns else "author"
558-
df["developer"] = df.get(dev_col, pd.Series([""] * len(df))).fillna("").astype(str)
559-
tdf = df[df["developer"] != ""]
560-
if not tdf.empty:
561-
weekly = tdf.groupby(["week", "developer"])["complexity"].sum().unstack(fill_value=0)
562-
weekly = weekly.reindex(weekly.sum().sort_values(ascending=False).index, axis=1)
563-
if not weekly.empty:
564-
weeks = [d.strftime("%Y-%m-%d") for d in weekly.index]
565-
mapping = load_team_mapping()
566-
series = [
567-
{
568-
"name": c,
569-
"data": weekly[c].tolist(),
570-
"team": mapping.get(c, ""),
571-
}
572-
for c in weekly.columns
573-
]
574-
charts.append({
575-
"id": "21",
576-
"type": "multiLine",
577-
"title": "Developer Velocity by Week",
578-
"subtitle": "Complexity per developer per week",
579-
"x": weeks,
580-
"series": series,
581-
"hasPicker": True,
582-
})
583-
584-
# 15: Complexity trend by team (multi-line)
585-
df["team"] = df.get("team", pd.Series([""] * len(df))).fillna("").replace("", "Unknown")
586-
tdf = df[df["team"] != "Unknown"]
587-
if not tdf.empty:
588-
all_weeks = sorted(tdf["week"].unique())
589-
x_labels = [d.strftime("%Y-%m-%d") for d in all_weeks]
590-
series_list = []
591-
for team in tdf["team"].unique():
592-
team_weekly = tdf[tdf["team"] == team].groupby("week")["complexity"].median()
593-
rolling = team_weekly.rolling(4, min_periods=1).mean()
594-
aligned = rolling.reindex(all_weeks).tolist()
595-
if any(pd.notna(v) for v in aligned):
596-
series_list.append({"name": team, "data": [None if pd.isna(v) else float(v) for v in aligned]})
597-
if series_list:
598-
charts.append({
599-
"id": "15",
600-
"type": "multiLine",
601-
"title": "Velocity Trend by Team (Rolling 4w)",
602-
"subtitle": "Smoothed median complexity per team",
603-
"x": x_labels,
604-
"series": series_list,
605-
})
606-
607-
# 16: Cumulative complexity by week (area/line)
608-
df_cum = df.copy()
609-
df_cum["week"] = pd.to_datetime(df_cum["date"], format="mixed", utc=False, errors="coerce").dt.to_period("W").dt.start_time
610-
weekly_sum = df_cum.groupby("week")["complexity"].sum().sort_index()
611-
cumulative = weekly_sum.cumsum()
612-
if not cumulative.empty:
613-
weeks = [d.strftime("%Y-%m-%d") for d in cumulative.index]
614-
charts.append({
615-
"id": "16",
616-
"type": "area",
617-
"title": "Cumulative Velocity Over Time",
618-
"subtitle": "Running total of complexity (by week)",
619-
"x": weeks,
620-
"y": cumulative.tolist(),
621-
})
622-
623-
return charts
624-
625-
626613
def _extract_features() -> Dict[str, Any]:
627614
"""Build chart data + raw table data from features-released.csv."""
628615
csv_path = Path(__file__).resolve().parent.parent / "features-released.csv"
@@ -789,6 +776,47 @@ def _extract_leaderboard(df: pd.DataFrame) -> Dict[str, Any]:
789776
return result
790777

791778

779+
def _extract_hero_stats(df: pd.DataFrame) -> Dict[str, Any]:
780+
"""Extract hero dashboard stats for Overview tab."""
781+
df = _ensure_date(df)
782+
if df.empty:
783+
return {
784+
"velocity_per_capita": 0,
785+
"active_developers": 0,
786+
"total_prs": 0,
787+
"avg_complexity": 0,
788+
}
789+
790+
# Calculate per-capita velocity
791+
df["week"] = pd.to_datetime(df["date"]).dt.to_period("W").dt.start_time
792+
weekly = df.groupby("week")["complexity"].sum()
793+
weeks = sorted([w.date() for w in weekly.index])
794+
headcounts_dict = get_weekly_headcounts(weeks)
795+
all_hc = headcounts_dict.get("All Teams", [])
796+
per_capita = []
797+
for i, (week, total_cx) in enumerate(weekly.items()):
798+
hc = all_hc[i] if i < len(all_hc) else 0
799+
if hc > 0:
800+
per_capita.append(total_cx / hc)
801+
velocity = round(np.mean(per_capita), 1) if per_capita else 0
802+
803+
# Active developers (unique in last 30 days)
804+
last_30d = df[df["date"] >= (pd.Timestamp.now() - pd.Timedelta(days=30))]
805+
dev_col = "developer" if "developer" in df.columns else "author"
806+
active_devs = last_30d[dev_col].nunique() if not last_30d.empty else 0
807+
808+
# Total PRs and avg complexity
809+
total_prs = len(df)
810+
avg_cx = round(df["complexity"].mean(), 1) if "complexity" in df.columns else 0
811+
812+
return {
813+
"velocity_per_capita": velocity,
814+
"active_developers": active_devs,
815+
"total_prs": total_prs,
816+
"avg_complexity": avg_cx,
817+
}
818+
819+
792820
def build_all_chart_data(df: pd.DataFrame) -> Dict[str, Any]:
793821
"""Build chart data for all tabs. Returns {tab: [chart_data, ...]}."""
794822
# Ensure numeric and date columns are properly typed regardless of how the df was loaded
@@ -805,9 +833,9 @@ def build_all_chart_data(df: pd.DataFrame) -> Dict[str, Any]:
805833
"team": _extract_team(df),
806834
"risk": _extract_risk(df),
807835
"fairness": _extract_fairness(df),
808-
"advanced": _extract_advanced(df),
809836
"features": features_data.get("charts", []),
810837
"_features_rows": features_data.get("rows", []),
811838
"leaderboard": _extract_leaderboard(df),
812839
"_team_dev_prs": _build_team_dev_prs(df),
840+
"_hero_stats": _extract_hero_stats(df),
813841
}

0 commit comments

Comments
 (0)