@@ -159,6 +159,7 @@ def _extract_basic(df: pd.DataFrame) -> List[Dict[str, Any]]:
159159 "overall_avg_unit" : "avg / week" ,
160160 "x" : week_labels ,
161161 "y" : per_capita ,
162+ "_section" : "Velocity Metrics" ,
162163 })
163164
164165 # 01: Complexity volume over time (bar)
@@ -172,6 +173,7 @@ def _extract_basic(df: pd.DataFrame) -> List[Dict[str, Any]]:
172173 "subtitle" : "Total complexity per week" ,
173174 "x" : labels ,
174175 "y" : weekly .tolist (),
176+ "_section" : "Velocity Metrics" ,
175177 })
176178
177179 # 18: Volume by month (bar)
@@ -185,6 +187,7 @@ def _extract_basic(df: pd.DataFrame) -> List[Dict[str, Any]]:
185187 "subtitle" : "Total complexity per month" ,
186188 "x" : [str (p ) for p in monthly .index ],
187189 "y" : monthly .tolist (),
190+ "_section" : "Velocity Metrics" ,
188191 })
189192
190193 # 02: PR count vs complexity (dual line)
@@ -201,6 +204,7 @@ def _extract_basic(df: pd.DataFrame) -> List[Dict[str, Any]]:
201204 "y1Name" : "PR Count" ,
202205 "y2" : weekly_agg ["total_complexity" ].tolist (),
203206 "y2Name" : "Total Complexity" ,
207+ "_section" : "Velocity Metrics" ,
204208 })
205209
206210 # 03: Avg complexity rolling (line)
@@ -215,6 +219,7 @@ def _extract_basic(df: pd.DataFrame) -> List[Dict[str, Any]]:
215219 "subtitle" : "Smoothed avg complexity" ,
216220 "x" : labels ,
217221 "y" : rolling .tolist (),
222+ "_section" : "Quality & Cycle Time" ,
218223 })
219224
220225 # 19: Avg merge cycle time (line)
@@ -239,24 +244,25 @@ def _extract_basic(df: pd.DataFrame) -> List[Dict[str, Any]]:
239244 "overall_avg" : overall_avg ,
240245 "x" : labels ,
241246 "y" : weekly_cycle .tolist (),
247+ "_section" : "Quality & Cycle Time" ,
242248 })
243249
244- # 07: High complexity frequency (bar )
245- tdf = df [ df [ "team" ] != "Unknown" ]
246- if not tdf . empty :
247- high = tdf [ tdf [ "complexity" ] >= 6 ]
248- total = tdf . groupby ( "team" ). size ()
249- high_count = high . groupby ( "team" ). size ()
250- pct = ( high_count . reindex ( total . index , fill_value = 0 ) / total * 100 ). fillna ( 0 )
251- if total . sum () > 0 :
252- charts . append ({
253- "id " : "07 " ,
254- "type " : "bar " ,
255- "title " : "% High-Risk PRs ( complexity ≥ 6) per Team " ,
256- "subtitle " : "Share of risky PRs per team" ,
257- "x " : pct . index .tolist (),
258- "y " : pct . tolist () ,
259- })
250+ # 16: Cumulative complexity by week (area/line )
251+ df_cum = df . copy ()
252+ df_cum [ "week_ts" ] = pd . to_datetime ( df_cum [ "date" ], format = "mixed" , utc = False , errors = "coerce" ). dt . to_period ( "W" ). dt . start_time
253+ weekly_sum = df_cum . groupby ( "week_ts" )[ "complexity" ]. sum (). sort_index ()
254+ cumulative = weekly_sum . cumsum ()
255+ if not cumulative . empty :
256+ weeks = [ d . strftime ( "%Y-%m-%d" ) for d in cumulative . index ]
257+ charts . append ({
258+ "id" : "16" ,
259+ "type" : "area" ,
260+ "title" : "Cumulative Velocity Over Time" ,
261+ "subtitle" : "Running total of complexity (by week)" ,
262+ "x" : weeks ,
263+ "y" : cumulative .tolist (),
264+ "_section" : "Cumulative Trends" ,
265+ })
260266
261267 return charts
262268
@@ -509,20 +515,80 @@ def _extract_fairness(df: pd.DataFrame) -> List[Dict[str, Any]]:
509515 if df .empty or len (df ) < 2 :
510516 return charts
511517
512- # 10: PR size vs complexity (scatter)
513- corr = df ["lines_changed" ].corr (df ["complexity" ])
518+ # 10: PR size vs complexity (scatter) - remove outliers using IQR
519+ # Filter outliers on both axes
520+ q1_lines = df ["lines_changed" ].quantile (0.25 )
521+ q3_lines = df ["lines_changed" ].quantile (0.75 )
522+ iqr_lines = q3_lines - q1_lines
523+ lines_lower = q1_lines - 1.5 * iqr_lines
524+ lines_upper = q3_lines + 1.5 * iqr_lines
525+
526+ q1_complexity = df ["complexity" ].quantile (0.25 )
527+ q3_complexity = df ["complexity" ].quantile (0.75 )
528+ iqr_complexity = q3_complexity - q1_complexity
529+ complexity_lower = q1_complexity - 1.5 * iqr_complexity
530+ complexity_upper = q3_complexity + 1.5 * iqr_complexity
531+
532+ df_filtered = df [
533+ (df ["lines_changed" ] >= lines_lower ) & (df ["lines_changed" ] <= lines_upper ) &
534+ (df ["complexity" ] >= complexity_lower ) & (df ["complexity" ] <= complexity_upper )
535+ ]
536+
537+ if df_filtered .empty or len (df_filtered ) < 2 :
538+ df_filtered = df # Fall back to original if filtering removes everything
539+
540+ corr = df_filtered ["lines_changed" ].corr (df_filtered ["complexity" ])
514541 if pd .isna (corr ):
515542 corr = 0.0
516543 passed = abs (corr ) < 0.3
517544 verdict = "PASS" if passed else "FAIL"
545+
546+ # Build PR examples for each data point (bucket by complexity and size ranges)
547+ pr_examples = {}
548+ for _ , row in df_filtered .iterrows ():
549+ complexity_bucket = int (row ["complexity" ])
550+ size_bucket = int (row ["lines_changed" ] // 100 ) * 100 # Bucket by 100s
551+ key = f"{ complexity_bucket } _{ size_bucket } "
552+
553+ if key not in pr_examples :
554+ pr_examples [key ] = []
555+
556+ pr_url = row .get ("pr_url" , "" )
557+ explanation = row .get ("explanation" , "" )
558+ if pd .isna (explanation ):
559+ explanation = ""
560+ else :
561+ explanation = str (explanation ).strip ()
562+
563+ pr_title_val = row .get ("pr_title" , "" )
564+ if pd .isna (pr_title_val ):
565+ pr_title_val = ""
566+ else :
567+ pr_title_val = str (pr_title_val ).strip ()
568+
569+ if explanation :
570+ title = explanation
571+ elif pr_title_val :
572+ title = pr_title_val
573+ else :
574+ title = _pr_title_from_url (pr_url ) if pr_url else "Unknown PR"
575+
576+ pr_examples [key ].append ({
577+ "title" : title ,
578+ "url" : pr_url ,
579+ "complexity" : float (row .get ("complexity" , 0 ) or 0 ),
580+ "lines_changed" : int (row .get ("lines_changed" , 0 ) or 0 ),
581+ })
582+
518583 charts .append ({
519584 "id" : "10" ,
520585 "type" : "scatter" ,
521586 "title" : f"PR Size vs Complexity — { verdict } (r={ corr :.2f} )" ,
522587 "subtitle" : "Lines changed vs complexity score" ,
523- "data" : [[float (r ["lines_changed" ]), float (r ["complexity" ])] for _ , r in df .iterrows ()],
588+ "data" : [[float (r ["lines_changed" ]), float (r ["complexity" ])] for _ , r in df_filtered .iterrows ()],
524589 "xAxisName" : "Lines Changed" ,
525590 "yAxisName" : "Complexity" ,
591+ "_pr_examples" : pr_examples , # Add PR examples for modal
526592 })
527593
528594 # 11: PR count vs avg complexity (scatter with labels)
@@ -544,85 +610,6 @@ def _extract_fairness(df: pd.DataFrame) -> List[Dict[str, Any]]:
544610 return charts
545611
546612
547- def _extract_advanced (df : pd .DataFrame ) -> List [Dict [str , Any ]]:
548- charts = []
549- df = _ensure_date (df )
550- if df .empty :
551- return charts
552-
553- df = df .copy ()
554- df ["week" ] = pd .to_datetime (df ["date" ], format = "mixed" , utc = False , errors = "coerce" ).dt .to_period ("W" ).dt .start_time
555-
556- # 21: Developer line velocity (multi-line)
557- dev_col = "developer" if "developer" in df .columns else "author"
558- df ["developer" ] = df .get (dev_col , pd .Series (["" ] * len (df ))).fillna ("" ).astype (str )
559- tdf = df [df ["developer" ] != "" ]
560- if not tdf .empty :
561- weekly = tdf .groupby (["week" , "developer" ])["complexity" ].sum ().unstack (fill_value = 0 )
562- weekly = weekly .reindex (weekly .sum ().sort_values (ascending = False ).index , axis = 1 )
563- if not weekly .empty :
564- weeks = [d .strftime ("%Y-%m-%d" ) for d in weekly .index ]
565- mapping = load_team_mapping ()
566- series = [
567- {
568- "name" : c ,
569- "data" : weekly [c ].tolist (),
570- "team" : mapping .get (c , "" ),
571- }
572- for c in weekly .columns
573- ]
574- charts .append ({
575- "id" : "21" ,
576- "type" : "multiLine" ,
577- "title" : "Developer Velocity by Week" ,
578- "subtitle" : "Complexity per developer per week" ,
579- "x" : weeks ,
580- "series" : series ,
581- "hasPicker" : True ,
582- })
583-
584- # 15: Complexity trend by team (multi-line)
585- df ["team" ] = df .get ("team" , pd .Series (["" ] * len (df ))).fillna ("" ).replace ("" , "Unknown" )
586- tdf = df [df ["team" ] != "Unknown" ]
587- if not tdf .empty :
588- all_weeks = sorted (tdf ["week" ].unique ())
589- x_labels = [d .strftime ("%Y-%m-%d" ) for d in all_weeks ]
590- series_list = []
591- for team in tdf ["team" ].unique ():
592- team_weekly = tdf [tdf ["team" ] == team ].groupby ("week" )["complexity" ].median ()
593- rolling = team_weekly .rolling (4 , min_periods = 1 ).mean ()
594- aligned = rolling .reindex (all_weeks ).tolist ()
595- if any (pd .notna (v ) for v in aligned ):
596- series_list .append ({"name" : team , "data" : [None if pd .isna (v ) else float (v ) for v in aligned ]})
597- if series_list :
598- charts .append ({
599- "id" : "15" ,
600- "type" : "multiLine" ,
601- "title" : "Velocity Trend by Team (Rolling 4w)" ,
602- "subtitle" : "Smoothed median complexity per team" ,
603- "x" : x_labels ,
604- "series" : series_list ,
605- })
606-
607- # 16: Cumulative complexity by week (area/line)
608- df_cum = df .copy ()
609- df_cum ["week" ] = pd .to_datetime (df_cum ["date" ], format = "mixed" , utc = False , errors = "coerce" ).dt .to_period ("W" ).dt .start_time
610- weekly_sum = df_cum .groupby ("week" )["complexity" ].sum ().sort_index ()
611- cumulative = weekly_sum .cumsum ()
612- if not cumulative .empty :
613- weeks = [d .strftime ("%Y-%m-%d" ) for d in cumulative .index ]
614- charts .append ({
615- "id" : "16" ,
616- "type" : "area" ,
617- "title" : "Cumulative Velocity Over Time" ,
618- "subtitle" : "Running total of complexity (by week)" ,
619- "x" : weeks ,
620- "y" : cumulative .tolist (),
621- })
622-
623- return charts
624-
625-
626613def _extract_features () -> Dict [str , Any ]:
627614 """Build chart data + raw table data from features-released.csv."""
628615 csv_path = Path (__file__ ).resolve ().parent .parent / "features-released.csv"
@@ -789,6 +776,47 @@ def _extract_leaderboard(df: pd.DataFrame) -> Dict[str, Any]:
789776 return result
790777
791778
779+ def _extract_hero_stats (df : pd .DataFrame ) -> Dict [str , Any ]:
780+ """Extract hero dashboard stats for Overview tab."""
781+ df = _ensure_date (df )
782+ if df .empty :
783+ return {
784+ "velocity_per_capita" : 0 ,
785+ "active_developers" : 0 ,
786+ "total_prs" : 0 ,
787+ "avg_complexity" : 0 ,
788+ }
789+
790+ # Calculate per-capita velocity
791+ df ["week" ] = pd .to_datetime (df ["date" ]).dt .to_period ("W" ).dt .start_time
792+ weekly = df .groupby ("week" )["complexity" ].sum ()
793+ weeks = sorted ([w .date () for w in weekly .index ])
794+ headcounts_dict = get_weekly_headcounts (weeks )
795+ all_hc = headcounts_dict .get ("All Teams" , [])
796+ per_capita = []
797+ for i , (week , total_cx ) in enumerate (weekly .items ()):
798+ hc = all_hc [i ] if i < len (all_hc ) else 0
799+ if hc > 0 :
800+ per_capita .append (total_cx / hc )
801+ velocity = round (np .mean (per_capita ), 1 ) if per_capita else 0
802+
803+ # Active developers (unique in last 30 days)
804+ last_30d = df [df ["date" ] >= (pd .Timestamp .now () - pd .Timedelta (days = 30 ))]
805+ dev_col = "developer" if "developer" in df .columns else "author"
806+ active_devs = last_30d [dev_col ].nunique () if not last_30d .empty else 0
807+
808+ # Total PRs and avg complexity
809+ total_prs = len (df )
810+ avg_cx = round (df ["complexity" ].mean (), 1 ) if "complexity" in df .columns else 0
811+
812+ return {
813+ "velocity_per_capita" : velocity ,
814+ "active_developers" : active_devs ,
815+ "total_prs" : total_prs ,
816+ "avg_complexity" : avg_cx ,
817+ }
818+
819+
792820def build_all_chart_data (df : pd .DataFrame ) -> Dict [str , Any ]:
793821 """Build chart data for all tabs. Returns {tab: [chart_data, ...]}."""
794822 # Ensure numeric and date columns are properly typed regardless of how the df was loaded
@@ -805,9 +833,9 @@ def build_all_chart_data(df: pd.DataFrame) -> Dict[str, Any]:
805833 "team" : _extract_team (df ),
806834 "risk" : _extract_risk (df ),
807835 "fairness" : _extract_fairness (df ),
808- "advanced" : _extract_advanced (df ),
809836 "features" : features_data .get ("charts" , []),
810837 "_features_rows" : features_data .get ("rows" , []),
811838 "leaderboard" : _extract_leaderboard (df ),
812839 "_team_dev_prs" : _build_team_dev_prs (df ),
840+ "_hero_stats" : _extract_hero_stats (df ),
813841 }