@@ -1973,3 +1973,116 @@ def create_pass_fail_by_evaluation_type_chart(data):
19731973 fig3 .update_xaxes (showgrid = True , gridwidth = 1 , gridcolor = 'lightgray' )
19741974 fig3 .update_yaxes (showgrid = True , gridwidth = 1 , gridcolor = 'lightgray' )
19751975 return fig3
1976+
1977+
def _combined_assertions_label(key: str) -> str:
    """Derive the x-axis label for one dataset key.

    Two key naming schemes are recognised:

    * Keys containing ``_singleshot_`` or ``_multishot_`` ("advanced" results):
      the label is ``<model>_<rag|norag>_<detailed|simple>_<eval type>``, where
      ``<model>`` is the text before the first underscore.
    * Any other key (Exercism results): the label is ``<model>_<eval type>``,
      where ``<model>`` is the text before ``_multi_shot`` and the run counts
      as multi-shot only if the key contains
      ``multi_shot_build_error_enabled_True``.
    """
    if '_singleshot_' in key or '_multishot_' in key:
        model_name = key.split('_')[0]
        # '_rag_detailed' / '_rag_simple' already imply '_rag_' is present, so
        # a single test is enough; every other key is labelled 'norag'.
        uses_rag = '_rag_detailed' in key or '_rag_simple' in key
        rag_status = 'rag' if uses_rag else 'norag'
        instruction_type = 'detailed' if '_detailed' in key else 'simple'
        eval_type = 'multishot' if '_multishot_' in key else 'singleshot'
        # Suffixes keep otherwise identical model names distinct on the axis.
        return f"{model_name}_{rag_status}_{instruction_type}_{eval_type}"

    model_name = key.split('_multi_shot')[0]
    is_multishot = 'multi_shot_build_error_enabled_True' in key
    eval_type = 'multishot' if is_multishot else 'singleshot'
    return f"{model_name}_{eval_type}"


def create_combined_assertions_bar_chart(data: Dict[str, pd.DataFrame]) -> go.Figure:
    """
    Create a grouped bar chart of assertions passed, failed, and missed per dataset.

    Every entry of ``data`` contributes one x-axis category (its label is
    derived from the key) and three bars holding the column sums of
    ``number_of_assertions_passed``, ``number_of_assertions_failed`` and
    ``number_of_assertions_missed``.

    Args:
        data: Mapping of dataset key to its results DataFrame. Each DataFrame
            is indexed by the three assertion columns named above, so a
            missing column surfaces as the lookup error pandas raises.

    Returns:
        Plotly Figure with one bar trace per assertion outcome. An empty
        ``data`` mapping yields a figure whose traces have no bars.
    """
    # (DataFrame column, legend/hover name, bar colour, extra hover text)
    series_specs = (
        ('number_of_assertions_passed', 'Assertions Passed', '#28a745', ''),  # Green
        ('number_of_assertions_failed', 'Assertions Failed', '#dc3545', ''),  # Red
        ('number_of_assertions_missed', 'Assertions Missed', '#ffc107',  # Yellow/Orange
         '<br><i>(Due to build errors, etc.)</i>'),
    )

    # One label per dataset, in the mapping's iteration order; the totals
    # below are built in the same order so bars line up with their labels.
    models = [_combined_assertions_label(key) for key in data]

    fig = go.Figure()
    for column, series_name, colour, hover_note in series_specs:
        totals = [df[column].sum() for df in data.values()]
        fig.add_trace(go.Bar(
            name=series_name,
            x=models,
            y=totals,
            marker_color=colour,
            opacity=0.8,
            text=totals,
            textposition='outside',
            # Plain concatenation keeps plotly's %{x} / %{y} placeholders
            # literal (no f-string brace escaping needed).
            hovertemplate=(
                '<b>%{x}</b><br>' + series_name + ': %{y}'
                + hover_note + '<extra></extra>'
            ),
        ))

    fig.update_layout(
        title={
            'text': 'Assertions Performance by Model and Configuration',
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 18},
        },
        xaxis_title='Models and Configurations',
        yaxis_title='Number of Assertions',
        barmode='group',
        width=1200,
        height=700,
        template='plotly_white',
        # Horizontal legend placed just above the plot area, right-aligned.
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
        ),
        # Large bottom margin leaves room for the rotated tick labels.
        margin=dict(b=150, l=80, r=80, t=100),
    )

    fig.update_xaxes(tickangle=45, tickfont=dict(size=10))
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')

    return fig
2088+
0 commit comments