@@ -48,10 +48,22 @@ class GenerativeBenchmarkerSummaryJson(GenerativeBenchmarkerOutput):
4848 default_factory = lambda : Path .cwd (),
4949 description = "Directory or file path for saving the serialized report." ,
5050 )
51+ error_limit : int | None = Field (
52+ default = 20 ,
53+ description = "Maximum number of errored requests to include." ,
54+ )
55+ incomplete_limit : int | None = Field (
56+ default = 20 ,
57+ description = "Maximum number of incomplete requests to include." ,
58+ )
5159
5260 @classmethod
5361 def validated_kwargs (
54- cls , output_path : str | Path | None , ** _kwargs
62+ cls ,
63+ output_path : str | Path | None ,
64+ error_limit : int | None = None ,
65+ incomplete_limit : int | None = None ,
66+ ** _kwargs ,
5567 ) -> dict [str , Any ]:
5668 """
5769 Validate and normalize keyword arguments for output path.
@@ -64,9 +76,17 @@ def validated_kwargs(
6476 """
6577 validated : dict [str , Any ] = {}
6678 if output_path is not None :
67- validated [ " output_path" ] = (
68- Path ( output_path ) if not isinstance (output_path , Path ) else output_path
79+ output_path = (
80+ output_path if isinstance (output_path , Path ) else Path ( output_path )
6981 )
82+ if output_path .suffix .lower () == ".summary_json" :
83+ output_path = output_path .with_suffix (".json" )
84+ validated ["output_path" ] = output_path
85+
86+ if error_limit is not None :
87+ validated ["error_limit" ] = error_limit
88+ if incomplete_limit is not None :
89+ validated ["incomplete_limit" ] = incomplete_limit
7090 return validated
7191
7292 async def finalize (self , report : GenerativeBenchmarksReport ) -> Path :
@@ -81,13 +101,44 @@ async def finalize(self, report: GenerativeBenchmarksReport) -> Path:
81101 output_path = self .output_path
82102 if output_path .is_dir ():
83103 output_path = output_path / self .DEFAULT_FILE
104+
84105 output_path .parent .mkdir (parents = True , exist_ok = True )
85106
86- # Exclude specified fields from the report
87- model_dict = report .model_dump (exclude = self .EXCLUDE_FIELDS )
88- save_str = json .dumps (model_dict , indent = 4 )
107+ # Exclude specified fields from the report, but keep a small error sample
108+ full_dict = report .model_dump ()
109+ summary_dict = report .model_dump (exclude = self .EXCLUDE_FIELDS )
110+ self ._attach_error_samples (summary_dict , full_dict )
111+ save_str = json .dumps (summary_dict , indent = 4 )
89112
90113 with output_path .open ("w" , encoding = "utf-8" ) as file :
91114 file .write (save_str )
92115
93116 return output_path
117+
def _attach_error_samples(
    self, summary_dict: dict[str, Any], full_dict: dict[str, Any]
) -> None:
    """
    Copy a bounded sample of errored/incomplete requests into the summary.

    Benchmarks are paired by position: entry ``i`` of
    ``summary_dict["benchmarks"]`` receives samples taken from entry ``i`` of
    ``full_dict["benchmarks"]``. Summary entries without a counterpart in the
    full dump are left untouched. The summary entries are mutated in place.

    :param summary_dict: Dumped report that will be written out; a
        ``"requests_truncated"`` mapping is added to each benchmark that has
        at least one sampled request.
    :param full_dict: Dumped report read for the ``"requests"`` ->
        ``"errored"`` / ``"incomplete"`` lists.
    """
    targets = summary_dict.get("benchmarks") or []
    sources = full_dict.get("benchmarks") or []

    for position, target in enumerate(targets):
        source = sources[position] if position < len(sources) else None
        request_groups = (source.get("requests") if source else None) or {}

        samples = {
            "errored": self._limit_items(
                request_groups.get("errored") or [], self.error_limit
            ),
            "incomplete": self._limit_items(
                request_groups.get("incomplete") or [], self.incomplete_limit
            ),
        }

        # Only non-empty samples are recorded; the key is omitted entirely
        # when there is nothing to report for this benchmark.
        kept = {label: sample for label, sample in samples.items() if sample}
        if kept:
            target["requests_truncated"] = kept
139+
140+ @staticmethod
141+ def _limit_items (items : list [Any ], limit : int | None ) -> list [Any ]:
142+ if limit is None :
143+ return list (items )
144+ return list (items )[: max (limit , 0 )]
0 commit comments