Skip to content

Commit f6f03e7

Browse files
committed
refactor: add error and incomplete request to summary result
1 parent 307fa79 commit f6f03e7

1 file changed

Lines changed: 57 additions & 6 deletions

File tree

benchmark_runner/output_summary_json.py

Lines changed: 57 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,22 @@ class GenerativeBenchmarkerSummaryJson(GenerativeBenchmarkerOutput):
4848
default_factory=lambda: Path.cwd(),
4949
description="Directory or file path for saving the serialized report.",
5050
)
51+
error_limit: int | None = Field(
52+
default=20,
53+
description="Maximum number of errored requests to include.",
54+
)
55+
incomplete_limit: int | None = Field(
56+
default=20,
57+
description="Maximum number of incomplete requests to include.",
58+
)
5159

5260
@classmethod
5361
def validated_kwargs(
54-
cls, output_path: str | Path | None, **_kwargs
62+
cls,
63+
output_path: str | Path | None,
64+
error_limit: int | None = None,
65+
incomplete_limit: int | None = None,
66+
**_kwargs,
5567
) -> dict[str, Any]:
5668
"""
5769
Validate and normalize keyword arguments for the output path and the errored/incomplete request limits.
@@ -64,9 +76,17 @@ def validated_kwargs(
6476
"""
6577
validated: dict[str, Any] = {}
6678
if output_path is not None:
67-
validated["output_path"] = (
68-
Path(output_path) if not isinstance(output_path, Path) else output_path
79+
output_path = (
80+
output_path if isinstance(output_path, Path) else Path(output_path)
6981
)
82+
if output_path.suffix.lower() == ".summary_json":
83+
output_path = output_path.with_suffix(".json")
84+
validated["output_path"] = output_path
85+
86+
if error_limit is not None:
87+
validated["error_limit"] = error_limit
88+
if incomplete_limit is not None:
89+
validated["incomplete_limit"] = incomplete_limit
7090
return validated
7191

7292
async def finalize(self, report: GenerativeBenchmarksReport) -> Path:
@@ -81,13 +101,44 @@ async def finalize(self, report: GenerativeBenchmarksReport) -> Path:
81101
output_path = self.output_path
82102
if output_path.is_dir():
83103
output_path = output_path / self.DEFAULT_FILE
104+
84105
output_path.parent.mkdir(parents=True, exist_ok=True)
85106

86-
# Exclude specified fields from the report
87-
model_dict = report.model_dump(exclude=self.EXCLUDE_FIELDS)
88-
save_str = json.dumps(model_dict, indent=4)
107+
# Exclude specified fields from the report, but keep samples of errored and incomplete requests (bounded by error_limit / incomplete_limit)
108+
full_dict = report.model_dump()
109+
summary_dict = report.model_dump(exclude=self.EXCLUDE_FIELDS)
110+
self._attach_error_samples(summary_dict, full_dict)
111+
save_str = json.dumps(summary_dict, indent=4)
89112

90113
with output_path.open("w", encoding="utf-8") as file:
91114
file.write(save_str)
92115

93116
return output_path
117+
118+
def _attach_error_samples(
119+
self, summary_dict: dict[str, Any], full_dict: dict[str, Any]
120+
) -> None:
121+
summary_benchmarks = summary_dict.get("benchmarks") or []
122+
full_benchmarks = full_dict.get("benchmarks") or []
123+
124+
for idx, benchmark in enumerate(summary_benchmarks):
125+
full_benchmark = full_benchmarks[idx] if idx < len(full_benchmarks) else {}
126+
requests = (full_benchmark or {}).get("requests") or {}
127+
128+
errored = self._limit_items(requests.get("errored") or [], self.error_limit)
129+
incomplete = self._limit_items(
130+
requests.get("incomplete") or [], self.incomplete_limit
131+
)
132+
133+
if errored or incomplete:
134+
benchmark["requests_truncated"] = {}
135+
if errored:
136+
benchmark["requests_truncated"]["errored"] = errored
137+
if incomplete:
138+
benchmark["requests_truncated"]["incomplete"] = incomplete
139+
140+
@staticmethod
141+
def _limit_items(items: list[Any], limit: int | None) -> list[Any]:
142+
if limit is None:
143+
return list(items)
144+
return list(items)[: max(limit, 0)]

0 commit comments

Comments
 (0)