Skip to content

Commit adc751e

Browse files
authored
Improved FileResultLogger (#4)
* Improved the results logger to accept pathlib Paths and to handle existing log files explicitly.
1 parent d196cdd commit adc751e

3 files changed

Lines changed: 126 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313

1414
### Changed
1515

16+
- `FileResultLogger` now accepts `pathlib.Path` for the `output_dir` argument and has a new `overwrite` argument to prevent overwriting existing log files (PR: #4)
17+
1618
### Fixed
1719

1820
- Consistent naming of agent `adapter` over `wrapper` (PR: #3)

maseval/core/callbacks/result_logger.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,10 +297,11 @@ class FileResultLogger(ResultLogger):
297297

298298
def __init__(
299299
self,
300-
output_dir: str = "./results",
300+
output_dir: Path | str = "./results",
301301
filename_pattern: str = "benchmark_{timestamp}.jsonl",
302302
write_metadata: bool = True,
303303
atomic_writes: bool = True,
304+
overwrite: bool = False,
304305
include_traces: bool = True,
305306
include_config: bool = True,
306307
include_eval: bool = True,
@@ -309,11 +310,14 @@ def __init__(
309310
"""Initialize the file logger.
310311
311312
Args:
312-
output_dir: Directory where result files will be written (created if needed)
313+
output_dir: Directory where result files will be written (created if needed).
314+
Accepts either a Path object or a string path.
313315
filename_pattern: Pattern for result filename. Use {timestamp} for
314316
automatic timestamp insertion (format: YYYYMMDD_HHMMSS)
315317
write_metadata: If True, write a metadata file alongside results
316318
atomic_writes: If True, use atomic writes (write to temp, then rename)
319+
overwrite: If True, overwrite existing files. If False, raise an error
320+
when the output file already exists.
317321
include_traces: If True, include execution traces in logged results
318322
include_config: If True, include configuration in logged results
319323
include_eval: If True, include evaluation results in logged results
@@ -330,6 +334,7 @@ def __init__(
330334
self.filename_pattern = filename_pattern
331335
self.write_metadata = write_metadata
332336
self.atomic_writes = atomic_writes
337+
self.overwrite = overwrite
333338

334339
# Runtime state
335340
self._output_path: Optional[Path] = None
@@ -447,6 +452,7 @@ def _initialize_output_file(self) -> None:
447452
448453
Raises:
449454
IOError: If file or directory creation fails
455+
FileExistsError: If output file exists and overwrite is False
450456
"""
451457
# Create output directory
452458
self.output_dir.mkdir(parents=True, exist_ok=True)
@@ -458,6 +464,10 @@ def _initialize_output_file(self) -> None:
458464
filename = self.filename_pattern.replace("{timestamp}", self._timestamp)
459465
self._output_path = self.output_dir / filename
460466

467+
# Check if file exists and handle overwrite
468+
if self._output_path.exists() and not self.overwrite:
469+
raise FileExistsError(f"Output file already exists: {self._output_path}. Set overwrite=True to allow overwriting existing files.")
470+
461471
# Open file for writing
462472
self._file_handle = open(self._output_path, "w")
463473

tests/test_core/test_callbacks/test_file_result_logger.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"""
88

99
import json
10+
from pathlib import Path
1011

1112
import pytest
1213

@@ -63,3 +64,114 @@ def test_file_result_logger_writes_jsonl(tmp_path):
6364
assert obj["task_id"] == report["task_id"]
6465
assert obj["repeat_idx"] == report["repeat_idx"]
6566
assert "traces" in obj and "config" in obj and "eval" in obj
67+
68+
69+
@pytest.mark.core
70+
def test_file_result_logger_accepts_pathlib_path(tmp_path):
71+
"""Test that FileResultLogger accepts pathlib.Path for output_dir.
72+
73+
Verifies that the logger works correctly when output_dir is specified
74+
as a Path object instead of a string.
75+
"""
76+
out_dir = tmp_path / "results"
77+
out_dir.mkdir()
78+
79+
# Pass Path object directly instead of string
80+
logger = FileResultLogger(output_dir=out_dir, filename_pattern="test_results.jsonl")
81+
82+
benchmark = MockBenchmark(n_tasks=1, n_repeats=1)
83+
logger.on_run_start(benchmark) # type: ignore[arg-type]
84+
85+
report = {
86+
"task_id": benchmark.task_ids[0],
87+
"repeat_idx": 0,
88+
"traces": {"agent": "trace"},
89+
"config": {"model": "gpt"},
90+
"eval": {"score": 1.0},
91+
}
92+
logger.on_task_repeat_end(benchmark, report) # type: ignore[arg-type]
93+
logger.on_run_end(benchmark, [report]) # type: ignore[arg-type]
94+
95+
# Verify file was created
96+
out_file = out_dir / "test_results.jsonl"
97+
assert out_file.exists()
98+
assert isinstance(logger.output_dir, Path)
99+
100+
lines = out_file.read_text().strip().splitlines()
101+
assert len(lines) == 1
102+
103+
104+
@pytest.mark.core
105+
def test_file_result_logger_overwrite_false_prevents_overwriting(tmp_path):
106+
"""Test that FileResultLogger raises error when file exists and overwrite=False.
107+
108+
Verifies that when overwrite is False (default), attempting to write to
109+
an existing file raises FileExistsError.
110+
"""
111+
out_dir = tmp_path / "results"
112+
out_dir.mkdir()
113+
114+
# Create an existing file
115+
existing_file = out_dir / "test_results.jsonl"
116+
existing_file.write_text("existing content\n")
117+
118+
# Try to create logger with overwrite=False (default)
119+
logger = FileResultLogger(output_dir=out_dir, filename_pattern="test_results.jsonl", overwrite=False)
120+
121+
benchmark = MockBenchmark(n_tasks=1, n_repeats=1)
122+
logger.on_run_start(benchmark) # type: ignore[arg-type]
123+
124+
report = {
125+
"task_id": benchmark.task_ids[0],
126+
"repeat_idx": 0,
127+
"traces": {"agent": "trace"},
128+
"config": {"model": "gpt"},
129+
"eval": {"score": 1.0},
130+
}
131+
132+
# Should raise FileExistsError when trying to log first iteration
133+
with pytest.raises(FileExistsError, match="Output file already exists.*Set overwrite=True"):
134+
logger.on_task_repeat_end(benchmark, report) # type: ignore[arg-type]
135+
136+
# Verify original file is unchanged
137+
assert existing_file.read_text() == "existing content\n"
138+
139+
140+
@pytest.mark.core
141+
def test_file_result_logger_overwrite_true_allows_overwriting(tmp_path):
142+
"""Test that FileResultLogger overwrites existing file when overwrite=True.
143+
144+
Verifies that when overwrite is True, the logger successfully overwrites
145+
an existing file with the same name.
146+
"""
147+
out_dir = tmp_path / "results"
148+
out_dir.mkdir()
149+
150+
# Create an existing file
151+
existing_file = out_dir / "test_results.jsonl"
152+
existing_file.write_text("existing content\n")
153+
154+
# Create logger with overwrite=True
155+
logger = FileResultLogger(output_dir=out_dir, filename_pattern="test_results.jsonl", overwrite=True)
156+
157+
benchmark = MockBenchmark(n_tasks=1, n_repeats=1)
158+
logger.on_run_start(benchmark) # type: ignore[arg-type]
159+
160+
report = {
161+
"task_id": benchmark.task_ids[0],
162+
"repeat_idx": 0,
163+
"traces": {"agent": "trace"},
164+
"config": {"model": "gpt"},
165+
"eval": {"score": 1.0},
166+
}
167+
logger.on_task_repeat_end(benchmark, report) # type: ignore[arg-type]
168+
logger.on_run_end(benchmark, [report]) # type: ignore[arg-type]
169+
170+
# Verify file was overwritten with new content
171+
lines = existing_file.read_text().strip().splitlines()
172+
assert len(lines) == 1
173+
assert "existing content" not in existing_file.read_text()
174+
175+
obj = json.loads(lines[0])
176+
assert obj["task_id"] == report["task_id"]
177+
assert obj["repeat_idx"] == report["repeat_idx"]

0 commit comments

Comments
 (0)