added tests for new overwrite feature

cemde · cemde · commit 59d86fb024af · 2025-11-23T17:01:34.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed
 
-- `FileResultLogger` now accepts `pathlib.Path` for path and has an `overwrite` argument to prevent overwriting of existing logs files.
+- `FileResultLogger` now accepts `pathlib.Path` for argument `output_dir` and has an `overwrite` argument to prevent overwriting of existing logs files.
 
 ### Fixed
 
diff --git a/tests/test_core/test_callbacks/test_file_result_logger.py b/tests/test_core/test_callbacks/test_file_result_logger.py
@@ -7,6 +7,7 @@
 """
 
 import json
+from pathlib import Path
 
 import pytest
 
@@ -63,3 +64,114 @@ def test_file_result_logger_writes_jsonl(tmp_path):
     assert obj["task_id"] == report["task_id"]
     assert obj["repeat_idx"] == report["repeat_idx"]
     assert "traces" in obj and "config" in obj and "eval" in obj
+
+
+@pytest.mark.core
+def test_file_result_logger_accepts_pathlib_path(tmp_path):
+    """Test that FileResultLogger accepts pathlib.Path for output_dir.
+
+    Verifies that the logger works correctly when output_dir is specified
+    as a Path object instead of a string.
+    """
+    out_dir = tmp_path / "results"
+    out_dir.mkdir()
+
+    # Pass Path object directly instead of string
+    logger = FileResultLogger(output_dir=out_dir, filename_pattern="test_results.jsonl")
+
+    benchmark = MockBenchmark(n_tasks=1, n_repeats=1)
+    logger.n_run_start(benchmark)  # type: ignore[arg-type]
+
+    report = {
+        "task_id": benchmark.task_ids[0],
+        "repeat_idx": 0,
+        "traces": {"agent": "trace"},
+        "config": {"model": "gpt"},
+        "eval": {"score": 1.0},
+    }
+    logger.on_task_repeat_end(benchmark, report)  # type: ignore[arg-type]
+    logger.on_run_end(benchmark, [report])  # type: ignore[arg-type]
+
+    # Verify file was created
+    out_file = out_dir / "test_results.jsonl"
+    assert out_file.exists()
+    assert isinstance(logger.output_dir, Path)
+
+    lines = out_file.read_text().strip().splitlines()
+    assert len(lines) == 1
+
+
+@pytest.mark.core
+def test_file_result_logger_overwrite_false_prevents_overwriting(tmp_path):
+    """Test that FileResultLogger raises error when file exists and overwrite=False.
+
+    Verifies that when overwrite is False (default), attempting to write to
+    an existing file raises FileExistsError.
+    """
+    out_dir = tmp_path / "results"
+    out_dir.mkdir()
+
+    # Create an existing file
+    existing_file = out_dir / "test_results.jsonl"
+    existing_file.write_text("existing content\n")
+
+    # Try to create logger with overwrite=False (default)
+    logger = FileResultLogger(output_dir=out_dir, filename_pattern="test_results.jsonl", overwrite=False)
+
+    benchmark = MockBenchmark(n_tasks=1, n_repeats=1)
+    logger.on_run_start(benchmark)  # type: ignore[arg-type]
+
+    report = {
+        "task_id": benchmark.task_ids[0],
+        "repeat_idx": 0,
+        "traces": {"agent": "trace"},
+        "config": {"model": "gpt"},
+        "eval": {"score": 1.0},
+    }
+
+    # Should raise FileExistsError when trying to log first iteration
+    with pytest.raises(FileExistsError, match="Output file already exists.*Set overwrite=True"):
+        logger.on_task_repeat_end(benchmark, report)  # type: ignore[arg-type]
+
+    # Verify original file is unchanged
+    assert existing_file.read_text() == "existing content\n"
+
+
+@pytest.mark.core
+def test_file_result_logger_overwrite_true_allows_overwriting(tmp_path):
+    """Test that FileResultLogger overwrites existing file when overwrite=True.
+
+    Verifies that when overwrite is True, the logger successfully overwrites
+    an existing file with the same name.
+    """
+    out_dir = tmp_path / "results"
+    out_dir.mkdir()
+
+    # Create an existing file
+    existing_file = out_dir / "test_results.jsonl"
+    existing_file.write_text("existing content\n")
+
+    # Create logger with overwrite=True
+    logger = FileResultLogger(output_dir=out_dir, filename_pattern="test_results.jsonl", overwrite=True)
+
+    benchmark = MockBenchmark(n_tasks=1, n_repeats=1)
+    logger.on_run_start(benchmark)  # type: ignore[arg-type]
+
+    report = {
+        "task_id": benchmark.task_ids[0],
+        "repeat_idx": 0,
+        "traces": {"agent": "trace"},
+        "config": {"model": "gpt"},
+        "eval": {"score": 1.0},
+    }
+    logger.on_task_repeat_end(benchmark, report)  # type: ignore[arg-type]
+    logger.on_run_end(benchmark, [report])  # type: ignore[arg-type]
+
+    # Verify file was overwritten with new content
+    lines = existing_file.read_text().strip().splitlines()
+    assert len(lines) == 1
+    assert "existing content" not in existing_file.read_text()
+
+    obj = json.loads(lines[0])
+    assert obj["task_id"] == report["task_id"]
+    assert obj["repeat_idx"] == report["repeat_idx"]