Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions tests/test_subgraph_loader_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""Tests for ``workflow.subgraph_loader`` cache invalidation.

Regression coverage for issue #622: editing a subgraph YAML that has already
been loaded, then loading it again in the same process, must return the *new*
on-disk content. The original implementation cached parsed subgraphs in a
module-level dict keyed only on the resolved file path, with no staleness
check, so a subgraph stayed stale until ChatDev was restarted.
"""

import importlib
import os
from pathlib import Path

import pytest

import workflow.subgraph_loader as subgraph_loader


@pytest.fixture(autouse=True)
def _clear_cache():
"""Start each test with an empty module-level cache."""
importlib.reload(subgraph_loader)
yield


def _write_subgraph(path: Path, node_name: str) -> None:
path.write_text(
"graph:\n"
" nodes:\n"
f" - name: {node_name}\n"
"vars:\n"
" answer: 1\n",
encoding="utf-8",
)


def test_reload_after_edit_returns_new_content(tmp_path: Path):
subgraph = tmp_path / "subgraph.yaml"
_write_subgraph(subgraph, "v1")

graph_dict, _vars, _resolved = subgraph_loader.load_subgraph_config(str(subgraph))
assert graph_dict["nodes"][0]["name"] == "v1"

# Edit the file in place, bumping mtime to defeat coarse filesystem clocks.
_write_subgraph(subgraph, "v2")
stat = subgraph.stat()
os.utime(subgraph, (stat.st_atime + 1, stat.st_mtime + 1))

graph_dict, _vars, _resolved = subgraph_loader.load_subgraph_config(str(subgraph))
assert graph_dict["nodes"][0]["name"] == "v2", (
"subgraph_loader served a stale cached copy after the file was edited"
)


def test_unchanged_file_is_served_from_cache(tmp_path: Path, monkeypatch):
"""An unedited file must not be re-parsed on every load (cache preserved)."""
subgraph = tmp_path / "subgraph.yaml"
_write_subgraph(subgraph, "v1")

subgraph_loader.load_subgraph_config(str(subgraph))

calls = {"count": 0}
original = subgraph_loader._load_graph_dict

def _counting(path):
calls["count"] += 1
return original(path)

monkeypatch.setattr(subgraph_loader, "_load_graph_dict", _counting)

subgraph_loader.load_subgraph_config(str(subgraph))
subgraph_loader.load_subgraph_config(str(subgraph))
assert calls["count"] == 0, "unchanged subgraph should be served from cache"
12 changes: 8 additions & 4 deletions workflow/subgraph_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

_REPO_ROOT = Path(__file__).resolve().parents[1]
_DEFAULT_SUBGRAPH_ROOT = (_REPO_ROOT / "yaml_instance").resolve()
_SUBGRAPH_CACHE: Dict[Path, Dict[str, Any]] = {}
# Cache parsed subgraphs keyed on the resolved path, alongside the file's
# mtime at load time so an on-disk edit invalidates the cached copy (see #622).
_SUBGRAPH_CACHE: Dict[Path, Tuple[float, Dict[str, Any]]] = {}


def _resolve_candidate_paths(file_path: str, parent_source: str | None) -> List[Path]:
Expand Down Expand Up @@ -69,10 +71,12 @@ def load_subgraph_config(file_path: str, *, parent_source: str | None = None) ->
candidates = _resolve_candidate_paths(file_path, parent_source)
resolved_path = _resolve_existing_path(candidates).resolve()

if resolved_path not in _SUBGRAPH_CACHE:
_SUBGRAPH_CACHE[resolved_path] = _load_graph_dict(resolved_path)
mtime = resolved_path.stat().st_mtime
cached = _SUBGRAPH_CACHE.get(resolved_path)
if cached is None or cached[0] != mtime:
_SUBGRAPH_CACHE[resolved_path] = (mtime, _load_graph_dict(resolved_path))

payload = _SUBGRAPH_CACHE[resolved_path]
payload = _SUBGRAPH_CACHE[resolved_path][1]
graph_dict = deepcopy(payload["graph"])
vars_dict = dict(payload["vars"])
return graph_dict, vars_dict, str(resolved_path)
Expand Down