Skip to content

Commit ac3f1de

Browse files
committed
Refactor tests and complete architectural hardening
1 parent 7babad0 commit ac3f1de

4 files changed

Lines changed: 161 additions & 5 deletions

File tree

KnowCode.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -950,22 +950,22 @@ Commands invoked without the required extra should fail fast with: *"Install kno
950950
> - *"Write a summary of what changed in this module since last release."*
951951
> - *"Which parts of the documentation are stale and need updating?"*
952952
953-
### **Phase 4.5: Architectural Hardening (NEXT)** *(addresses AD-1 through AD-7)*
953+
### **Phase 4.5: Architectural Hardening (COMPLETED)** *(addresses AD-1 through AD-7)*
954954
15. **[x] Dependency Modularisation (AD-1)**: Move heavy dependencies behind optional extras (`server`, `search`, `llm`, `watch`, `all`). Core install stays lightweight.
955955
16. **[x] Side-Effect-Free Query Paths (AD-2)**: Remove auto-analyze/index from `retrieve_context_for_query()`. Fail fast with actionable errors. Add explicit `ensure_store()` / `ensure_index()` helpers.
956956
17. **[x] Schema Versioning (AD-3)**: Add `schema_version` to knowledge store JSON, index manifest, chunks metadata, and vector metadata. Include migration/validation shims on load.
957957
18. **[x] Data Model Fixes (AD-4)**: Change `metadata: dict[str, str]` to `dict[str, Any]` across `Entity`, `Relationship`, and `CodeChunk` with mixed-type roundtrip coverage.
958958
19. **[x] Configuration Hardening (AD-5)**: Replace `print()` with `logging`; raise on invalid config in server/MCP contexts via strict mode; validate known YAML keys and warn on unknown keys.
959959
20. **[x] Service Layer Decomposition (AD-6)**: Extract `RetrievalOrchestrator` from `KnowCodeService`. Add `Protocol` interfaces for `EmbeddingProvider`, `VectorStore`, and `KnowledgeStoreProtocol`.
960960
21. **[x] Entity Identity Resilience (AD-7)**: Add `content_hash` to entity metadata for rename-resilient correlation.
961-
22. **[ ] Layer Contract Tests**: Parser → `ParseResult` contract tests; store save/load roundtrip with schema version; retrieval golden-query tests; CLI smoke tests (Click runner); API endpoint contract tests (conditional on `server` extra).
961+
22. **[x] Layer Contract Tests**: Parser → `ParseResult` contract tests; store save/load roundtrip with schema version; retrieval golden-query tests; CLI smoke tests (Click runner); API endpoint contract tests (conditional on `server` extra).
962962

963963
> *This phase does not unlock new user-facing questions — it makes the existing answers more reliable, portable, and predictable. For example:*
964964
> - *"I upgraded KnowCode — will my existing analysis still work?"* (schema versioning)
965965
> - *"I renamed a file — does KnowCode still recognise the same functions?"* (entity identity resilience)
966966
> - *"Can I install KnowCode without all the heavy AI dependencies?"* (dependency modularisation)
967967
968-
### **Phase 5: Deep Analysis**
968+
### **Phase 5: Deep Analysis (NEXT)**
969969
23. **[ ] Static Behavioral Analysis (Layer 4)**: Data flow, state transitions, side-effect classification.
970970
24. **[ ] Intent Extraction (Layer 6)**: ADR/PR/commit intent linking beyond commit metadata.
971971
25. **[ ] Confidence Scoring (Layer 3)**: Weighted edges/entities by evidence source.

tests/e2e/test_server_refinement.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,13 @@ def _disable_rate_limiter() -> None:
3030

3131

3232
@pytest.fixture(scope="module")
33-
def service() -> KnowCodeService:
34-
return KnowCodeService(store_path=".")
33+
def service(tmp_path_factory: pytest.TempPathFactory) -> KnowCodeService:
    """Return a KnowCodeService rooted in a freshly created temporary directory.

    A single ``app.py`` file is written into the directory, then
    ``ensure_store()`` and ``ensure_index()`` are called explicitly before the
    service is handed to the tests.
    """
    project_root = tmp_path_factory.mktemp("temp_data")
    sample_module = project_root / "app.py"
    sample_module.write_text("class GraphBuilder:\n pass\n", encoding="utf-8")

    knowcode_service = KnowCodeService(store_path=str(project_root))
    knowcode_service.ensure_store()
    knowcode_service.ensure_index()
    return knowcode_service
3540

3641

3742
def test_reload_endpoint(service: KnowCodeService) -> None:
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
"""Retrieval Golden-Query Tests."""
2+
3+
import pytest
4+
from knowcode.indexing.indexer import Indexer
5+
from knowcode.retrieval.search_engine import SearchEngine
6+
from knowcode.data_models import EmbeddingConfig
7+
from knowcode.retrieval.hybrid_index import HybridIndex
8+
from knowcode.storage.chunk_repository import InMemoryChunkRepository
9+
from knowcode.storage.vector_store import VectorStore
10+
from knowcode.storage.knowledge_store import KnowledgeStore
11+
12+
# Mocking the embedding provider so we don't hit external APIs in tests.
13+
class DeterministicEmbeddingProvider:
    """Test double for an embedding provider that never calls an external API.

    Every text maps to the same constant vector, so results are fully
    deterministic. The vector width comes from a single ``dimension`` value so
    that the advertised config and the produced vectors cannot drift apart.

    Args:
        dimension: Length of every produced vector. Defaults to 4.
    """

    def __init__(self, dimension: int = 4) -> None:
        self.dimension = dimension
        self.config = EmbeddingConfig(dimension=dimension)

    def embed(self, texts: list[str]) -> list[list[float]]:
        """Return one constant vector per input text (an empty list for no texts)."""
        # Identical dummy vectors carry no ranking signal of their own;
        # BM25 will do the heavy lifting for relevance in this test.
        return [self.embed_single(text) for text in texts]

    def embed_single(self, text: str) -> list[float]:
        """Return a fresh constant vector of ``self.dimension`` floats; ``text`` is ignored."""
        return [0.1] * self.dimension
24+
25+
@pytest.fixture
def search_engine(tmp_path):
    """Index three small sample modules and return a SearchEngine wired to them.

    The sample files are written under ``tmp_path`` and passed one by one to
    ``Indexer.index_file``. The engine is then assembled from the same chunk
    repository, the deterministic embedding provider, a ``HybridIndex`` over
    the repository and vector store, and an empty ``KnowledgeStore``.
    """
    chunk_repo = InMemoryChunkRepository()
    vector_store = VectorStore(dimension=4)
    provider = DeterministicEmbeddingProvider()

    indexer = Indexer(provider, chunk_repo=chunk_repo, vector_store=vector_store)

    # Create a small realistic codebase
    sample_files = {
        "auth.py": '''
def login(username, password):
    """Handle user authentication and login flow."""
    return True
''',
        "database.py": '''
def connect_to_db(connection_string):
    """Establish connection to the main postgres database."""
    pass
''',
        "utils.py": '''
def reverse_string(s):
    """A helper function to reverse a string."""
    return s[::-1]
''',
    }

    for filename, content in sample_files.items():
        source_path = tmp_path / filename
        # Pin the encoding so the written bytes do not depend on the locale.
        source_path.write_text(content, encoding="utf-8")
        indexer.index_file(source_path)

    hybrid = HybridIndex(chunk_repo, vector_store)
    store = KnowledgeStore()
    return SearchEngine(chunk_repo, provider, hybrid, store)
62+
63+
@pytest.mark.parametrize(
    "query, expected_filename",
    [
        ("user login authentication flow", "auth.py"),
        ("postgres database connection", "database.py"),
        ("helper reverse string", "utils.py"),
    ],
)
def test_golden_queries(search_engine, query, expected_filename):
    """Golden-query check: the top-ranked hit must come from the expected file."""
    hits = search_engine.search(query, limit=3)
    assert len(hits) > 0

    # The expected filename should appear in the chunk ID
    # (e.g. auth.py::login::chunk_0), so a substring match identifies the file.
    best_hit_id = hits[0].id
    assert expected_filename in best_hit_id
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import pytest
2+
from pathlib import Path
3+
4+
from knowcode.data_models import ParseResult, Entity, Relationship
5+
from knowcode.parsers.python_parser import PythonParser
6+
from knowcode.parsers.markdown_parser import MarkdownParser
7+
from knowcode.parsers.yaml_parser import YamlParser
8+
from knowcode.parsers.javascript_parser import JavaScriptParser
9+
from knowcode.parsers.java_parser import JavaParser
10+
from knowcode.parsers.rust_parser import RustParser
11+
from knowcode.parsers.vue_parser import VueParser
12+
from knowcode.parsers.typescript_parser import TypeScriptParser
13+
14+
# Parametrization matrix for the contract tests below. Each entry is a
# (parser class, minimal source snippet, file extension) triple.
PARSERS = [
    (PythonParser, "x = 1\n", ".py"),
    (MarkdownParser, "# Hello\n", ".md"),
    (YamlParser, "key: value\n", ".yaml"),
    (JavaScriptParser, "let x = 1;\n", ".js"),
    (JavaParser, "class Main {}\n", ".java"),
    (RustParser, "fn main() {}\n", ".rs"),
    (VueParser, "<template><div></div></template>\n", ".vue"),
    (TypeScriptParser, "let x: number = 1;\n", ".ts"),
]
24+
25+
@pytest.mark.parametrize("parser_class, code, ext", PARSERS)
def test_contract_missing_file(tmp_path, parser_class, code, ext):
    """Parsing a nonexistent path must yield a ParseResult that reports errors.

    ``code`` is unused here; it is accepted only because the shared PARSERS
    matrix supplies it.
    """
    absent_path = tmp_path / f"missing_file{ext}"

    outcome = parser_class().parse_file(absent_path)

    assert isinstance(outcome, ParseResult)
    assert outcome.file_path == str(absent_path)
    assert len(outcome.errors) > 0
    assert isinstance(outcome.entities, list)
    assert isinstance(outcome.relationships, list)
36+
37+
@pytest.mark.parametrize("parser_class, code, ext", PARSERS)
def test_contract_empty_file(tmp_path, parser_class, code, ext):
    """Parsing a zero-length file must still yield a well-formed ParseResult.

    ``code`` is unused here; it is accepted only because the shared PARSERS
    matrix supplies it.
    """
    parser = parser_class()
    blank_path = tmp_path / f"empty{ext}"
    blank_path.write_text("")

    outcome = parser.parse_file(blank_path)

    assert isinstance(outcome, ParseResult)
    assert outcome.file_path == str(blank_path)

    # All three collections must be real lists, even when nothing was parsed.
    assert isinstance(outcome.entities, list)
    assert isinstance(outcome.relationships, list)
    assert isinstance(outcome.errors, list)

    # Whatever the parser did emit must use the shared data-model types.
    assert all(isinstance(entity, Entity) for entity in outcome.entities)
    assert all(isinstance(rel, Relationship) for rel in outcome.relationships)
54+
55+
@pytest.mark.parametrize("parser_class, code, ext", PARSERS)
def test_contract_valid_file(tmp_path, parser_class, code, ext):
    """Parsing a minimal valid file must yield a well-formed ParseResult.

    Beyond the container types, every entity must have a non-empty string ID
    and a string qualified name, and every relationship must have string
    source and target IDs.
    """
    parser = parser_class()
    valid_file = tmp_path / f"valid{ext}"
    # Pin the encoding so the bytes handed to the parser do not depend on the
    # locale of the machine running the tests.
    valid_file.write_text(code, encoding="utf-8")

    result = parser.parse_file(valid_file)
    assert isinstance(result, ParseResult)
    assert result.file_path == str(valid_file)
    assert isinstance(result.entities, list)
    assert isinstance(result.relationships, list)
    assert isinstance(result.errors, list)

    for entity in result.entities:
        assert isinstance(entity, Entity)
        assert isinstance(entity.id, str)
        assert entity.id != ""
        assert isinstance(entity.qualified_name, str)
    for rel in result.relationships:
        assert isinstance(rel, Relationship)
        assert isinstance(rel.source_id, str)
        assert isinstance(rel.target_id, str)

0 commit comments

Comments
 (0)