Skip to content

Commit 1369cf1

Browse files
committed
test(sdk): add unit tests for all SDK layers
1 parent cd7f589 commit 1369cf1

14 files changed

Lines changed: 553 additions & 0 deletions

tests/test_agent.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from pageindex.agent import AgentRunner, SYSTEM_PROMPT
2+
from pageindex.backend.protocol import AgentTools
3+
4+
5+
def test_agent_runner_init():
    """AgentRunner stores the model name it is constructed with on ``_model``."""
    agent_tools = AgentTools(function_tools=["mock_tool"])
    agent = AgentRunner(tools=agent_tools, model="gpt-4o")
    assert agent._model == "gpt-4o"
9+
10+
11+
def test_system_prompt_has_tool_instructions():
    """SYSTEM_PROMPT mentions each of the three document tools by name."""
    expected_tools = (
        "list_documents",
        "get_document_structure",
        "get_page_content",
    )
    for tool_name in expected_tools:
        assert tool_name in SYSTEM_PROMPT

tests/test_client.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# tests/test_client.py
2+
import pytest
3+
from pageindex.client import PageIndexClient, LocalClient, CloudClient
4+
5+
6+
def test_local_client_is_pageindex_client(tmp_path):
    """A LocalClient instance is also a PageIndexClient."""
    storage_dir = tmp_path / "pi"
    local = LocalClient(model="gpt-4o", storage_path=str(storage_dir))
    assert isinstance(local, PageIndexClient)
9+
10+
11+
def test_cloud_client_is_pageindex_client():
    """A CloudClient instance is also a PageIndexClient."""
    cloud = CloudClient(api_key="pi-test")
    assert isinstance(cloud, PageIndexClient)
14+
15+
16+
def test_collection_default_name(tmp_path):
    """collection() called without a name yields a collection named "default"."""
    storage_dir = tmp_path / "pi"
    local = LocalClient(model="gpt-4o", storage_path=str(storage_dir))
    unnamed = local.collection()
    assert unnamed.name == "default"
20+
21+
22+
def test_collection_custom_name(tmp_path):
    """collection("papers") yields a collection whose name is "papers"."""
    storage_dir = tmp_path / "pi"
    local = LocalClient(model="gpt-4o", storage_path=str(storage_dir))
    papers = local.collection("papers")
    assert papers.name == "papers"
26+
27+
28+
def test_list_collections_empty(tmp_path):
    """A client on fresh storage reports an empty list of collections."""
    storage_dir = tmp_path / "pi"
    local = LocalClient(model="gpt-4o", storage_path=str(storage_dir))
    existing = local.list_collections()
    assert existing == []
31+
32+
33+
def test_list_collections_after_create(tmp_path):
    """After collection("papers") is requested, "papers" shows up in the listing."""
    storage_dir = tmp_path / "pi"
    local = LocalClient(model="gpt-4o", storage_path=str(storage_dir))
    local.collection("papers")
    existing = local.list_collections()
    assert "papers" in existing
37+
38+
39+
def test_delete_collection(tmp_path):
    """delete_collection() removes a collection that was previously listed."""
    client = LocalClient(model="gpt-4o", storage_path=str(tmp_path / "pi"))
    client.collection("papers")
    # Precondition: without this check the test would also pass if the
    # collection had never been created in the first place.
    assert "papers" in client.list_collections()

    client.delete_collection("papers")
    assert "papers" not in client.list_collections()
44+
45+
46+
def test_register_parser(tmp_path):
    """Smoke test: register_parser() accepts a duck-typed parser without raising.

    Nothing is asserted about the client's state afterwards.
    """
    storage_dir = tmp_path / "pi"
    client = LocalClient(model="gpt-4o", storage_path=str(storage_dir))

    class FakeParser:
        def supported_extensions(self):
            return [".txt"]

        def parse(self, file_path, **kwargs):
            return None

    fake = FakeParser()
    client.register_parser(fake)

tests/test_cloud_backend.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from pageindex.backend.cloud import CloudBackend, API_BASE
2+
3+
4+
def test_cloud_backend_init():
    """CloudBackend keeps the API key on ``_api_key`` and in ``_headers["api_key"]``."""
    key = "pi-test"
    cloud = CloudBackend(api_key=key)
    assert cloud._api_key == "pi-test"
    assert cloud._headers["api_key"] == "pi-test"
8+
9+
10+
def test_api_base_url():
    """API_BASE contains the "pageindex.ai" host name."""
    expected_host = "pageindex.ai"
    assert expected_host in API_BASE
12+
13+
14+
def test_get_retrieve_model_is_none():
    """The cloud backend's agent tools for a collection carry no function tools.

    NOTE(review): the test name mentions a retrieve model, but the assertion
    is about ``get_agent_tools(...).function_tools`` - consider renaming.
    """
    cloud = CloudBackend(api_key="pi-test")
    agent_tools = cloud.get_agent_tools("col")
    assert agent_tools.function_tools == []

tests/test_collection.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# tests/test_collection.py
2+
import pytest
3+
from unittest.mock import MagicMock
4+
from pageindex.collection import Collection
5+
6+
7+
@pytest.fixture
def col():
    """Provide a Collection named "papers" backed by a MagicMock.

    The mock is primed with canned return values for ``add_document``,
    ``get_document`` and ``list_documents``, all describing document "d1".
    """
    mock_backend = MagicMock()
    mock_backend.add_document.return_value = "d1"
    mock_backend.get_document.return_value = {"doc_id": "d1", "doc_name": "paper.pdf"}
    mock_backend.list_documents.return_value = [
        {"doc_id": "d1", "doc_name": "paper.pdf", "doc_type": "pdf"},
    ]
    return Collection(name="papers", backend=mock_backend)
16+
17+
18+
def test_add(col):
    """add() returns the backend's id and forwards the collection name and path."""
    returned_id = col.add("paper.pdf")
    assert returned_id == "d1"
    mocked_add = col._backend.add_document
    mocked_add.assert_called_once_with("papers", "paper.pdf")
22+
23+
24+
def test_list_documents(col):
    """list_documents() surfaces the single document the backend reports."""
    listed = col.list_documents()
    assert len(listed) == 1
    first = listed[0]
    assert first["doc_id"] == "d1"
28+
29+
30+
def test_get_document(col):
    """get_document("d1") returns the backend's record for that document."""
    record = col.get_document("d1")
    assert record["doc_name"] == "paper.pdf"
33+
34+
35+
def test_delete_document(col):
    """delete_document() calls the backend once with the collection name and doc id."""
    col.delete_document("d1")
    mocked_delete = col._backend.delete_document
    mocked_delete.assert_called_once_with("papers", "d1")
38+
39+
40+
def test_name_property(col):
    """The ``name`` attribute reflects the name given at construction."""
    observed = col.name
    assert observed == "papers"

tests/test_config.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# tests/test_config.py
2+
import pytest
3+
from pageindex.config import ConfigLoader
4+
5+
6+
def test_load_defaults():
    """load() with no arguments yields the expected default option values."""
    options = ConfigLoader().load()
    assert options.model == "gpt-5.4"
    assert options.retrieve_model == "gpt-5.4"
    assert options.toc_check_page_num == 20
12+
13+
14+
def test_load_with_overrides():
    """Overrides passed to load() replace the corresponding default values."""
    c = ConfigLoader()
    # Both values differ from the "gpt-5.4" defaults checked in
    # test_load_defaults; overriding ``model`` with the default value itself
    # could not tell an applied override from an ignored one.
    opt = c.load({"model": "gpt-4o", "retrieve_model": "claude-sonnet"})
    assert opt.model == "gpt-4o"
    assert opt.retrieve_model == "claude-sonnet"
19+
20+
21+
def test_unknown_key_raises():
    """load() raises ValueError matching "Unknown config keys" for an unknown key."""
    loader = ConfigLoader()
    bad_overrides = {"nonexistent_key": "value"}
    with pytest.raises(ValueError, match="Unknown config keys"):
        loader.load(bad_overrides)

tests/test_content_node.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from pageindex.parser.protocol import ContentNode, ParsedDocument, DocumentParser
2+
3+
4+
def test_content_node_required_fields():
    """With only content and tokens given, the remaining fields are None."""
    minimal = ContentNode(content="hello", tokens=5)
    assert minimal.content == "hello"
    assert minimal.tokens == 5
    for optional_field in ("title", "index", "level"):
        assert getattr(minimal, optional_field) is None
11+
12+
13+
def test_content_node_all_fields():
    """title, index and level are stored as given to the constructor."""
    full = ContentNode(
        content="# Intro",
        tokens=10,
        title="Intro",
        index=1,
        level=1,
    )
    assert full.title == "Intro"
    assert full.index == 1
    assert full.level == 1
18+
19+
20+
def test_parsed_document():
    """ParsedDocument keeps its name and nodes; metadata is None when omitted."""
    page = ContentNode(content="page1", tokens=100, index=1)
    parsed = ParsedDocument(doc_name="test.pdf", nodes=[page])
    assert parsed.doc_name == "test.pdf"
    assert len(parsed.nodes) == 1
    assert parsed.metadata is None
26+
27+
28+
def test_parsed_document_with_metadata():
    """Metadata passed to ParsedDocument can be read back by key."""
    page = ContentNode(content="page1", tokens=100)
    extra = {"author": "John"}
    parsed = ParsedDocument(doc_name="test.pdf", nodes=[page], metadata=extra)
    assert parsed.metadata["author"] == "John"
32+
33+
34+
def test_document_parser_protocol():
    """A plain class with the two parser methods behaves as a document parser.

    The class does not inherit from DocumentParser; only its two methods are
    exercised here.
    """

    class MyParser:
        def supported_extensions(self) -> list[str]:
            return [".txt"]

        def parse(self, file_path: str, **kwargs) -> ParsedDocument:
            return ParsedDocument(doc_name="test", nodes=[])

    candidate = MyParser()
    extensions = candidate.supported_extensions()
    assert extensions == [".txt"]
    parsed = candidate.parse("test.txt")
    assert isinstance(parsed, ParsedDocument)

tests/test_errors.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from pageindex.errors import (
2+
PageIndexError,
3+
CollectionNotFoundError,
4+
DocumentNotFoundError,
5+
IndexingError,
6+
CloudAPIError,
7+
FileTypeError,
8+
)
9+
10+
11+
def test_all_errors_inherit_from_base():
    """Every specific error class derives from PageIndexError and from Exception."""
    specific_errors = (
        CollectionNotFoundError,
        DocumentNotFoundError,
        IndexingError,
        CloudAPIError,
        FileTypeError,
    )
    for error_type in specific_errors:
        assert issubclass(error_type, PageIndexError)
        assert issubclass(error_type, Exception)
15+
16+
17+
def test_error_message():
    """str() of an error returns the message it was constructed with."""
    raised = FileTypeError("Unsupported: .docx")
    rendered = str(raised)
    assert rendered == "Unsupported: .docx"
20+
21+
22+
def test_catch_base_catches_all():
    """Each specific error, when raised, is caught by ``except PageIndexError``."""
    specific_errors = (
        CollectionNotFoundError,
        DocumentNotFoundError,
        IndexingError,
        CloudAPIError,
        FileTypeError,
    )
    for error_type in specific_errors:
        try:
            raise error_type("test")
        except PageIndexError:
            # Landing here is the success condition; an error not derived from
            # PageIndexError would propagate and fail the test.
            continue

tests/test_events.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from pageindex.events import QueryEvent
2+
from pageindex.backend.protocol import AgentTools
3+
4+
5+
def test_query_event():
    """QueryEvent exposes the type and data it was constructed with."""
    delta = QueryEvent(type="answer_delta", data="hello")
    assert delta.type == "answer_delta"
    assert delta.data == "hello"
9+
10+
11+
def test_query_event_types():
    """QueryEvent can be built with each of the five event type strings."""
    event_types = (
        "reasoning",
        "tool_call",
        "tool_result",
        "answer_delta",
        "answer_done",
    )
    for type_name in event_types:
        built = QueryEvent(type=type_name, data="test")
        assert built.type == type_name
15+
16+
17+
def test_agent_tools_default_empty():
    """AgentTools() built with no arguments has empty tool and server lists."""
    empty_tools = AgentTools()
    assert empty_tools.function_tools == []
    assert empty_tools.mcp_servers == []
21+
22+
23+
def test_agent_tools_with_values():
    """AgentTools keeps one entry in each list when given one of each."""
    populated = AgentTools(function_tools=["tool1"], mcp_servers=["server1"])
    tool_count = len(populated.function_tools)
    assert tool_count == 1
    server_count = len(populated.mcp_servers)
    assert server_count == 1

tests/test_local_backend.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# tests/test_local_backend.py
2+
import pytest
3+
from pathlib import Path
4+
from pageindex.backend.local import LocalBackend
5+
from pageindex.storage.sqlite import SQLiteStorage
6+
from pageindex.errors import FileTypeError
7+
8+
9+
@pytest.fixture
def backend(tmp_path):
    """Provide a LocalBackend using a SQLite database and files dir under tmp_path."""
    db_file = tmp_path / "test.db"
    sqlite_storage = SQLiteStorage(str(db_file))
    uploads_dir = tmp_path / "files"
    return LocalBackend(
        storage=sqlite_storage,
        files_dir=str(uploads_dir),
        model="gpt-4o",
    )
14+
15+
16+
def test_collection_lifecycle(backend):
    """A collection is listed after creation and no longer listed after deletion."""
    collection_name = "papers"
    backend.get_or_create_collection(collection_name)
    after_create = backend.list_collections()
    assert "papers" in after_create
    backend.delete_collection(collection_name)
    after_delete = backend.list_collections()
    assert "papers" not in after_delete
21+
22+
23+
def test_list_documents_empty(backend):
    """A newly created collection lists no documents."""
    backend.get_or_create_collection("papers")
    listed = backend.list_documents("papers")
    assert listed == []
26+
27+
28+
def test_unsupported_file_type_raises(backend, tmp_path):
    """Adding a file with the ".xyz" extension raises FileTypeError."""
    backend.get_or_create_collection("papers")
    unsupported = tmp_path / "test.xyz"
    unsupported.write_text("hello")
    unsupported_path = str(unsupported)
    with pytest.raises(FileTypeError):
        backend.add_document("papers", unsupported_path)
34+
35+
36+
def test_register_custom_parser(backend):
    """After registering a ".txt" parser, the backend resolves one for "test.txt"."""
    from pageindex.parser.protocol import ParsedDocument, ContentNode

    class TxtParser:
        def supported_extensions(self):
            return [".txt"]

        def parse(self, file_path, **kwargs):
            body = Path(file_path).read_text()
            only_node = ContentNode(
                content=body,
                tokens=len(body.split()),
                title="Content",
                index=1,
                level=1,
            )
            return ParsedDocument(doc_name="test", nodes=[only_node])

    backend.register_parser(TxtParser())
    # Only resolution is checked; the parser's parse() is never invoked here.
    resolved = backend._resolve_parser("test.txt")
    assert resolved is not None

tests/test_markdown_parser.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import pytest
2+
from pathlib import Path
3+
from pageindex.parser.markdown import MarkdownParser
4+
from pageindex.parser.protocol import ContentNode, ParsedDocument
5+
6+
@pytest.fixture
def sample_md(tmp_path):
    """Write a small Markdown file under tmp_path and return its path as a string.

    The document has two level-1 headings ("Chapter 1", "Chapter 2") and two
    level-2 headings ("Section 1.1", "Section 1.2") under the first chapter.
    """
    md = tmp_path / "test.md"
    # Pin the encoding so the fixture does not depend on the platform default.
    md.write_text("""# Chapter 1
Some intro text.

## Section 1.1
Details here.

## Section 1.2
More details.

# Chapter 2
Another chapter.
""", encoding="utf-8")
    return str(md)
22+
23+
def test_supported_extensions():
    """MarkdownParser advertises both the ".md" and ".markdown" extensions."""
    extensions = MarkdownParser().supported_extensions()
    for suffix in (".md", ".markdown"):
        assert suffix in extensions
28+
29+
def test_parse_returns_parsed_document(sample_md):
    """parse() returns a ParsedDocument whose doc_name is "test" for test.md."""
    parsed = MarkdownParser().parse(sample_md)
    assert isinstance(parsed, ParsedDocument)
    assert parsed.doc_name == "test"
34+
35+
def test_parse_nodes_have_level(sample_md):
    """Each heading in the sample becomes a node with the matching level and title."""
    parser = MarkdownParser()
    result = parser.parse(sample_md)
    assert len(result.nodes) == 4
    assert result.nodes[0].level == 1
    assert result.nodes[0].title == "Chapter 1"
    assert result.nodes[1].level == 2
    assert result.nodes[1].title == "Section 1.1"
    # The third and fourth headings were previously only partly covered.
    assert result.nodes[2].level == 2
    assert result.nodes[2].title == "Section 1.2"
    assert result.nodes[3].level == 1
    assert result.nodes[3].title == "Chapter 2"
44+
45+
def test_parse_nodes_have_content(sample_md):
    """The first two nodes contain the body text that follows their headings."""
    parsed = MarkdownParser().parse(sample_md)
    chapter_one, section_one_one = parsed.nodes[0], parsed.nodes[1]
    assert "Some intro text" in chapter_one.content
    assert "Details here" in section_one_one.content
50+
51+
def test_parse_nodes_have_index(sample_md):
    """Every node produced from the sample document has a non-None index."""
    parser = MarkdownParser()
    result = parser.parse(sample_md)
    # Guard against a vacuous pass: the loop below asserts nothing when the
    # parser returns no nodes at all.
    assert result.nodes
    for node in result.nodes:
        assert node.index is not None

0 commit comments

Comments
 (0)