Skip to content

Commit cf6433f

Browse files
author
Jet Xu
committed
test: implement comprehensive unit test suite and fixtures
- Add `conftest.py` for shared pytest fixtures and global mocks.
- Implement unit tests for the RAG processor, LLM handler, and GitHub integration.
- Add coverage for utility functions, data retrieval, and logging.
- Ensure tests run in isolation without external API calls.
1 parent dec5b25 commit cf6433f

9 files changed

Lines changed: 502 additions & 0 deletions

tests/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# tests/__init__.py
2+
# Marker file for test package

tests/conftest.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import pytest
2+
from unittest.mock import MagicMock, AsyncMock
3+
import sys
4+
from datetime import datetime, timezone
5+
6+
# Mock external dependencies that might try to connect to internet or load heavy models
7+
# Register one independent MagicMock per heavy/networked package so that any
# later import of these names resolves to a stub instead of the real library.
sys.modules.update(
    {
        stubbed_name: MagicMock()
        for stubbed_name in (
            'langchain_openai',
            'langchain_mistralai',
            'transformers',
            'sentence_transformers',
        )
    }
)
11+
12+
@pytest.fixture
def mock_github_instance():
    """Return a MagicMock stand-in for the ExtendedGithub client.

    The stub's ``get_user()`` result reports the login ``"test_user"``.
    """
    # Dotted keys configure attributes on the auto-created child mocks.
    return MagicMock(**{"get_user.return_value.login": "test_user"})
18+
19+
@pytest.fixture
def mock_repo_object():
    """Return a MagicMock stand-in for a PyGithub Repository object.

    The stub carries fixed metadata for ``owner/test-repo`` and an
    ``updated_at`` timestamp taken at fixture creation time (UTC).
    """
    repo_attributes = {
        "id": 12345,
        "name": "test-repo",
        "full_name": "owner/test-repo",
        "description": "A test repository",
        "html_url": "https://github.com/owner/test-repo",
        "stargazers_count": 100,
        "subscribers_count": 10,
        "language": "Python",
        "default_branch": "main",
        "updated_at": datetime.now(timezone.utc),
    }
    repo_stub = MagicMock()
    # setattr (not constructor kwargs) so that ``name`` becomes a plain
    # attribute instead of the mock's own display name.
    for attribute, value in repo_attributes.items():
        setattr(repo_stub, attribute, value)
    return repo_stub
34+
35+
@pytest.fixture
def mock_content_file():
    """Return a MagicMock stand-in for a PyGithub ContentFile object.

    The stub describes ``src/test.py`` with both its base64 payload and the
    matching decoded bytes.
    """
    file_attributes = {
        "name": "test.py",
        "path": "src/test.py",
        "encoding": "base64",
        "content": "cHJpbnQoImhlbGxvIik=",  # print("hello") in base64
        "decoded_content": b'print("hello")',
    }
    file_stub = MagicMock()
    # setattr keeps ``name`` a regular attribute rather than the mock's label.
    for attribute, value in file_attributes.items():
        setattr(file_stub, attribute, value)
    return file_stub
45+
46+
@pytest.fixture
def mock_llm_handler():
    """Return a MagicMock stand-in for LLMHandler with an awaitable ``ainvoke``."""
    # ainvoke must be an AsyncMock so tests can ``await`` it.
    return MagicMock(ainvoke=AsyncMock())

tests/test_data_retrieval.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import pytest
2+
from unittest.mock import MagicMock, patch, PropertyMock
3+
from llama_github.data_retrieval.github_api import GitHubAPIHandler
4+
from llama_github.data_retrieval.github_entities import Repository, RepositoryPool
5+
6+
class TestRepository:
    """Tests for the Repository wrapper, driven by a mocked GitHub client."""

    def test_repository_initialization(self, mock_github_instance, mock_repo_object):
        """A new Repository exposes the mocked repo's name, id and language."""
        mock_github_instance.get_repo.return_value = mock_repo_object

        wrapped_repo = Repository("owner/test-repo", mock_github_instance)

        assert wrapped_repo.full_name == "owner/test-repo"
        assert wrapped_repo.id == 12345
        assert wrapped_repo.language == "Python"

    def test_get_readme_caching(self, mock_github_instance, mock_repo_object):
        """Two get_readme() calls return the same text but hit the API once."""
        mock_github_instance.get_repo.return_value = mock_repo_object
        wrapped_repo = Repository("owner/test-repo", mock_github_instance)

        # Stub the README returned by the underlying repo object.
        readme_stub = MagicMock(decoded_content=b"# Readme")
        mock_repo_object.get_readme.return_value = readme_stub

        # Initial read goes through to the mocked API.
        first_read = wrapped_repo.get_readme()
        assert first_read == "# Readme"

        # Repeat read is expected to be answered without another API call.
        second_read = wrapped_repo.get_readme()
        assert second_read == "# Readme"
        assert mock_repo_object.get_readme.call_count == 1

    def test_get_file_content_base64(self, mock_github_instance, mock_repo_object, mock_content_file):
        """get_file_content() returns the decoded text of the mocked file."""
        mock_repo_object.get_contents.return_value = mock_content_file
        mock_github_instance.get_repo.return_value = mock_repo_object

        wrapped_repo = Repository("owner/test-repo", mock_github_instance)
        file_text = wrapped_repo.get_file_content("src/test.py")

        assert file_text == 'print("hello")'
42+
43+
class TestRepositoryPool:
    """Tests for the RepositoryPool singleton and its repository cache."""

    def setup_method(self):
        """Reset the class-level singleton state before each test."""
        # Reset singleton for testing
        RepositoryPool._instance = None
        # NOTE(review): this replaces the class-level lock with a mock and never
        # restores it, so no real locking happens afterwards - confirm this is
        # acceptable for the rest of the test session.
        RepositoryPool._instance_lock = MagicMock()

    def test_singleton_behavior(self, mock_github_instance):
        """Constructing the pool twice yields the same object."""
        pool1 = RepositoryPool(mock_github_instance)
        pool2 = RepositoryPool(mock_github_instance)
        assert pool1 is pool2

    def test_get_repository_caching(self, mock_github_instance, mock_repo_object):
        """Requesting the same repository twice returns one cached entry."""
        mock_github_instance.get_repo.return_value = mock_repo_object
        pool = RepositoryPool(mock_github_instance)

        repo1 = pool.get_repository("owner/test-repo")
        repo2 = pool.get_repository("owner/test-repo")

        assert repo1 is repo2
        # Ensure we didn't create two Repository objects
        assert len(pool._pool) == 1

    def test_cleanup_logic(self, mock_github_instance):
        """Replay the idle-expiry check by hand against an expired entry.

        NOTE(review): this exercises a copy of the expiry condition written
        inline below, not RepositoryPool._cleanup itself - consider exposing a
        single-pass cleanup method so the real code path can be tested.
        """
        # Local import: the test module's top-level imports do not provide
        # datetime/timezone, so using them unimported raised NameError.
        from datetime import datetime, timezone

        # Stop the background cleanup thread right away and replay its expiry
        # check inline, so the test never waits on the thread.
        pool = RepositoryPool(mock_github_instance, cleanup_interval=0.1, max_idle_time=0.1)
        pool.stop_cleanup()

        # Manually insert an expired repo
        mock_repo = MagicMock()
        mock_repo.last_read_time = datetime(2000, 1, 1, tzinfo=timezone.utc)
        mock_repo.creation_time = datetime(2000, 1, 1, tzinfo=timezone.utc)

        with pool._registry_lock:
            pool._pool["expired/repo"] = mock_repo
            pool._locks_registry["expired/repo"] = MagicMock()

        # Manually invoke cleanup logic once, with "now" pinned far in the future
        with patch('llama_github.data_retrieval.github_entities.datetime') as mock_dt:
            mock_dt.now.return_value = datetime(2025, 1, 1, tzinfo=timezone.utc)

            # Extract the logic from _cleanup loop for testing
            with pool._registry_lock:
                current_time = mock_dt.now(timezone.utc)
                if (current_time - mock_repo.last_read_time).total_seconds() > pool.max_idle_time:
                    del pool._locks_registry["expired/repo"]
                    mock_repo.clear_cache()

        mock_repo.clear_cache.assert_called()
        # The inline expiry branch must also have dropped the per-repo lock.
        assert "expired/repo" not in pool._locks_registry
92+
93+
class TestGitHubAPIHandler:
    """Tests for GitHubAPIHandler with the GitHub client fully mocked."""

    def test_search_code_integration(self, mock_github_instance):
        """search_code() returns one result carrying the fetched file content."""
        handler = GitHubAPIHandler(mock_github_instance)

        # Single fake hit returned by the mocked search_code call.
        search_hit = MagicMock()
        search_hit.configure_mock(
            **{
                "name": "test.py",
                "path": "test.py",
                "repository.full_name": "owner/repo",
                "html_url": "http://url",
            }
        )
        mock_github_instance.search_code.return_value = [search_hit]

        # Repository lookups on the handler yield a stub that serves content.
        repo_stub = MagicMock()
        repo_stub.get_file_content.return_value = "content"
        with patch.object(handler, 'get_repository') as mock_get_repo:
            mock_get_repo.return_value = repo_stub

            results = handler.search_code("query")

            assert len(results) == 1
            assert results[0]['content'] == "content"

tests/test_github_auth_manager.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import pytest
2+
from unittest.mock import patch, MagicMock
3+
from llama_github.github_integration.github_auth_manager import GitHubAuthManager, ExtendedGithub
4+
5+
class TestGitHubAuthManager:
    """Tests for GitHubAuthManager with the GitHub client classes patched."""

    def setup_method(self):
        """Create a fresh auth manager for each test."""
        self.auth_manager = GitHubAuthManager()

    @patch('llama_github.github_integration.github_auth_manager.ExtendedGithub')
    def test_authenticate_with_token(self, mock_extended_github):
        """Token auth stores the token and returns the patched client instance."""
        fake_token = "fake_token"

        client = self.auth_manager.authenticate_with_token(fake_token)

        assert self.auth_manager.access_token == fake_token
        assert client == mock_extended_github.return_value
        mock_extended_github.assert_called_with(login_or_token=fake_token)

    @patch('llama_github.github_integration.github_auth_manager.GithubIntegration')
    @patch('llama_github.github_integration.github_auth_manager.ExtendedGithub')
    def test_authenticate_with_app(self, mock_extended_github, mock_integration):
        """App auth stores the integration's token and builds the client with it."""
        # The patched integration hands back an access token object whose
        # ``token`` is "app_token".
        mock_integration.return_value.get_access_token.return_value.token = "app_token"

        self.auth_manager.authenticate_with_app(1, "key", 123)

        assert self.auth_manager.access_token == "app_token"
        mock_extended_github.assert_called_with(login_or_token="app_token")
29+
30+
class TestExtendedGithub:
    """Tests for ExtendedGithub helpers with HTTP access patched out."""

    @patch('requests.get')
    def test_get_repo_structure(self, mock_get):
        """A flat tree payload is exposed as a nested folder/children mapping."""
        # Canned HTTP response: one folder containing one 100-byte file.
        tree_payload = {
            "tree": [
                {"path": "folder", "type": "tree"},
                {"path": "folder/file.py", "type": "blob", "size": 100}
            ]
        }
        fake_response = MagicMock(status_code=200)
        fake_response.json.return_value = tree_payload
        mock_get.return_value = fake_response

        client = ExtendedGithub("token")
        structure = client.get_repo_structure("owner/repo")

        assert "folder" in structure
        folder_children = structure["folder"]["children"]
        assert "file.py" in folder_children
        assert folder_children["file.py"]["size"] == 100

    @patch('requests.Session')
    def test_search_code_retry_logic(self, mock_session):
        """search_code() returns a list when the patched session answers 200."""
        session_stub = mock_session.return_value
        session_stub.mount = MagicMock()

        client = ExtendedGithub("token")
        # Only checks that no exception is raised and a list comes back.
        with patch.object(client, 'access_token', 'token'):
            # Every GET on the patched session succeeds with an empty item list.
            canned_reply = session_stub.get.return_value
            canned_reply.status_code = 200
            canned_reply.json.return_value = {'items': []}

            result = client.search_code("query")
            assert isinstance(result, list)

tests/test_initial_load.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import pytest
2+
from unittest.mock import patch, MagicMock
3+
from llama_github.llm_integration.initial_load import LLMManager
4+
5+
@pytest.fixture(autouse=True)
def reset_singleton():
    """Clear LLMManager's class-level singleton state around every test.

    Both ``_instance`` and ``_initialized`` are reset before the test and
    again afterwards. Previously teardown reset only ``_instance``, so
    ``_initialized`` could leak out of this module.
    """
    def _clear_singleton_state():
        LLMManager._instance = None
        LLMManager._initialized = False

    _clear_singleton_state()
    yield
    # Teardown mirrors setup so later test modules start from a clean slate.
    _clear_singleton_state()
11+
12+
class TestLLMManager:
    """Tests for LLMManager construction with model loaders patched out."""

    @patch('llama_github.llm_integration.initial_load.ChatOpenAI')
    def test_init_openai(self, mock_chat_openai):
        """Passing an OpenAI key selects the OpenAI model type and builds an LLM."""
        openai_manager = LLMManager(openai_api_key="sk-test", simple_mode=True)

        assert openai_manager.model_type == "OpenAI"
        assert openai_manager.llm is not None
        mock_chat_openai.assert_called()

    @patch('llama_github.llm_integration.initial_load.AutoTokenizer')
    @patch('llama_github.llm_integration.initial_load.AutoModel')
    @patch('llama_github.llm_integration.initial_load.AutoModelForSequenceClassification')
    def test_init_huggingface_full_mode(self, mock_seq, mock_model, mock_tokenizer):
        """Full mode populates tokenizer, embedding and rerank attributes."""
        full_mode_options = dict(
            open_source_models_hg_dir="/tmp",
            simple_mode=False,
            embedding_model="emb-model",
            rerank_model="rerank-model",
        )
        # Patch platform and subprocess so system checks stay inside the test.
        with patch('sys.platform', 'linux'), patch('subprocess.run'):
            full_manager = LLMManager(**full_mode_options)

            for loaded_attribute in ("tokenizer", "embedding_model", "rerank_model"):
                assert getattr(full_manager, loaded_attribute) is not None
            mock_tokenizer.from_pretrained.assert_called_with("emb-model")

    def test_simple_mode_skips_heavy_models(self):
        """Simple mode leaves the embedding and rerank models unset."""
        simple_manager = LLMManager(simple_mode=True)

        assert simple_manager.embedding_model is None
        assert simple_manager.rerank_model is None

tests/test_llm_handler.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import pytest
2+
from unittest.mock import MagicMock, AsyncMock
3+
from llama_github.llm_integration.llm_handler import LLMHandler
4+
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
5+
6+
@pytest.mark.asyncio
async def test_ainvoke_basic():
    """Check that LLMHandler.ainvoke returns the mocked LLM's reply."""
    # LLM stub whose async invoke resolves to a fixed reply.
    llm_stub = MagicMock()
    llm_stub.ainvoke = AsyncMock(return_value="AI Response")

    # Manager stub that hands out the LLM stub and reports an OpenAI model.
    manager_stub = MagicMock()
    manager_stub.get_llm.return_value = llm_stub
    manager_stub.model_type = "OpenAI"

    handler = LLMHandler(llm_manager=manager_stub)

    reply = await handler.ainvoke("Hello")
    assert reply == "AI Response"
18+
19+
def test_compose_chat_history():
    """A two-entry history becomes a HumanMessage followed by an AIMessage."""
    handler = LLMHandler(MagicMock())

    messages = handler._compose_chat_history_messages(["Hi", "Hello"])

    expected_turns = [(HumanMessage, "Hi"), (AIMessage, "Hello")]
    assert len(messages) == 2
    for message, (expected_type, expected_text) in zip(messages, expected_turns):
        assert isinstance(message, expected_type)
        assert message.content == expected_text
29+
30+
def test_compose_context_messages():
    """Two context strings yield two messages, the first a SystemMessage."""
    handler = LLMHandler(MagicMock())

    context_messages = handler._compose_context_messages(["ctx1", "ctx2"])

    assert len(context_messages) == 2
    first_message = context_messages[0]
    assert isinstance(first_message, SystemMessage)

tests/test_logger.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import logging
2+
import pytest
3+
from llama_github.logger import configure_logging, logger
4+
5+
def test_configure_logging_defaults():
    """Test default logging configuration.

    With no arguments, configure_logging() is expected to leave the shared
    logger at INFO with exactly one StreamHandler. The logger's previous
    handlers and level are restored afterwards so this test does not leak
    state into others.
    """
    saved_handlers, saved_level = logger.handlers, logger.level
    # Reset handlers
    logger.handlers = []
    try:
        configure_logging()

        assert logger.level == logging.INFO
        assert len(logger.handlers) == 1
        assert isinstance(logger.handlers[0], logging.StreamHandler)
    finally:
        # Put the shared logger back the way this test found it.
        logger.handlers = saved_handlers
        logger.setLevel(saved_level)
15+
16+
def test_configure_logging_custom_level():
    """Test logging with custom level.

    configure_logging(level=logging.DEBUG) is expected to set the shared
    logger to DEBUG. The logger's previous handlers and level are restored
    afterwards so this test does not leak state into others.
    """
    saved_handlers, saved_level = logger.handlers, logger.level
    logger.handlers = []
    try:
        configure_logging(level=logging.DEBUG)
        assert logger.level == logging.DEBUG
    finally:
        # Put the shared logger back the way this test found it.
        logger.handlers = saved_handlers
        logger.setLevel(saved_level)
21+
22+
def test_configure_logging_custom_handler():
    """Test logging with a custom handler.

    configure_logging(handler=...) is expected to install exactly the handler
    it was given. The logger's previous handlers and level are restored
    afterwards so this test does not leak state into others.
    """
    saved_handlers, saved_level = logger.handlers, logger.level
    logger.handlers = []
    try:
        custom_handler = logging.NullHandler()
        configure_logging(handler=custom_handler)

        assert len(logger.handlers) == 1
        assert logger.handlers[0] == custom_handler
    finally:
        # Put the shared logger back the way this test found it.
        logger.handlers = saved_handlers
        logger.setLevel(saved_level)

0 commit comments

Comments
 (0)