fix(cli): fix query/reranker and MCP error handling; update test coverage.

Zhe Yu · Zhe Yu · commit f2aadff648cc · 2025-08-28T17:22:28.000+08:00
diff --git a/src/vectorcode/mcp_main.py b/src/vectorcode/mcp_main.py
@@ -3,6 +3,7 @@
 import logging
 import os
 import sys
+import traceback
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional, cast
@@ -161,13 +162,16 @@ async def vectorise_files(paths: list[str], project_root: str) -> dict[str, int]
 
         return stats.to_dict()
     except Exception as e:
-        logger.error("Failed to access collection at %s", project_root)
-        raise McpError(
-            ErrorData(
-                code=1,
-                message=f"{e.__class__.__name__}: Failed to create the collection at {project_root}.",
-            )
-        )
+        if isinstance(e, McpError):
+            logger.error("Failed to access collection at %s", project_root)
+            raise
+        else:
+            raise McpError(
+                ErrorData(
+                    code=1,
+                    message="\n".join(traceback.format_exception(e)),
+                )
+            ) from e
 
 
 async def query_tool(
@@ -222,13 +226,16 @@ async def query_tool(
             return results
 
     except Exception as e:
-        logger.error("Failed to access collection at %s", project_root)
-        raise McpError(
-            ErrorData(
-                code=1,
-                message=f"{e.__class__.__name__}: Failed to access the collection at {project_root}. Use `list_collections` tool to get a list of valid paths for this field.",
-            )
-        )
+        if isinstance(e, McpError):
+            logger.error("Failed to access collection at %s", project_root)
+            raise
+        else:
+            raise McpError(
+                ErrorData(
+                    code=1,
+                    message="\n".join(traceback.format_exception(e)),
+                )
+            ) from e
 
 
 async def ls_files(project_root: str) -> list[str]:
diff --git a/src/vectorcode/subcommands/query/__init__.py b/src/vectorcode/subcommands/query/__init__.py
@@ -48,9 +48,9 @@ def conver_query_results(
         metadatas = chroma_result["metadatas"][q_i]
         for doc, dist, meta in zip(documents, distances, metadatas):
             chunk = Chunk(text=doc)
-            if meta["start"]:
+            if meta.get("start"):
                 chunk.start = Point(int(meta.get("start", 0)), 0)
-            if meta["end"]:
+            if meta.get("end"):
                 chunk.end = Point(int(meta.get("end", 0)) + 1, 0)
             chroma_results_list.append(
                 vectorcode_types.QueryResult(
diff --git a/src/vectorcode/subcommands/query/reranker/base.py b/src/vectorcode/subcommands/query/reranker/base.py
@@ -46,7 +46,9 @@ def create(cls, configs: Config, **kwargs: Any):
             raise
 
     @abstractmethod
-    async def compute_similarity(self, results: list[QueryResult]):  # pragma: nocover
+    async def compute_similarity(
+        self, results: list[QueryResult]
+    ) -> None:  # pragma: nocover
         """
         Modify the `QueryResult.scores` field IN-PLACE so that they contain the correct scores.
         """
@@ -55,7 +57,7 @@ async def compute_similarity(self, results: list[QueryResult]):  # pragma: nocov
     async def rerank(self, results: list[QueryResult]) -> list[str]:
         if len(results) == 0:
             return []
-        results = await self.compute_similarity(results)
+        await self.compute_similarity(results)
 
         group_by = "path"
         if QueryInclude.chunk in self.configs.include:
diff --git a/tests/subcommands/query/test_query.py b/tests/subcommands/query/test_query.py
@@ -9,6 +9,7 @@
 from vectorcode.cli_utils import CliAction, Config, QueryInclude
 from vectorcode.subcommands.query import (
     build_query_results,
+    conver_query_results,
     get_query_result_files,
     query,
 )
@@ -47,7 +48,7 @@ def mock_collection():
 @pytest.fixture
 def mock_config():
     return Config(
-        query=["test query"],
+        query=["test query", "test query 2"],
         n_result=3,
         query_multiplier=2,
         chunk_size=100,
@@ -88,7 +89,7 @@ async def test_get_query_result_files(mock_collection, mock_config):
         # Check that query was called with the right parameters
         mock_collection.query.assert_called_once()
         args, kwargs = mock_collection.query.call_args
-        mock_embedding_function.assert_called_once_with(["test query"])
+        mock_embedding_function.assert_called_once_with(["test query", "test query 2"])
         assert kwargs["n_results"] == 6  # n_result(3) * query_multiplier(2)
         assert IncludeEnum.metadatas in kwargs["include"]
         assert IncludeEnum.distances in kwargs["include"]
@@ -98,7 +99,7 @@ async def test_get_query_result_files(mock_collection, mock_config):
         # Check reranker was used correctly
         mock_get_reranker.assert_called_once_with(mock_config)
         mock_reranker_instance.rerank.assert_called_once_with(
-            mock_collection.query.return_value
+            conver_query_results(mock_collection.query.return_value, mock_config.query)
         )
 
         # Check the result
@@ -323,40 +324,6 @@ async def test_get_query_result_files_chunking(mock_collection, mock_config):
         assert result == ["file1.py", "file2.py"]
 
 
-@pytest.mark.asyncio
-async def test_get_query_result_files_multiple_queries(mock_collection, mock_config):
-    # Set multiple query terms
-    mock_config.query = ["term1", "term2", "term3"]
-    mock_config.embedding_dims = 10
-
-    with (
-        patch("vectorcode.subcommands.query.StringChunker") as MockChunker,
-        patch("vectorcode.subcommands.query.reranker.NaiveReranker") as MockReranker,
-    ):
-        # Set up MockChunker to return the query terms as is
-        mock_chunker_instance = MagicMock()
-        mock_chunker_instance.chunk.side_effect = lambda q: [q]
-        MockChunker.return_value = mock_chunker_instance
-
-        mock_reranker_instance = MagicMock()
-        mock_reranker_instance.rerank = AsyncMock(return_value=["file1.py", "file2.py"])
-        MockReranker.return_value = mock_reranker_instance
-
-        # Call the function
-        result = await get_query_result_files(mock_collection, mock_config)
-
-        # Check that chunker was called for each query term
-        assert mock_chunker_instance.chunk.call_count == 3
-
-        # Check query was called with all query terms
-        mock_collection.query.assert_called_once()
-        _, kwargs = mock_collection.query.call_args
-        assert all(len(i) == 10 for i in kwargs["query_embeddings"])
-
-        # Check the result
-        assert result == ["file1.py", "file2.py"]
-
-
 @pytest.mark.asyncio
 async def test_query_success(mock_config):
     # Mock all the necessary dependencies
diff --git a/tests/subcommands/query/test_reranker.py b/tests/subcommands/query/test_reranker.py
@@ -4,7 +4,7 @@
 import numpy
 import pytest
 
-from vectorcode.cli_utils import Config, QueryInclude
+from vectorcode.cli_utils import Config
 from vectorcode.subcommands.query.reranker import (
     CrossEncoderReranker,
     NaiveReranker,
@@ -14,6 +14,7 @@
     get_available_rerankers,
     get_reranker,
 )
+from vectorcode.subcommands.query.types import QueryResult
 
 
 @pytest.fixture(scope="function")
@@ -37,29 +38,50 @@ def naive_reranker_conf():
 
 
 @pytest.fixture(scope="function")
-def query_result():
-    return {
-        "ids": [["id1", "id2", "id3"], ["id4", "id5", "id6"]],
-        "distances": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
-        "metadatas": [
-            [{"path": "file1.py"}, {"path": "file2.py"}, {"path": "file3.py"}],
-            [{"path": "file2.py"}, {"path": "file4.py"}, {"path": "file3.py"}],
-        ],
-        "documents": [
-            ["content1", "content2", "content3"],
-            ["content4", "content5", "content6"],
-        ],
-    }
+def query_result() -> list[QueryResult]:
+    return [
+        QueryResult(
+            path="file1.py",
+            chunk=MagicMock(),
+            query=("query chunk 1",),
+            scores=(0.5,),
+        ),
+        QueryResult(
+            path="file2.py",
+            chunk=MagicMock(),
+            query=("query chunk 1",),
+            scores=(0.9,),
+        ),
+        QueryResult(
+            path="file3.py",
+            chunk=MagicMock(),
+            query=("query chunk 1",),
+            scores=(0.3,),
+        ),
+        QueryResult(
+            path="file2.py",
+            chunk=MagicMock(),
+            query=("query chunk 2",),
+            scores=(0.6,),
+        ),
+        QueryResult(
+            path="file4.py",
+            chunk=MagicMock(),
+            query=("query chunk 2",),
+            scores=(0.7,),
+        ),
+        QueryResult(
+            path="file3.py",
+            chunk=MagicMock(),
+            query=("query chunk 2",),
+            scores=(0.2,),
+        ),
+    ]
 
 
 @pytest.fixture(scope="function")
 def empty_query_result():
-    return {
-        "ids": [],
-        "distances": [],
-        "metadatas": [],
-        "documents": [],
-    }
+    return []
 
 
 @pytest.fixture(scope="function")
@@ -103,8 +125,8 @@ async def test_naive_reranker_rerank(naive_reranker_conf, query_result):
     assert len(result) <= naive_reranker_conf.n_result
 
     # Check all returned items are strings (paths)
-    for path in result:
-        assert isinstance(path, str)
+    for res in result:
+        assert isinstance(res, str)
 
 
 @pytest.mark.asyncio
@@ -143,21 +165,7 @@ async def test_cross_encoder_reranker_rerank(mock_cross_encoder, config, query_r
     mock_model = MagicMock()
     mock_cross_encoder.return_value = mock_model
 
-    # Configure mock predict to return numpy array with float32 dtype
-    scores = numpy.array([0.9, 0.7, 0.8], dtype=numpy.float32)
-    mock_model.predict.return_value = scores
-
-    # Ensure complete query_result structure
-    query_result.update(
-        {
-            "ids": [["id1", "id2", "id3"], ["id4", "id5", "id6"]],
-            "documents": [["doc1", "doc2", "doc3"], ["doc4", "doc5", "doc6"]],
-            "metadatas": [
-                [{"path": "p1"}, {"path": "p2"}, {"path": "p3"}],
-                [{"path": "p4"}, {"path": "p5"}, {"path": "p6"}],
-            ],
-        }
-    )
+    mock_model.predict = lambda x: numpy.random.random((len(x),))
 
     reranker = CrossEncoderReranker(config)
     result = await reranker.rerank(query_result)
@@ -184,46 +192,6 @@ async def test_naive_reranker_document_selection_logic(
     assert "file2.py" in result or "file3.py" in result
 
 
-@pytest.mark.asyncio
-async def test_naive_reranker_with_chunk_ids(naive_reranker_conf, query_result):
-    """Test NaiveReranker returns chunk IDs when QueryInclude.chunk is set"""
-    naive_reranker_conf.include.append(
-        QueryInclude.chunk
-    )  # Assuming QueryInclude.chunk would be "chunk"
-
-    reranker = NaiveReranker(naive_reranker_conf)
-    result = await reranker.rerank(query_result)
-
-    assert isinstance(result, list)
-    assert len(result) <= naive_reranker_conf.n_result
-    assert all(isinstance(id, str) for id in result)
-    assert all(id.startswith("id") for id in result)  # Verify IDs not paths
-
-
-@pytest.mark.asyncio
-@patch("sentence_transformers.CrossEncoder")
-async def test_cross_encoder_reranker_with_chunk_ids(
-    mock_cross_encoder, config, query_result
-):
-    """Test CrossEncoderReranker returns chunk IDs when QueryInclude.chunk is set"""
-    mock_model = MagicMock()
-    mock_cross_encoder.return_value = mock_model
-
-    # Setup mock to return numpy array scores
-    scores = numpy.array([0.9, 0.7], dtype=numpy.float32)
-    mock_model.predict.return_value = scores
-
-    config.include = {QueryInclude.chunk}
-    reranker = CrossEncoderReranker(config)
-
-    result = await reranker.rerank(query_result)
-
-    mock_model.predict.assert_called()
-    assert isinstance(result, list)
-    assert all(isinstance(id, str) for id in result)
-    assert all(id in ["id1", "id2", "id3", "id4"] for id in result)
-
-
 def test_get_reranker(config, naive_reranker_conf):
     assert get_reranker(naive_reranker_conf).configs.reranker == "NaiveReranker"
 
diff --git a/tests/test_mcp.py b/tests/test_mcp.py