Merge pull request #1273 from codeflash-ai/test/java-comparator-test-results-schema

mohamedashrraf222 · web-flow · commit 7fc14fdf4ffc · 2026-02-03T04:06:04.000+02:00
test: add integration tests for test_results schema validation
diff --git a/tests/test_languages/test_java/test_comparator.py b/tests/test_languages/test_java/test_comparator.py
@@ -1,6 +1,7 @@
 """Tests for Java test result comparison."""
 
 import json
+import shutil
 import sqlite3
 import tempfile
 from pathlib import Path
@@ -13,6 +14,12 @@
 )
 from codeflash.models.models import TestDiffScope
 
+# Skip tests that require Java runtime if Java is not available
+requires_java = pytest.mark.skipif(
+    shutil.which("java") is None,
+    reason="Java not found - skipping Comparator integration tests",
+)
+
 
 class TestDirectComparison:
     """Tests for direct Python-based comparison."""
@@ -308,3 +315,241 @@ def test_deeply_nested_objects(self):
 
         equivalent, diffs = compare_invocations_directly(original, candidate)
         assert equivalent is True
+
+
+@requires_java
+class TestTestResultsTableSchema:
+    """Tests for Java Comparator reading from test_results table schema.
+
+    This validates the schema integration between instrumentation (which writes
+    to test_results) and the Comparator (which reads from test_results).
+
+    These tests require Java to be installed to run the actual Comparator.jar.
+    """
+
+    @pytest.fixture
+    def create_test_results_db(self):
+        """Create a test SQLite database with test_results table (actual schema used by instrumentation)."""
+
+        def _create(path: Path, results: list[dict]):
+            conn = sqlite3.connect(path)
+            cursor = conn.cursor()
+
+            # Create test_results table matching instrumentation schema
+            cursor.execute(
+                """
+                CREATE TABLE test_results (
+                    test_module_path TEXT,
+                    test_class_name TEXT,
+                    test_function_name TEXT,
+                    function_getting_tested TEXT,
+                    loop_index INTEGER,
+                    iteration_id TEXT,
+                    runtime INTEGER,
+                    return_value TEXT,
+                    verification_type TEXT
+                )
+            """
+            )
+
+            for result in results:
+                cursor.execute(
+                    """
+                    INSERT INTO test_results
+                    (test_module_path, test_class_name, test_function_name,
+                     function_getting_tested, loop_index, iteration_id,
+                     runtime, return_value, verification_type)
+                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                    (
+                        result.get("test_module_path", "TestModule"),
+                        result.get("test_class_name", "TestClass"),
+                        result.get("test_function_name", "testMethod"),
+                        result.get("function_getting_tested", "targetMethod"),
+                        result.get("loop_index", 1),
+                        result.get("iteration_id", "1_0"),
+                        result.get("runtime", 1000000),
+                        result.get("return_value"),
+                        result.get("verification_type", "function_call"),
+                    ),
+                )
+
+            conn.commit()
+            conn.close()
+            return path
+
+        return _create
+
+    def test_comparator_reads_test_results_table_identical(
+        self, tmp_path: Path, create_test_results_db
+    ):
+        """Test that Comparator correctly reads test_results table with identical results."""
+        original_path = tmp_path / "original.db"
+        candidate_path = tmp_path / "candidate.db"
+
+        # Create databases with identical results
+        results = [
+            {
+                "test_class_name": "CalculatorTest",
+                "function_getting_tested": "add",
+                "loop_index": 1,
+                "iteration_id": "1_0",
+                "return_value": '{"value": 42}',
+            },
+            {
+                "test_class_name": "CalculatorTest",
+                "function_getting_tested": "add",
+                "loop_index": 1,
+                "iteration_id": "2_0",
+                "return_value": '{"value": 100}',
+            },
+        ]
+
+        create_test_results_db(original_path, results)
+        create_test_results_db(candidate_path, results)
+
+        # Compare using Java Comparator
+        equivalent, diffs = compare_test_results(original_path, candidate_path)
+
+        assert equivalent is True
+        assert len(diffs) == 0
+
+    def test_comparator_reads_test_results_table_different_values(
+        self, tmp_path: Path, create_test_results_db
+    ):
+        """Test that Comparator detects different return values from test_results table."""
+        original_path = tmp_path / "original.db"
+        candidate_path = tmp_path / "candidate.db"
+
+        original_results = [
+            {
+                "test_class_name": "StringUtilsTest",
+                "function_getting_tested": "reverse",
+                "loop_index": 1,
+                "iteration_id": "1_0",
+                "return_value": '"olleh"',
+            },
+        ]
+
+        candidate_results = [
+            {
+                "test_class_name": "StringUtilsTest",
+                "function_getting_tested": "reverse",
+                "loop_index": 1,
+                "iteration_id": "1_0",
+                "return_value": '"wrong"',  # Different result
+            },
+        ]
+
+        create_test_results_db(original_path, original_results)
+        create_test_results_db(candidate_path, candidate_results)
+
+        # Compare using Java Comparator
+        equivalent, diffs = compare_test_results(original_path, candidate_path)
+
+        assert equivalent is False
+        assert len(diffs) == 1
+        assert diffs[0].scope == TestDiffScope.RETURN_VALUE
+
+    def test_comparator_handles_multiple_loop_iterations(
+        self, tmp_path: Path, create_test_results_db
+    ):
+        """Test that Comparator correctly handles multiple loop iterations."""
+        original_path = tmp_path / "original.db"
+        candidate_path = tmp_path / "candidate.db"
+
+        # Simulate multiple benchmark loops
+        results = []
+        for loop in range(1, 4):  # 3 loops
+            for iteration in range(1, 3):  # 2 iterations per loop
+                results.append(
+                    {
+                        "test_class_name": "AlgorithmTest",
+                        "function_getting_tested": "fibonacci",
+                        "loop_index": loop,
+                        "iteration_id": f"{iteration}_0",
+                        "return_value": str(loop * iteration),
+                    }
+                )
+
+        create_test_results_db(original_path, results)
+        create_test_results_db(candidate_path, results)
+
+        # Compare using Java Comparator
+        equivalent, diffs = compare_test_results(original_path, candidate_path)
+
+        assert equivalent is True
+        assert len(diffs) == 0
+
+    def test_comparator_iteration_id_parsing(
+        self, tmp_path: Path, create_test_results_db
+    ):
+        """Test that Comparator correctly parses iteration_id format 'iter_testIteration'."""
+        original_path = tmp_path / "original.db"
+        candidate_path = tmp_path / "candidate.db"
+
+        # Test various iteration_id formats
+        results = [
+            {
+                "loop_index": 1,
+                "iteration_id": "1_0",  # Standard format
+                "return_value": '{"result": 1}',
+            },
+            {
+                "loop_index": 1,
+                "iteration_id": "2_5",  # With test iteration
+                "return_value": '{"result": 2}',
+            },
+            {
+                "loop_index": 2,
+                "iteration_id": "1_0",  # Different loop
+                "return_value": '{"result": 3}',
+            },
+        ]
+
+        create_test_results_db(original_path, results)
+        create_test_results_db(candidate_path, results)
+
+        # Compare using Java Comparator
+        equivalent, diffs = compare_test_results(original_path, candidate_path)
+
+        assert equivalent is True
+        assert len(diffs) == 0
+
+    def test_comparator_missing_result_in_candidate(
+        self, tmp_path: Path, create_test_results_db
+    ):
+        """Test that Comparator detects missing results in candidate."""
+        original_path = tmp_path / "original.db"
+        candidate_path = tmp_path / "candidate.db"
+
+        original_results = [
+            {
+                "loop_index": 1,
+                "iteration_id": "1_0",
+                "return_value": '{"value": 1}',
+            },
+            {
+                "loop_index": 1,
+                "iteration_id": "2_0",
+                "return_value": '{"value": 2}',
+            },
+        ]
+
+        candidate_results = [
+            {
+                "loop_index": 1,
+                "iteration_id": "1_0",
+                "return_value": '{"value": 1}',
+            },
+            # Missing second iteration
+        ]
+
+        create_test_results_db(original_path, original_results)
+        create_test_results_db(candidate_path, candidate_results)
+
+        # Compare using Java Comparator
+        equivalent, diffs = compare_test_results(original_path, candidate_path)
+
+        assert equivalent is False
+        assert len(diffs) >= 1  # Should detect missing invocation