Skip to content

Commit 7fc14fd

Browse files
Merge pull request #1273 from codeflash-ai/test/java-comparator-test-results-schema
test: add integration tests for test_results schema validation
2 parents a00eb39 + 665895c commit 7fc14fd

1 file changed

Lines changed: 245 additions & 0 deletions

File tree

tests/test_languages/test_java/test_comparator.py

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Tests for Java test result comparison."""
22

33
import json
4+
import shutil
45
import sqlite3
56
import tempfile
67
from pathlib import Path
@@ -13,6 +14,12 @@
1314
)
1415
from codeflash.models.models import TestDiffScope
1516

17+
# Marker for tests that shell out to a Java runtime: they are skipped when no
# `java` executable can be located on PATH.
requires_java = pytest.mark.skipif(
    not shutil.which("java"),
    reason="Java not found - skipping Comparator integration tests",
)
22+
1623

1724
class TestDirectComparison:
1825
"""Tests for direct Python-based comparison."""
@@ -308,3 +315,241 @@ def test_deeply_nested_objects(self):
308315

309316
equivalent, diffs = compare_invocations_directly(original, candidate)
310317
assert equivalent is True
318+
319+
320+
@requires_java
class TestTestResultsTableSchema:
    """Tests for Java Comparator reading from test_results table schema.

    This validates the schema integration between instrumentation (which writes
    to test_results) and the Comparator (which reads from test_results).

    These tests require Java to be installed to run the actual Comparator.jar.
    """

    @pytest.fixture
    def create_test_results_db(self):
        """Return a factory that builds a SQLite database with a test_results table.

        The table layout matches the schema used by instrumentation. The
        returned callable has the signature ``_create(path, results) -> path``:

        * ``path`` is where the database file is created.
        * ``results`` is a list of dicts keyed by column name; any column a
          dict omits falls back to the defaults below (``return_value``
          defaults to NULL).
        """
        # Column name -> default value. Insertion order defines the column
        # order of the INSERT statement, so the two can never drift apart.
        column_defaults = {
            "test_module_path": "TestModule",
            "test_class_name": "TestClass",
            "test_function_name": "testMethod",
            "function_getting_tested": "targetMethod",
            "loop_index": 1,
            "iteration_id": "1_0",
            "runtime": 1000000,
            "return_value": None,
            "verification_type": "function_call",
        }
        columns = tuple(column_defaults)

        # Create test_results table matching instrumentation schema
        create_table_sql = """
            CREATE TABLE test_results (
                test_module_path TEXT,
                test_class_name TEXT,
                test_function_name TEXT,
                function_getting_tested TEXT,
                loop_index INTEGER,
                iteration_id TEXT,
                runtime INTEGER,
                return_value TEXT,
                verification_type TEXT
            )
        """
        placeholders = ", ".join("?" for _ in columns)
        insert_sql = (
            f"INSERT INTO test_results ({', '.join(columns)}) "
            f"VALUES ({placeholders})"
        )

        def _create(path: Path, results: list[dict]) -> Path:
            rows = [
                tuple(result.get(name, column_defaults[name]) for name in columns)
                for result in results
            ]

            conn = sqlite3.connect(path)
            try:
                conn.execute(create_table_sql)
                conn.executemany(insert_sql, rows)
                conn.commit()
            finally:
                # Close on every path so a failing CREATE/INSERT does not
                # leak the connection.
                conn.close()
            return path

        return _create

    def test_comparator_reads_test_results_table_identical(
        self, tmp_path: Path, create_test_results_db
    ):
        """Test that Comparator correctly reads test_results table with identical results."""
        original_path = tmp_path / "original.db"
        candidate_path = tmp_path / "candidate.db"

        # Create databases with identical results
        results = [
            {
                "test_class_name": "CalculatorTest",
                "function_getting_tested": "add",
                "loop_index": 1,
                "iteration_id": "1_0",
                "return_value": '{"value": 42}',
            },
            {
                "test_class_name": "CalculatorTest",
                "function_getting_tested": "add",
                "loop_index": 1,
                "iteration_id": "2_0",
                "return_value": '{"value": 100}',
            },
        ]

        create_test_results_db(original_path, results)
        create_test_results_db(candidate_path, results)

        # Compare using Java Comparator
        equivalent, diffs = compare_test_results(original_path, candidate_path)

        assert equivalent is True
        assert len(diffs) == 0

    def test_comparator_reads_test_results_table_different_values(
        self, tmp_path: Path, create_test_results_db
    ):
        """Test that Comparator detects different return values from test_results table."""
        original_path = tmp_path / "original.db"
        candidate_path = tmp_path / "candidate.db"

        original_results = [
            {
                "test_class_name": "StringUtilsTest",
                "function_getting_tested": "reverse",
                "loop_index": 1,
                "iteration_id": "1_0",
                "return_value": '"olleh"',
            },
        ]

        candidate_results = [
            {
                "test_class_name": "StringUtilsTest",
                "function_getting_tested": "reverse",
                "loop_index": 1,
                "iteration_id": "1_0",
                "return_value": '"wrong"',  # Different result
            },
        ]

        create_test_results_db(original_path, original_results)
        create_test_results_db(candidate_path, candidate_results)

        # Compare using Java Comparator
        equivalent, diffs = compare_test_results(original_path, candidate_path)

        assert equivalent is False
        assert len(diffs) == 1
        assert diffs[0].scope == TestDiffScope.RETURN_VALUE

    def test_comparator_handles_multiple_loop_iterations(
        self, tmp_path: Path, create_test_results_db
    ):
        """Test that Comparator correctly handles multiple loop iterations."""
        original_path = tmp_path / "original.db"
        candidate_path = tmp_path / "candidate.db"

        # Simulate multiple benchmark loops: 3 loops, 2 iterations per loop
        results = [
            {
                "test_class_name": "AlgorithmTest",
                "function_getting_tested": "fibonacci",
                "loop_index": loop,
                "iteration_id": f"{iteration}_0",
                "return_value": str(loop * iteration),
            }
            for loop in range(1, 4)
            for iteration in range(1, 3)
        ]

        create_test_results_db(original_path, results)
        create_test_results_db(candidate_path, results)

        # Compare using Java Comparator
        equivalent, diffs = compare_test_results(original_path, candidate_path)

        assert equivalent is True
        assert len(diffs) == 0

    def test_comparator_iteration_id_parsing(
        self, tmp_path: Path, create_test_results_db
    ):
        """Test that Comparator correctly parses iteration_id format 'iter_testIteration'."""
        original_path = tmp_path / "original.db"
        candidate_path = tmp_path / "candidate.db"

        # Test various iteration_id formats
        results = [
            {
                "loop_index": 1,
                "iteration_id": "1_0",  # Standard format
                "return_value": '{"result": 1}',
            },
            {
                "loop_index": 1,
                "iteration_id": "2_5",  # With test iteration
                "return_value": '{"result": 2}',
            },
            {
                "loop_index": 2,
                "iteration_id": "1_0",  # Different loop
                "return_value": '{"result": 3}',
            },
        ]

        create_test_results_db(original_path, results)
        create_test_results_db(candidate_path, results)

        # Compare using Java Comparator
        equivalent, diffs = compare_test_results(original_path, candidate_path)

        assert equivalent is True
        assert len(diffs) == 0

    def test_comparator_missing_result_in_candidate(
        self, tmp_path: Path, create_test_results_db
    ):
        """Test that Comparator detects missing results in candidate."""
        original_path = tmp_path / "original.db"
        candidate_path = tmp_path / "candidate.db"

        original_results = [
            {
                "loop_index": 1,
                "iteration_id": "1_0",
                "return_value": '{"value": 1}',
            },
            {
                "loop_index": 1,
                "iteration_id": "2_0",
                "return_value": '{"value": 2}',
            },
        ]

        candidate_results = [
            {
                "loop_index": 1,
                "iteration_id": "1_0",
                "return_value": '{"value": 1}',
            },
            # Missing second iteration
        ]

        create_test_results_db(original_path, original_results)
        create_test_results_db(candidate_path, candidate_results)

        # Compare using Java Comparator
        equivalent, diffs = compare_test_results(original_path, candidate_path)

        assert equivalent is False
        assert len(diffs) >= 1  # Should detect missing invocation

0 commit comments

Comments
 (0)