|
1 | 1 | """Tests for Java test result comparison.""" |
2 | 2 |
|
3 | 3 | import json |
| 4 | +import shutil |
4 | 5 | import sqlite3 |
5 | 6 | import tempfile |
6 | 7 | from pathlib import Path |
|
13 | 14 | ) |
14 | 15 | from codeflash.models.models import TestDiffScope |
15 | 16 |
|
# The Comparator integration tests need a `java` executable on PATH; when none
# is found they are skipped rather than reported as failures.
requires_java = pytest.mark.skipif(
    not shutil.which("java"),
    reason="Java not found - skipping Comparator integration tests",
)
| 22 | + |
16 | 23 |
|
17 | 24 | class TestDirectComparison: |
18 | 25 | """Tests for direct Python-based comparison.""" |
@@ -308,3 +315,241 @@ def test_deeply_nested_objects(self): |
308 | 315 |
|
309 | 316 | equivalent, diffs = compare_invocations_directly(original, candidate) |
310 | 317 | assert equivalent is True |
| 318 | + |
| 319 | + |
@requires_java
class TestTestResultsTableSchema:
    """Tests for Java Comparator reading from test_results table schema.

    This validates the schema integration between instrumentation (which writes
    to test_results) and the Comparator (which reads from test_results).

    These tests require Java to be installed to run the actual Comparator.jar.
    """

    @pytest.fixture
    def create_test_results_db(self):
        """Return a factory that builds a SQLite database with a test_results table.

        The factory is called as ``_create(path, results)``. It creates the
        ``test_results`` table at ``path``, inserts one row per dict in
        ``results`` (keys that are absent fall back to the defaults listed in
        the helper; ``return_value`` falls back to NULL) and returns ``path``.
        """

        def _create(path: Path, results: list[dict]):
            # Build every row up front so the database work below is a single
            # create + bulk insert.
            rows = [
                (
                    result.get("test_module_path", "TestModule"),
                    result.get("test_class_name", "TestClass"),
                    result.get("test_function_name", "testMethod"),
                    result.get("function_getting_tested", "targetMethod"),
                    result.get("loop_index", 1),
                    result.get("iteration_id", "1_0"),
                    result.get("runtime", 1000000),
                    result.get("return_value"),
                    result.get("verification_type", "function_call"),
                )
                for result in results
            ]

            conn = sqlite3.connect(path)
            try:
                cursor = conn.cursor()

                # Create test_results table matching instrumentation schema
                cursor.execute(
                    """
                    CREATE TABLE test_results (
                        test_module_path TEXT,
                        test_class_name TEXT,
                        test_function_name TEXT,
                        function_getting_tested TEXT,
                        loop_index INTEGER,
                        iteration_id TEXT,
                        runtime INTEGER,
                        return_value TEXT,
                        verification_type TEXT
                    )
                    """
                )

                cursor.executemany(
                    """
                    INSERT INTO test_results
                    (test_module_path, test_class_name, test_function_name,
                     function_getting_tested, loop_index, iteration_id,
                     runtime, return_value, verification_type)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    rows,
                )

                conn.commit()
            finally:
                # Close the connection even when a statement above raises, so
                # a failing setup does not leave the database file open.
                conn.close()
            return path

        return _create

    def test_comparator_reads_test_results_table_identical(
        self, tmp_path: Path, create_test_results_db
    ):
        """Identical rows in both databases must compare as equivalent with no diffs."""
        original_path = tmp_path / "original.db"
        candidate_path = tmp_path / "candidate.db"

        # Create databases with identical results
        results = [
            {
                "test_class_name": "CalculatorTest",
                "function_getting_tested": "add",
                "loop_index": 1,
                "iteration_id": "1_0",
                "return_value": '{"value": 42}',
            },
            {
                "test_class_name": "CalculatorTest",
                "function_getting_tested": "add",
                "loop_index": 1,
                "iteration_id": "2_0",
                "return_value": '{"value": 100}',
            },
        ]

        create_test_results_db(original_path, results)
        create_test_results_db(candidate_path, results)

        # Compare using Java Comparator
        equivalent, diffs = compare_test_results(original_path, candidate_path)

        assert equivalent is True
        assert len(diffs) == 0

    def test_comparator_reads_test_results_table_different_values(
        self, tmp_path: Path, create_test_results_db
    ):
        """A differing return_value for the same invocation must yield one RETURN_VALUE diff."""
        original_path = tmp_path / "original.db"
        candidate_path = tmp_path / "candidate.db"

        original_results = [
            {
                "test_class_name": "StringUtilsTest",
                "function_getting_tested": "reverse",
                "loop_index": 1,
                "iteration_id": "1_0",
                "return_value": '"olleh"',
            },
        ]

        candidate_results = [
            {
                "test_class_name": "StringUtilsTest",
                "function_getting_tested": "reverse",
                "loop_index": 1,
                "iteration_id": "1_0",
                "return_value": '"wrong"',  # Different result
            },
        ]

        create_test_results_db(original_path, original_results)
        create_test_results_db(candidate_path, candidate_results)

        # Compare using Java Comparator
        equivalent, diffs = compare_test_results(original_path, candidate_path)

        assert equivalent is False
        assert len(diffs) == 1
        assert diffs[0].scope == TestDiffScope.RETURN_VALUE

    def test_comparator_handles_multiple_loop_iterations(
        self, tmp_path: Path, create_test_results_db
    ):
        """Rows spread over several loop_index values must still compare as equivalent."""
        original_path = tmp_path / "original.db"
        candidate_path = tmp_path / "candidate.db"

        # Simulate multiple benchmark loops
        results = []
        for loop in range(1, 4):  # 3 loops
            for iteration in range(1, 3):  # 2 iterations per loop
                results.append(
                    {
                        "test_class_name": "AlgorithmTest",
                        "function_getting_tested": "fibonacci",
                        "loop_index": loop,
                        "iteration_id": f"{iteration}_0",
                        "return_value": str(loop * iteration),
                    }
                )

        create_test_results_db(original_path, results)
        create_test_results_db(candidate_path, results)

        # Compare using Java Comparator
        equivalent, diffs = compare_test_results(original_path, candidate_path)

        assert equivalent is True
        assert len(diffs) == 0

    def test_comparator_iteration_id_parsing(
        self, tmp_path: Path, create_test_results_db
    ):
        """Test that Comparator correctly parses iteration_id format 'iter_testIteration'."""
        original_path = tmp_path / "original.db"
        candidate_path = tmp_path / "candidate.db"

        # Test various iteration_id formats
        results = [
            {
                "loop_index": 1,
                "iteration_id": "1_0",  # Standard format
                "return_value": '{"result": 1}',
            },
            {
                "loop_index": 1,
                "iteration_id": "2_5",  # With test iteration
                "return_value": '{"result": 2}',
            },
            {
                "loop_index": 2,
                "iteration_id": "1_0",  # Different loop
                "return_value": '{"result": 3}',
            },
        ]

        create_test_results_db(original_path, results)
        create_test_results_db(candidate_path, results)

        # Compare using Java Comparator
        equivalent, diffs = compare_test_results(original_path, candidate_path)

        assert equivalent is True
        assert len(diffs) == 0

    def test_comparator_missing_result_in_candidate(
        self, tmp_path: Path, create_test_results_db
    ):
        """A row present in the original but absent from the candidate must be reported."""
        original_path = tmp_path / "original.db"
        candidate_path = tmp_path / "candidate.db"

        original_results = [
            {
                "loop_index": 1,
                "iteration_id": "1_0",
                "return_value": '{"value": 1}',
            },
            {
                "loop_index": 1,
                "iteration_id": "2_0",
                "return_value": '{"value": 2}',
            },
        ]

        candidate_results = [
            {
                "loop_index": 1,
                "iteration_id": "1_0",
                "return_value": '{"value": 1}',
            },
            # Missing second iteration
        ]

        create_test_results_db(original_path, original_results)
        create_test_results_db(candidate_path, candidate_results)

        # Compare using Java Comparator
        equivalent, diffs = compare_test_results(original_path, candidate_path)

        assert equivalent is False
        assert len(diffs) >= 1  # Should detect missing invocation
0 commit comments