add narrative pipeline test (#2307)

tevko · web-flow · commit 3019c78a1665 · 2025-12-03T09:10:52.000-06:00
* add narrative pipeline test

* change filename

* slight mocking adjustment

* mock sentence transformer

* better evoc

* try massaging mock data again

* more mocking

* diff mock strategy

* fix cov report

* test 500 gen embed

* syntax fixes

* update syntax again

* syntax fix again

* attempt mock fix

* another mock attempt

* fix action

* fix action again

* actions fix

* add another test

* add another test

* fix test
diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
@@ -74,6 +74,9 @@ jobs:
         docker compose -f docker-compose.test.yml cp delphi/real_data delphi:/app/real_data
         echo "Copying coverage script into container..."
         docker compose -f docker-compose.test.yml cp delphi/generate_coverage_md.py delphi:/app/generate_coverage_md.py
+        echo "Copying script to be tested into container..."
+        docker compose -f docker-compose.test.yml cp delphi/polismath/run_math_pipeline.py delphi:/app/run_math_pipeline.py
+        docker compose -f docker-compose.test.yml cp delphi/umap_narrative delphi:/app/umap_narrative
 
         echo "Running tests and generating coverage report..."
         docker compose \
@@ -94,7 +97,7 @@ jobs:
             python create_dynamodb_tables.py --region us-east-1; \
             echo '--- Running Pytest ---'; \
             export PYTHONPATH=\$PYTHONPATH:/app; \
-            pytest --cov=polismath --cov-report=xml:/app/coverage.xml /app/tests --ignore=/app/tests/test_pakistan_conversation.py
+            pytest --cov=polismath --cov=run_math_pipeline --cov=./umap_narrative --cov-report=xml:/app/coverage.xml /app/tests --ignore=/app/tests/test_pakistan_conversation.py
             echo '--- Generating Coverage Comment Text ---'; \
             python /app/generate_coverage_md.py > /app/coverage-comment.md \
           "
diff --git a/delphi/tests/test_500_generate_embedding.py b/delphi/tests/test_500_generate_embedding.py
@@ -0,0 +1,87 @@
+import os
+import sys
+from unittest import mock
+import pytest
+import numpy as np
+import importlib
+
+# Add the 'umap_narrative' directory to the Python path to allow the target script to be imported.
+umap_narrative_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'umap_narrative'))
+if umap_narrative_dir not in sys.path:
+    sys.path.insert(0, umap_narrative_dir)
+
+@pytest.fixture(autouse=True)
+def setup_and_teardown(tmp_path, monkeypatch):
+    """Run every test in this module from a fresh temp dir with a dummy API key.
+
+    Both changes go through ``monkeypatch``, which reverts them at teardown.
+    """
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "mock_key_for_testing")
+    monkeypatch.chdir(tmp_path)
+
+def test_pipeline_flow_with_mocks(tmp_path):
+    """
+    Exercise ``main()`` of the embedding script along its ``--use-mock-data`` path.
+
+    ``process_comments``, ``DataConverter`` and ``DynamoDBStorage`` are replaced with
+    mocks; the test then checks which converter and storage methods were invoked.
+    """
+    zid = "98765"
+    test_args = [
+        "500_generate_embedding_umap_cluster.py",
+        "--use-mock-data",
+        "--zid", zid,
+    ]
+
+    num_comments = 100
+    num_layers = 3
+
+    # Seeded generator: the stand-in ML output is identical on every run.
+    rng = np.random.default_rng(0)
+    mock_process_comments_return_value = (
+        rng.random((num_comments, 2)),  # document_map
+        rng.random((num_comments, 32)),  # document_vectors
+        [rng.integers(0, 5, num_comments) for _ in range(num_layers)],  # cluster_layers
+        [f"comment text {i}" for i in range(num_comments)],  # comment_texts
+        [str(i) for i in range(num_comments)],  # comment_ids
+    )
+
+    # The module name starts with a digit, so it has to be loaded via importlib.
+    generate_embedding_module = importlib.import_module("500_generate_embedding_umap_cluster")
+
+    # Patch the external dependencies on the imported module.
+    with mock.patch.object(generate_embedding_module, 'process_comments', return_value=mock_process_comments_return_value) as mock_process_comments, \
+         mock.patch.object(generate_embedding_module, 'DataConverter') as MockDataConverter, \
+         mock.patch.object(generate_embedding_module, 'DynamoDBStorage') as MockDynamoStorage:
+
+        # Give the converter simple, recognisable return values.
+        MockDataConverter.create_conversation_meta.return_value = "mock_meta_model"
+        MockDataConverter.batch_convert_cluster_characteristics.return_value = ["mock_char_model"]
+        # Instance returned whenever the script constructs DynamoDBStorage(...).
+        mock_dynamo_instance = MockDynamoStorage.return_value
+
+        # Run the script's entry point with the fake command line.
+        with mock.patch.object(sys, 'argv', test_args):
+            try:
+                generate_embedding_module.main()
+            except SystemExit as e:
+                pytest.fail(f"Script exited unexpectedly: {e}")
+
+    # The mocked entry points must each have been used exactly once.
+    mock_process_comments.assert_called_once()
+    MockDynamoStorage.assert_called_once()
+
+    # Converter usage: one metadata conversion, one characteristics batch per layer.
+    MockDataConverter.create_conversation_meta.assert_called_once()
+    assert MockDataConverter.batch_convert_cluster_characteristics.call_count == num_layers
+
+    # Storage usage: the converted objects are what gets written.
+    mock_dynamo_instance.create_conversation_meta.assert_called_with("mock_meta_model")
+    assert mock_dynamo_instance.batch_create_cluster_characteristics.call_count == num_layers
+    mock_dynamo_instance.batch_create_cluster_characteristics.assert_called_with(["mock_char_model"])
+
+    # Storage methods outside the mock-data path must stay untouched.
+    mock_dynamo_instance.batch_create_comment_embeddings.assert_not_called()
+    mock_dynamo_instance.batch_create_graph_edges.assert_not_called()
+    mock_dynamo_instance.batch_create_comment_clusters.assert_not_called()
+    mock_dynamo_instance.batch_create_topics.assert_not_called()
diff --git a/delphi/tests/test_501_calculate_comment_extremity.py b/delphi/tests/test_501_calculate_comment_extremity.py
@@ -0,0 +1,92 @@
+import sys
+import os
+from unittest import mock
+import pytest
+import importlib
+
+# Add the 'umap_narrative' directory to the Python path to allow the script to be imported.
+umap_narrative_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'umap_narrative'))
+if umap_narrative_dir not in sys.path:
+    sys.path.insert(0, umap_narrative_dir)
+
+# Import the module and function to be tested using importlib
+extremity_module = importlib.import_module("501_calculate_comment_extremity")
+calculate_and_store_extremity = extremity_module.calculate_and_store_extremity
+
+def test_calculate_and_store_extremity_with_mocks():
+    """
+    Check the forced-recalculation path of calculate_and_store_extremity.
+
+    GroupDataProcessor and check_existing_extremity_values are patched on the module
+    under test; the processor's export data is canned, and the test verifies the
+    returned mapping plus how the patched collaborators were used.
+    """
+    conversation_id = 12345
+
+    # Canned export data. Comment 104 deliberately has no 'comment_extremity' key.
+    canned_export = {
+        'comments': [
+            {'comment_id': 101, 'comment_extremity': 0.85},
+            {'comment_id': 102, 'comment_extremity': 0.25},
+            {'comment_id': 103, 'comment_extremity': 0.50},
+            {'comment_id': 104},
+        ]
+    }
+
+    # Expected mapping: a comment without an extremity value is reported as 0.
+    expected_result = {
+        101: 0.85,
+        102: 0.25,
+        103: 0.50,
+        104: 0,
+    }
+
+    # Build both patchers up front; they only take effect inside the ``with`` block.
+    check_patch = mock.patch.object(extremity_module, 'check_existing_extremity_values', return_value={})
+    processor_patch = mock.patch.object(extremity_module, 'GroupDataProcessor')
+    with processor_patch as MockGroupDataProcessor, check_patch as mock_check_existing:
+        # Every GroupDataProcessor(...) call yields this instance.
+        processor = MockGroupDataProcessor.return_value
+        processor.get_export_data.return_value = canned_export
+
+        # Call the function under test.
+        result = calculate_and_store_extremity(conversation_id, force_recalculation=True)
+
+    # The function must return exactly the values derived from the canned export.
+    assert result == expected_result, "The returned extremity values do not match the expected output."
+
+    # With force_recalculation=True the existing-values lookup must be bypassed.
+    mock_check_existing.assert_not_called()
+    MockGroupDataProcessor.assert_called_once()
+    processor.get_export_data.assert_called_once_with(conversation_id, False)
+
+def test_check_for_existing_values(monkeypatch):
+    """
+    Check the short-circuit path: with force_recalculation=False, whatever
+    check_existing_extremity_values reports is returned and no export is requested.
+
+    The ``monkeypatch`` fixture is requested by the signature but not used here.
+    """
+    conversation_id = 54321
+    cached_values = {201: 0.9, 202: 0.1}
+
+    # Patchers are created here and activated, lookup first, by the ``with`` below.
+    lookup_patch = mock.patch.object(extremity_module, 'check_existing_extremity_values', return_value=cached_values)
+    processor_patch = mock.patch.object(extremity_module, 'GroupDataProcessor')
+    with lookup_patch as mock_check_existing, processor_patch as MockGroupDataProcessor:
+        # Instance produced by any GroupDataProcessor(...) call inside the function.
+        processor = MockGroupDataProcessor.return_value
+
+        result = calculate_and_store_extremity(conversation_id, force_recalculation=False)
+
+    # The cached values come back unchanged ...
+    assert result == cached_values
+
+    # ... after exactly one lookup for this conversation.
+    mock_check_existing.assert_called_once_with(conversation_id)
+
+    # The processor class is still instantiated once by the function ...
+    MockGroupDataProcessor.assert_called_once()
+
+    # ... but its export data must never be requested.
+    processor.get_export_data.assert_not_called()
diff --git a/delphi/tests/test_reset_conversation.py b/delphi/tests/test_reset_conversation.py
@@ -0,0 +1,105 @@
+import sys
+import os
+from unittest import mock
+import pytest
+
+# Add the 'umap_narrative' directory to the Python path to allow the script to be imported
+umap_narrative_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'umap_narrative'))
+if umap_narrative_dir not in sys.path:
+    sys.path.insert(0, umap_narrative_dir)
+
+# Now we can import the main function from the script to be tested
+from reset_conversation import main as reset_conversation_main
+
+@pytest.fixture
+def mock_boto3_resources():
+    """
+    Patch ``reset_conversation.get_boto_resource`` with S3 and DynamoDB stand-ins.
+
+    Yields a dict with the patched function ("get_resource"), the two resource
+    mocks ("dynamodb", "s3") and the objects they hand out ("table", "bucket").
+    """
+    # DynamoDB side: every Table(...) lookup returns the same table mock, whose
+    # query() and scan() each report a single item.
+    table = mock.MagicMock()
+    table.query.return_value = {'Items': [{'pk': 'some_key', 'sk': 'some_sort_key'}]}
+    table.scan.return_value = {'Items': [{'pk': 'some_key', 'sk': 'some_sort_key'}]}
+    dynamodb = mock.MagicMock()
+    dynamodb.Table.return_value = table
+
+    # S3 side: Bucket(...) returns one bucket mock whose objects.filter(...) lists
+    # a single stored object.
+    stored_object = mock.MagicMock()
+    stored_object.key = 'visualizations/test-rid/some_file.html'
+    bucket = mock.MagicMock()
+    bucket.objects.filter.return_value = [stored_object]
+    s3 = mock.MagicMock()
+    s3.Bucket.return_value = bucket
+
+    known_resources = (('dynamodb', dynamodb), ('s3', s3))
+
+    def get_resource_side_effect(service_name):
+        # Hand out the prepared mock for a known service, a fresh one otherwise.
+        for name, resource in known_resources:
+            if service_name == name:
+                return resource
+        return mock.MagicMock()
+
+    with mock.patch('reset_conversation.get_boto_resource', side_effect=get_resource_side_effect) as mock_get_resource:
+        # The patch stays active for as long as the requesting test runs.
+        yield {
+            "get_resource": mock_get_resource,
+            "dynamodb": dynamodb,
+            "s3": s3,
+            "table": table,
+            "bucket": bucket,
+        }
+
+def test_reset_conversation_calls_all_services(mock_boto3_resources):
+    """
+    Given both a zid and a rid, the reset must exercise DynamoDB and S3 deletion.
+    """
+    get_resource = mock_boto3_resources["get_resource"]
+    dynamodb = mock_boto3_resources["dynamodb"]
+    s3 = mock_boto3_resources["s3"]
+    table = mock_boto3_resources["table"]
+    bucket = mock_boto3_resources["bucket"]
+
+    # Invoke the script's entry point directly with keyword arguments.
+    reset_conversation_main(zid="12345", rid="r_test_12345")
+
+    # 1. A resource handle was requested for each of the two services.
+    for service in ("dynamodb", "s3"):
+        get_resource.assert_any_call(service)
+
+    # 2. At least one table was looked up (the exact number is not pinned here).
+    dynamodb.Table.assert_called()
+
+    # 3. Some deletion was attempted on the table, either through a batch writer
+    #    or through an individual delete_item call.
+    deletion_attempted = table.batch_writer.called or table.delete_item.called
+    assert deletion_attempted
+
+    # 4. The bucket was opened; its name is not checked, so any argument matches.
+    s3.Bucket.assert_called_with(mock.ANY)
+
+    # 5. The objects found in the bucket were deleted in a single call.
+    bucket.delete_objects.assert_called_once()
+
+def test_reset_conversation_skips_s3_if_no_rid(mock_boto3_resources):
+    """
+    With ``rid=None`` the reset still works on DynamoDB but deletes nothing from S3.
+    """
+    get_resource = mock_boto3_resources["get_resource"]
+    dynamodb = mock_boto3_resources["dynamodb"]
+    bucket = mock_boto3_resources["bucket"]
+
+    # No report id is supplied for this conversation.
+    reset_conversation_main(zid="54321", rid=None)
+
+    # DynamoDB was requested and at least one table was looked up.
+    get_resource.assert_any_call("dynamodb")
+    dynamodb.Table.assert_called()
+    # No object deletion may have been issued against the bucket.
+    bucket.delete_objects.assert_not_called()
+
diff --git a/delphi/tests/test_umap_narrative_pipeline.py b/delphi/tests/test_umap_narrative_pipeline.py
@@ -0,0 +1,88 @@
+import os
+import sys
+from unittest import mock
+import pytest
+import numpy as np
+
+# Add the 'umap_narrative' directory to the Python path to import 'run_pipeline'
+umap_narrative_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'umap_narrative'))
+if umap_narrative_dir not in sys.path:
+    sys.path.insert(0, umap_narrative_dir)
+
+# Now we can import the main function from the script we want to test
+from run_pipeline import main as run_pipeline_main
+
+@pytest.fixture(autouse=True)
+def setup_and_teardown(tmp_path, monkeypatch):
+    """
+    Isolate every test in this module (the fixture is autouse).
+
+    - Sets ANTHROPIC_API_KEY to a dummy value for the duration of the test.
+    - Runs the test with a per-test temporary directory as working directory.
+
+    Both changes are made through ``monkeypatch``, which restores the previous
+    environment and working directory automatically at teardown.
+    """
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "mock_key_for_testing")
+    monkeypatch.chdir(tmp_path)
+
+def test_pipeline_calls_correct_functions(tmp_path):
+    """
+    Drive ``run_pipeline.main()`` with ``--use-mock-data --no-dynamo`` and check its
+    control flow through mocks rather than through files written to disk.
+
+    ``process_comments`` and the three visualization/index builders are patched on
+    ``run_pipeline``; the assertions only look at how often, and with which
+    arguments, those patched functions were called.
+    """
+    zid = "12345"
+    test_args = [
+        "run_pipeline.py",
+        "--use-mock-data",
+        "--zid", zid,
+        "--no-dynamo",
+    ]
+
+    num_comments = 100
+    num_layers = 3
+
+    # Seeded generator: the stand-in ML output is identical on every run.
+    rng = np.random.default_rng(0)
+    mock_process_comments_return_value = (
+        rng.random((num_comments, 2)),
+        rng.random((num_comments, 32)),
+        [rng.integers(0, 5, num_comments) for _ in range(num_layers)],
+        [f"comment text {i}" for i in range(num_comments)],
+        list(range(num_comments)),
+    )
+
+    # Patch all major functions to test the control flow.
+    with mock.patch('run_pipeline.process_comments', return_value=mock_process_comments_return_value) as mock_process_comments, \
+         mock.patch('run_pipeline.create_basic_layer_visualization') as mock_create_basic, \
+         mock.patch('run_pipeline.create_named_layer_visualization') as mock_create_named, \
+         mock.patch('run_pipeline.create_enhanced_multilayer_index') as mock_create_index:
+
+        # The named-layer visualization is expected to hand back a file path.
+        mock_create_named.return_value = "mock/path/to/file.html"
+
+        with mock.patch.object(sys, 'argv', test_args):
+            try:
+                run_pipeline_main()
+            except SystemExit as e:
+                pytest.fail(f"run_pipeline.py exited unexpectedly: {e}")
+
+    # 1. The patched process_comments must actually have been reached.
+    assert mock_process_comments.called, "The mock for run_pipeline.process_comments was not called."
+
+    # 2. Each visualization function runs once per mock layer.
+    assert mock_create_basic.call_count == num_layers, f"Expected basic visualization to be called {num_layers} times, but was called {mock_create_basic.call_count} times."
+    assert mock_create_named.call_count == num_layers, f"Expected named visualization to be called {num_layers} times, but was called {mock_create_named.call_count} times."
+
+    # 3. The final index is built exactly once.
+    assert mock_create_index.call_count == 1, f"Expected index creation to be called once, but was called {mock_create_index.call_count} times."
+
+    # 4. Signature: create_enhanced_multilayer_index(output_dir, conversation_name, layer_files, layer_info).
+    #    Per the original author's note, the script passes the conversation id (zid) as the
+    #    second positional argument instead of a name (a known bug); this pins that behaviour.
+    call_args, _ = mock_create_index.call_args
+    assert call_args[1] == zid, f"Expected conversation_id '{zid}' to be passed to index creation, but got '{call_args[1]}'"