compdemocracy
diff --git a/delphi/docs/CLJ-PARITY-FIXES-JOURNAL.md
Lines changed: 2 additions & 2 deletions b/delphi/docs/CLJ-PARITY-FIXES-JOURNAL.md
Lines changed: 2 additions & 2 deletions
diff --git a/delphi/polismath/regression/datasets.py
Lines changed: 1 addition & 1 deletion b/delphi/polismath/regression/datasets.py
Lines changed: 1 addition & 1 deletion
diff --git a/delphi/pyproject.toml
Lines changed: 6 additions & 2 deletions b/delphi/pyproject.toml
Lines changed: 6 additions & 2 deletions
diff --git a/delphi/tests/conftest.py
Lines changed: 113 additions & 13 deletions b/delphi/tests/conftest.py
Lines changed: 113 additions & 13 deletions
diff --git a/delphi/tests/test_discrepancy_fixes.py
Lines changed: 8 additions & 45 deletions b/delphi/tests/test_discrepancy_fixes.py
Lines changed: 8 additions & 45 deletions
@@ -1,7 +1,7 @@
 # Journal: Fixing Python-Clojure Discrepancies
 
 This is the ongoing tracking document for the TDD fix process described in
-`CLJ-PARITY-FIXES-PLAN.md`. It serves as the single source of truth for
+`PLAN_DISCREPANCY_FIXES.md`. It serves as the single source of truth for
 our work, while commit messages and PR descriptions serve reviewers.
 
 ---
@@ -136,7 +136,7 @@ After rebase onto updated `origin/kmeans_analysis_docs`:
 - Updated PR title convention: `[Clj parity PR N]` prefix for reviewer clarity
 - Redacted private dataset names from git history across the full stack:
   - `SESSION_HANDOFF_KMEANS.md` in `kmeans_clustering_tooling` (amended deep commit via `GIT_SEQUENCE_EDITOR` rebase)
-  - `CLJ-PARITY-FIXES-PLAN.md` in `kmeans_analysis_docs` (amended tip)
+  - `PLAN_DISCREPANCY_FIXES.md` in `kmeans_analysis_docs` (amended tip)
   - `CLJ-PARITY-FIXES-JOURNAL.md` in `series-of-fixes` (amended tip)
   - Force-pushed all three branches, rebased the chain
 - Tests unchanged: 5 passed, 2 skipped, 18 xfailed, 5 xpassed
 
@@ -187,7 +187,7 @@ def find_file(pattern: str) -> str:
         math_blob_path = str(cold_start_blob)
     elif blob_type == 'incremental':
         if not original_blob.exists():
-            raise FileNotFoundError(f"No full blob for {name}")
+            raise FileNotFoundError(f"No incremental blob for {name}")
         math_blob_path = str(original_blob)
     elif prefer_cold_start and cold_start_blob.exists():
         math_blob_path = str(cold_start_blob)
 
@@ -124,8 +124,12 @@ include = [
 
 # Pytest configuration
 [tool.pytest.ini_options]
-# When using pytest-xdist (-n), group tests by xdist_group marker for efficient fixture sharing
-addopts = "--dist=loadgroup"
+# Force sequential execution (-n0) to leverage the session-scoped conversation cache.
+# The cache shares computed conversations across test files, but each xdist worker
+# has its own process with a separate cache. With only 2 datasets (biodiversity, vw),
+# sequential execution with caching is ~6x faster than parallel execution where each
+# worker recomputes the conversations independently.
+addopts = "-n0"
 filterwarnings = [
     # Ignore python_multipart deprecation warning from ddtrace (third-party)
     "ignore:Please use `import python_multipart`:PendingDeprecationWarning:ddtrace.internal.module",
 
@@ -5,44 +5,92 @@
 - Command line options --include-local and --datasets for dataset selection
 - Fixtures for accessing dataset information
 - @pytest.mark.use_discovered_datasets for dynamic dataset parametrization
-- Helper functions for parallel test execution with xdist_group markers
+- Session-scoped conversation cache for efficient test execution
 """
 
+from copy import deepcopy
+
 import pytest
+
+from polismath.conversation.conversation import Conversation
+from polismath.regression import get_dataset_files
 from polismath.regression.datasets import (
     discover_datasets,
     list_regression_datasets,
     get_blob_variants,
 )
+from tests.common_utils import load_votes, load_comments
 
 
 # =============================================================================
-# Parallel Execution Helpers
+# Session-scoped Conversation Cache
 # =============================================================================
 
-def make_dataset_params(datasets: list[str]) -> list:
+_SESSION_CONV_CACHE: dict = {}
+
+
+@pytest.fixture(scope="session")
+def get_or_compute_conversation():
+    """Session-wide conversation cache shared across all test files.
+
+    Returns a function that computes a dataset's Conversation on first request
+    and returns a deepcopy of the cached entry on every call, for test isolation.
+
+    Only ONE dataset is kept in memory at a time. When a different dataset
+    is requested, the previous one is evicted. This works because tests are
+    reordered by pytest_collection_modifyitems to group all tests for a
+    dataset together (across all test files).
     """
-    Create pytest.param objects with xdist_group markers for parallel execution.
+    import gc
 
-    When using pytest-xdist with --dist=loadgroup, tests with the same
-    xdist_group marker will run on the same worker. This ensures fixtures
-    are computed only once per dataset per worker.
+    def _get(dataset_name: str) -> dict:
+        if dataset_name not in _SESSION_CONV_CACHE:
+            # Evict previous dataset (we only keep one at a time)
+            for ds in list(_SESSION_CONV_CACHE.keys()):
+                _SESSION_CONV_CACHE.pop(ds, None)
+            Conversation._reset_conversion_cache()
+            gc.collect()
+
+            files = get_dataset_files(dataset_name, blob_type='incremental')
+            votes = load_votes(files['votes'])
+            comments = load_comments(files['comments'])
+
+            conv = Conversation(dataset_name)
+            conv = conv.update_votes(votes)
+            conv = conv.recompute()
+
+            _SESSION_CONV_CACHE[dataset_name] = {
+                'conv': conv,
+                'dataset_name': dataset_name,
+                'files': files,
+                'comments': comments,
+            }
+
+        return deepcopy(_SESSION_CONV_CACHE[dataset_name])
+
+    return _get
+
+
+# =============================================================================
+# Dataset Parametrization Helpers
+# =============================================================================
+
+def make_dataset_params(datasets: list[str]) -> list:
+    """
+    Create pytest.param objects for dataset parametrization.
 
     Args:
         datasets: List of dataset names (or "dataset-blob_type" composite IDs)
 
     Returns:
-        List of pytest.param objects with xdist_group markers
+        List of pytest.param objects
 
     Example:
         @pytest.mark.parametrize("dataset_name", make_dataset_params(["biodiversity", "vw"]))
         def test_something(dataset_name):
             ...
     """
-    return [
-        pytest.param(ds, marks=pytest.mark.xdist_group(ds))
-        for ds in datasets
-    ]
+    return [pytest.param(ds) for ds in datasets]
 
 
 def parse_dataset_blob_id(composite_id: str) -> tuple[str, str]:
@@ -137,7 +185,7 @@ def pytest_generate_tests(metafunc):
     With use_blobs=True, parametrize with 'dataset-blob_type' composite IDs
     (e.g., 'biodiversity-incremental', 'engage-cold_start') for each filled blob variant.
 
-    Uses xdist_group markers for efficient parallel execution with pytest-xdist.
+    Tests are later grouped by dataset so the session-scoped conversation cache is reused.
     """
     markers = list(metafunc.definition.iter_markers("use_discovered_datasets"))
     if not markers:
@@ -169,6 +217,58 @@ def pytest_generate_tests(metafunc):
         metafunc.parametrize("dataset_name", make_dataset_params(datasets))
 
 
+# =============================================================================
+# Test Reordering for Cache Efficiency
+# =============================================================================
+
+def _extract_dataset_from_test(item) -> str:
+    """Extract the dataset name from a test item's parameters.
+
+    Handles both plain dataset names ('biodiversity') and composite IDs
+    ('biodiversity-incremental'). Returns empty string if no dataset parameter.
+    """
+    # Check callspec for parametrized values
+    if hasattr(item, 'callspec') and item.callspec.params:
+        for param_name in ('dataset_name', 'dataset_blob_id'):
+            if param_name in item.callspec.params:
+                value = item.callspec.params[param_name]
+                # Extract base dataset name from composite IDs
+                if value.endswith('-incremental'):
+                    return value[:-len('-incremental')]
+                elif value.endswith('-cold_start'):
+                    return value[:-len('-cold_start')]
+                return value
+    return ''
+
+
+def pytest_collection_modifyitems(session, config, items):
+    """Reorder tests to group by dataset for cache efficiency.
+
+    Groups all tests for a dataset together (across all test files) so that
+    the session-scoped conversation cache only needs to hold ONE dataset at
+    a time. This reduces peak memory from O(N datasets) to O(1 dataset).
+
+    Order: dataset1[file1, file2, ...], dataset2[file1, file2, ...], ...
+    Within each dataset, original test order is preserved.
+    """
+    # Separate tests into dataset-parametrized and non-parametrized
+    dataset_tests = []
+    other_tests = []
+
+    for item in items:
+        ds = _extract_dataset_from_test(item)
+        if ds:
+            dataset_tests.append((ds, item))
+        else:
+            other_tests.append(item)
+
+    # Sort dataset tests by dataset name (stable sort preserves order within dataset)
+    dataset_tests.sort(key=lambda x: x[0])
+
+    # Rebuild items list: non-parametrized first, then dataset tests grouped
+    items[:] = other_tests + [item for _, item in dataset_tests]
+
+
 # Provide summary of discovered datasets at start of test run
 def pytest_report_header(config):
     """Add dataset discovery info to pytest header."""
 
@@ -2,7 +2,7 @@
 Per-discrepancy tests for Python-Clojure parity fixes.
 
 Each test class targets ONE specific discrepancy from the fix plan
-(delphi/docs/CLJ-PARITY-FIXES-PLAN.md). Tests are designed to FAIL before
+(delphi/docs/PLAN_DISCREPANCY_FIXES.md). Tests are designed to FAIL before
 the fix is applied and PASS after. They are parametrized by ALL available
 datasets with Clojure reference blobs.
 
@@ -26,7 +26,6 @@
     D14    - Large conv optimization (deferred)
 """
 
-import json
 import math
 
 import numpy as np
@@ -44,13 +43,9 @@
     finalize_cmt_stats,
 )
 from polismath.regression import get_dataset_files, get_blob_variants
-from polismath.regression.clojure_comparer import (
-    ClojureComparer,
-    unfold_clojure_group_clusters,
-)
 from polismath.regression.datasets import discover_datasets
 from conftest import _get_requested_datasets, make_dataset_params, parse_dataset_blob_id
-from tests.common_utils import load_votes, load_comments, load_clojure_output
+from tests.common_utils import load_clojure_output
 
 
 # ---------------------------------------------------------------------------
@@ -85,55 +80,23 @@ def pytest_generate_tests(metafunc):
 # Shared fixtures
 # ---------------------------------------------------------------------------
 
-# Module-level caches — Conversation is keyed by dataset name (shared across
-# blob variants), blobs are keyed by composite ID.
-_CONV_CACHE: dict = {}
+# Module-level cache for blobs (keyed by composite ID)
 _BLOB_CACHE: dict = {}
 
 
-def _get_or_compute_conversation(dataset_name: str) -> dict:
-    """Compute (or retrieve cached) conversation for a dataset."""
-    import gc
-    if dataset_name in _CONV_CACHE:
-        return _CONV_CACHE[dataset_name]
-
-    # Evict other datasets
-    for ds in list(_CONV_CACHE.keys()):
-        if ds != dataset_name:
-            _CONV_CACHE.pop(ds, None)
-            Conversation._reset_conversion_cache()
-            gc.collect()
-
-    files = get_dataset_files(dataset_name, blob_type='incremental')
-    votes = load_votes(files['votes'])
-    comments = load_comments(files['comments'])
-
-    conv = Conversation(dataset_name)
-    conv = conv.update_votes(votes)
-    conv = conv.recompute()
-
-    data = {
-        'conv': conv,
-        'dataset_name': dataset_name,
-        'files': files,
-        'comments': comments,
-    }
-    _CONV_CACHE[dataset_name] = data
-    return data
-
-
 @pytest.fixture(scope="class")
-def conversation_data(dataset_name):
+def conversation_data(dataset_name, get_or_compute_conversation):
     """Class-scoped fixture: runs the full pipeline once per dataset+blob_type.
 
     dataset_name here is actually a composite 'dataset-blob_type' ID
-    (e.g., 'biodiversity-full'). The Conversation is shared across blob variants.
+    (e.g., 'biodiversity-incremental'). The Conversation is shared across blob variants
+    via the session-scoped get_or_compute_conversation fixture.
     """
     global _BLOB_CACHE
     ds_name, blob_type = parse_dataset_blob_id(dataset_name)
 
-    # Get or compute the conversation (shared across blob variants)
-    conv_data = _get_or_compute_conversation(ds_name)
+    # Get or compute the conversation (shared across blob variants via session cache)
+    conv_data = get_or_compute_conversation(ds_name)
 
     # Load the specific blob variant (cache per composite ID)
     if dataset_name not in _BLOB_CACHE: