Skip to content

Commit 1e96cb6

Browse files
jucorclaude
andcommitted
Test both incremental and cold-start Clojure blobs
Clojure comparison tests now run against both blob variants when available: - incremental: result of progressive refinement as votes trickled in - cold_start: computed from scratch in one pass on full dataset Each dataset generates separate test IDs (e.g., biodiversity-incremental, biodiversity-cold_start). Only blobs with meaningful content (PCA data or non-empty clusters) are included. Key changes: - Add get_blob_variants() to discover filled blob variants per dataset - Add _is_blob_filled() to check if a blob has meaningful content - Extend get_dataset_files() with explicit blob_type parameter - Add use_blobs=True option to @pytest.mark.use_discovered_datasets - Add parse_dataset_blob_id() helper for composite ID parsing - Update test_legacy_clojure_regression, test_discrepancy_fixes, and test_legacy_repness_comparison to parametrize by blob variant - Conversation computation is shared across blob variants of same dataset Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 03cb573 commit 1e96cb6

6 files changed

Lines changed: 244 additions & 106 deletions

File tree

delphi/polismath/regression/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
get_dataset_info,
1616
get_dataset_files,
1717
get_dataset_report_id,
18+
get_blob_variants,
1819
)
1920
__all__ = [
2021
'ConversationRecorder',
@@ -26,4 +27,5 @@
2627
'get_dataset_info',
2728
'get_dataset_files',
2829
'get_dataset_report_id',
30+
'get_blob_variants',
2931
]

delphi/polismath/regression/datasets.py

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,15 @@ def get_dataset_report_id(name: str) -> str:
156156
return get_dataset_info(name).report_id
157157

158158

159-
def get_dataset_files(name: str, prefer_cold_start: bool = True) -> Dict[str, str]:
159+
def get_dataset_files(name: str, prefer_cold_start: bool = True, blob_type: Optional[str] = None) -> Dict[str, str]:
160160
"""Get file paths for a dataset.
161161
162162
Args:
163163
name: Dataset name
164-
prefer_cold_start: If True (default), use cold-start blob when available
164+
prefer_cold_start: If True (default), use cold-start blob when available.
165+
Ignored if blob_type is specified.
166+
blob_type: Explicit blob type to use: 'incremental' or 'cold_start'.
167+
If specified, overrides prefer_cold_start.
165168
"""
166169
info = get_dataset_info(name)
167170
rid = info.report_id
@@ -174,11 +177,19 @@ def find_file(pattern: str) -> str:
174177
raise ValueError(f"Multiple files matching {pattern} in {info.path}: {matches}")
175178
return str(matches[0].resolve())
176179

177-
# Check for cold-start blob first, fall back to original
180+
# Determine which blob to use
178181
cold_start_blob = info.path / f"{rid}_math_blob_cold_start.json"
179182
original_blob = info.path / f"{rid}_math_blob.json"
180183

181-
if prefer_cold_start and cold_start_blob.exists():
184+
if blob_type == 'cold_start':
185+
if not cold_start_blob.exists():
186+
raise FileNotFoundError(f"No cold-start blob for {name}")
187+
math_blob_path = str(cold_start_blob)
188+
elif blob_type == 'incremental':
189+
if not original_blob.exists():
190+
raise FileNotFoundError(f"No full blob for {name}")
191+
math_blob_path = str(original_blob)
192+
elif prefer_cold_start and cold_start_blob.exists():
182193
math_blob_path = str(cold_start_blob)
183194
else:
184195
math_blob_path = str(original_blob)
@@ -193,6 +204,44 @@ def find_file(pattern: str) -> str:
193204
}
194205

195206

207+
def _is_blob_filled(blob_path: Path) -> bool:
208+
"""Check if a math blob has meaningful content (PCA, non-empty clusters, etc.)."""
209+
import json
210+
if not blob_path.exists():
211+
return False
212+
try:
213+
with open(blob_path) as f:
214+
data = json.load(f)
215+
# A blob is "filled" if it has PCA data or non-empty base-clusters
216+
has_pca = 'pca' in data and 'comps' in data.get('pca', {})
217+
bc = data.get('base-clusters', {})
218+
has_clusters = isinstance(bc, dict) and len(bc.get('id', [])) > 0
219+
return has_pca or has_clusters
220+
except (json.JSONDecodeError, OSError):
221+
return False
222+
223+
224+
def get_blob_variants(name: str) -> List[str]:
225+
"""Get available filled blob variants for a dataset.
226+
227+
Returns a list of blob_type strings ('incremental', 'cold_start') for blobs
228+
that exist and contain meaningful data.
229+
"""
230+
info = get_dataset_info(name)
231+
rid = info.report_id
232+
variants = []
233+
234+
full_blob = info.path / f"{rid}_math_blob.json"
235+
if _is_blob_filled(full_blob):
236+
variants.append('incremental')
237+
238+
cold_start_blob = info.path / f"{rid}_math_blob_cold_start.json"
239+
if _is_blob_filled(cold_start_blob):
240+
variants.append('cold_start')
241+
242+
return variants
243+
244+
196245
# Legacy aliases
197246
def get_dataset_directory(report_id: str, dataset_name: Optional[str] = None) -> Path:
198247
"""Find dataset directory by report_id."""

delphi/tests/conftest.py

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from polismath.regression.datasets import (
1313
discover_datasets,
1414
list_regression_datasets,
15+
get_blob_variants,
1516
)
1617

1718

@@ -28,7 +29,7 @@ def make_dataset_params(datasets: list[str]) -> list:
2829
are computed only once per dataset per worker.
2930
3031
Args:
31-
datasets: List of dataset names
32+
datasets: List of dataset names (or "dataset-blob_type" composite IDs)
3233
3334
Returns:
3435
List of pytest.param objects with xdist_group markers
@@ -44,6 +45,24 @@ def test_something(dataset_name):
4445
]
4546

4647

48+
def parse_dataset_blob_id(composite_id: str) -> tuple[str, str]:
49+
"""Parse a 'dataset-blob_type' composite ID into (dataset_name, blob_type).
50+
51+
Examples:
52+
'biodiversity-incremental' -> ('biodiversity', 'incremental')
53+
'bg2050-cold_start' -> ('bg2050', 'cold_start')
54+
"""
55+
if composite_id.endswith('-cold_start'):
56+
return composite_id[:-len('-cold_start')], 'cold_start'
57+
elif composite_id.endswith('-incremental'):
58+
return composite_id[:-len('-incremental')], 'incremental'
59+
else:
60+
raise ValueError(
61+
f"Invalid composite dataset ID: {composite_id}. "
62+
f"Expected format: 'dataset-incremental' or 'dataset-cold_start'"
63+
)
64+
65+
4766
def pytest_addoption(parser):
4867
"""Add custom command line options to pytest."""
4968
parser.addoption(
@@ -83,8 +102,10 @@ def pytest_configure(config):
83102
"""Register custom markers."""
84103
config.addinivalue_line(
85104
"markers",
86-
"use_discovered_datasets: dynamically parametrize with discovered "
87-
"datasets, respecting --include-local and --datasets CLI options"
105+
"use_discovered_datasets(use_blobs=False): dynamically parametrize with discovered "
106+
"datasets, respecting --include-local and --datasets CLI options. "
107+
"With use_blobs=True, parametrize with 'dataset-blob_type' composite IDs "
108+
"(e.g., 'biodiversity-incremental', 'engage-cold_start') for each filled blob variant."
88109
)
89110

90111

@@ -113,19 +134,39 @@ def pytest_generate_tests(metafunc):
113134
These tests must declare a 'dataset_name' parameter. They will be parametrized
114135
with all regression datasets, filtered by --include-local and --datasets.
115136
137+
With use_blobs=True, parametrize with 'dataset-blob_type' composite IDs
138+
(e.g., 'biodiversity-incremental', 'engage-cold_start') for each filled blob variant.
139+
116140
Uses xdist_group markers for efficient parallel execution with pytest-xdist.
117141
"""
118-
if not list(metafunc.definition.iter_markers("use_discovered_datasets")):
142+
markers = list(metafunc.definition.iter_markers("use_discovered_datasets"))
143+
if not markers:
119144
return
120145

121146
include_local = metafunc.config.getoption("--include-local")
122147
requested = _get_requested_datasets(metafunc.config)
123148

124-
datasets = list_regression_datasets(include_local=include_local)
125-
if requested:
126-
datasets = [d for d in datasets if d in requested]
127-
128-
metafunc.parametrize("dataset_name", make_dataset_params(datasets))
149+
# Check if use_blobs=True was passed to the marker
150+
use_blobs = any(m.kwargs.get('use_blobs', False) for m in markers)
151+
152+
if use_blobs:
153+
# Parametrize with composite 'dataset-blob_type' IDs
154+
datasets = discover_datasets(include_local=include_local)
155+
blob_ids = []
156+
for name, info in datasets.items():
157+
if not (info.has_votes and info.has_comments and info.has_clojure_reference):
158+
continue
159+
if requested and name not in requested:
160+
continue
161+
for blob_type in get_blob_variants(name):
162+
blob_ids.append(f"{name}-{blob_type}")
163+
metafunc.parametrize("dataset_name", make_dataset_params(blob_ids))
164+
else:
165+
# Parametrize with plain dataset names
166+
datasets = list_regression_datasets(include_local=include_local)
167+
if requested:
168+
datasets = [d for d in datasets if d in requested]
169+
metafunc.parametrize("dataset_name", make_dataset_params(datasets))
129170

130171

131172
# Provide summary of discovered datasets at start of test run

delphi/tests/test_discrepancy_fixes.py

Lines changed: 57 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -43,65 +43,68 @@
4343
repness_metric,
4444
finalize_cmt_stats,
4545
)
46-
from polismath.regression import get_dataset_files
46+
from polismath.regression import get_dataset_files, get_blob_variants
4747
from polismath.regression.clojure_comparer import (
4848
ClojureComparer,
4949
unfold_clojure_group_clusters,
5050
)
5151
from polismath.regression.datasets import discover_datasets
52-
from conftest import _get_requested_datasets, make_dataset_params
52+
from conftest import _get_requested_datasets, make_dataset_params, parse_dataset_blob_id
5353
from tests.common_utils import load_votes, load_comments, load_clojure_output
5454

5555

5656
# ---------------------------------------------------------------------------
57-
# Dataset parametrization (same pattern as test_legacy_clojure_regression.py)
57+
# Dataset+blob parametrization (same pattern as test_legacy_clojure_regression.py)
5858
# ---------------------------------------------------------------------------
5959

60-
def _get_clojure_datasets(include_local: bool, requested: set[str] | None = None) -> list[str]:
61-
"""Get datasets that have Clojure math_blob for comparison."""
60+
def _get_clojure_dataset_blob_ids(include_local: bool, requested: set[str] | None = None) -> list[str]:
61+
"""Get composite 'dataset-blob_type' IDs for all filled blobs."""
6262
datasets = discover_datasets(include_local=include_local)
63-
result = [
64-
name for name, info in datasets.items()
65-
if info.has_votes and info.has_comments and info.has_clojure_reference
66-
]
67-
if requested:
68-
result = [d for d in result if d in requested]
63+
result = []
64+
for name, info in datasets.items():
65+
if not (info.has_votes and info.has_comments and info.has_clojure_reference):
66+
continue
67+
if requested and name not in requested:
68+
continue
69+
for blob_type in get_blob_variants(name):
70+
result.append(f"{name}-{blob_type}")
6971
return result
7072

7173

7274
def pytest_generate_tests(metafunc):
73-
"""Parametrize tests with clojure datasets at collection time."""
75+
"""Parametrize tests with clojure dataset+blob_type at collection time."""
7476
if "dataset_name" in metafunc.fixturenames:
7577
include_local = metafunc.config.getoption("--include-local", default=False)
7678
requested = _get_requested_datasets(metafunc.config)
77-
datasets = _get_clojure_datasets(include_local, requested)
78-
params = make_dataset_params(datasets)
79+
blob_ids = _get_clojure_dataset_blob_ids(include_local, requested)
80+
params = make_dataset_params(blob_ids)
7981
metafunc.parametrize("dataset_name", params, scope="class")
8082

8183

8284
# ---------------------------------------------------------------------------
8385
# Shared fixtures
8486
# ---------------------------------------------------------------------------
8587

86-
# Module-level cache — one Conversation at a time to manage memory
87-
_CACHE: dict = {}
88+
# Module-level caches — Conversation is keyed by dataset name (shared across
89+
# blob variants), blobs are keyed by composite ID.
90+
_CONV_CACHE: dict = {}
91+
_BLOB_CACHE: dict = {}
8892

8993

90-
def _get_conversation_data(dataset_name: str) -> dict:
91-
"""Compute (or retrieve cached) conversation + clojure blob for a dataset."""
94+
def _get_or_compute_conversation(dataset_name: str) -> dict:
95+
"""Compute (or retrieve cached) conversation for a dataset."""
9296
import gc
97+
if dataset_name in _CONV_CACHE:
98+
return _CONV_CACHE[dataset_name]
99+
93100
# Evict other datasets
94-
for ds in list(_CACHE.keys()):
101+
for ds in list(_CONV_CACHE.keys()):
95102
if ds != dataset_name:
96-
_CACHE.pop(ds, None)
103+
_CONV_CACHE.pop(ds, None)
97104
Conversation._reset_conversion_cache()
98105
gc.collect()
99106

100-
if dataset_name in _CACHE:
101-
return _CACHE[dataset_name]
102-
103-
files = get_dataset_files(dataset_name)
104-
clojure = load_clojure_output(files['math_blob'])
107+
files = get_dataset_files(dataset_name, blob_type='incremental')
105108
votes = load_votes(files['votes'])
106109
comments = load_comments(files['comments'])
107110

@@ -111,19 +114,44 @@ def _get_conversation_data(dataset_name: str) -> dict:
111114

112115
data = {
113116
'conv': conv,
114-
'clojure': clojure,
115117
'dataset_name': dataset_name,
116118
'files': files,
117119
'comments': comments,
118120
}
119-
_CACHE[dataset_name] = data
121+
_CONV_CACHE[dataset_name] = data
120122
return data
121123

122124

123125
@pytest.fixture(scope="class")
124126
def conversation_data(dataset_name):
125-
"""Class-scoped fixture: runs the full pipeline once per dataset."""
126-
return _get_conversation_data(dataset_name)
127+
"""Class-scoped fixture: runs the full pipeline once per dataset+blob_type.
128+
129+
dataset_name here is actually a composite 'dataset-blob_type' ID
130+
(e.g., 'biodiversity-full'). The Conversation is shared across blob variants.
131+
"""
132+
global _BLOB_CACHE
133+
ds_name, blob_type = parse_dataset_blob_id(dataset_name)
134+
135+
# Get or compute the conversation (shared across blob variants)
136+
conv_data = _get_or_compute_conversation(ds_name)
137+
138+
# Load the specific blob variant (cache per composite ID)
139+
if dataset_name not in _BLOB_CACHE:
140+
for bid in list(_BLOB_CACHE.keys()):
141+
if not bid.startswith(ds_name + '-'):
142+
_BLOB_CACHE.pop(bid, None)
143+
files = get_dataset_files(ds_name, blob_type=blob_type)
144+
clojure = load_clojure_output(files['math_blob'])
145+
_BLOB_CACHE[dataset_name] = clojure
146+
147+
return {
148+
'conv': conv_data['conv'],
149+
'clojure': _BLOB_CACHE[dataset_name],
150+
'dataset_name': ds_name,
151+
'blob_type': blob_type,
152+
'files': conv_data['files'],
153+
'comments': conv_data['comments'],
154+
}
127155

128156

129157
@pytest.fixture(scope="class")

0 commit comments

Comments
 (0)