|
21 | 21 | EXCEPTION_RE = re.compile(r"([A-Za-z_]\w*(?:Error|Exception)):\s*(.+)") |
22 | 22 | RUN_ID_REGIME_RE = re.compile(r"^(R\d+)_") |
23 | 23 | TOKEN_REGIME_RE = re.compile(r"(?:^|[_-])(R\d+)(?:[_-]|$)", re.IGNORECASE) |
| 24 | +REGIME_ONLY_RE = re.compile(r"^R(\d+)$", re.IGNORECASE) |
24 | 25 |
|
25 | 26 |
|
26 | 27 | def to_float(value): |
@@ -770,6 +771,171 @@ def infer_target_regime(run_id, run_payload): |
770 | 771 | return None |
771 | 772 |
|
772 | 773 |
|
def normalize_target_regime(value):
    """Canonicalize a regime label to uppercase ``R<digits>`` form.

    Returns None for missing values or anything that is not exactly an
    ``R<number>`` token after trimming and uppercasing.
    """
    if value is None:
        return None
    candidate = str(value).strip().upper()
    return candidate if REGIME_ONLY_RE.match(candidate) else None
| 781 | + |
| 782 | + |
def regime_sort_key(value):
    """Sort key that orders valid regimes first, numerically by regime number.

    Unrecognized values sort after all valid regimes, ordered by their
    string representation.
    """
    normalized = normalize_target_regime(value)
    if normalized:
        number = int(REGIME_ONLY_RE.match(normalized).group(1))
        return (0, number)
    return (1, str(value or ''))
| 788 | + |
| 789 | + |
def pick_numeric(*values):
    """Return the first argument that converts to a float via to_float, else None."""
    return next(
        (converted for converted in map(to_float, values) if converted is not None),
        None,
    )
| 796 | + |
| 797 | + |
def extract_labeled_count(run):
    """Best-effort lookup of the labeled-training-set size from a run record.

    Probes a sequence of legacy/nested key spellings and returns the first
    numeric hit, or None when none of them convert.
    """
    candidate_keys = (
        'train_labeled_n',
        'sampling.stats.train_labeled.n',
        'artifacts.sampling.stats.train_labeled.n',
        'sampling.stats.labeled',
        'artifacts.sampling.stats.labeled',
        'sampling.stats.labeled_class_dist.n',
        'artifacts.sampling.stats.labeled_class_dist.n',
    )
    return pick_numeric(*(run.get(key) for key in candidate_keys))
| 808 | + |
| 809 | + |
def extract_train_count(run):
    """Best-effort lookup of the total training-set size from a run record.

    Mirrors extract_labeled_count: probes several key spellings and returns
    the first numeric hit, or None.
    """
    candidate_keys = (
        'train_n',
        'sampling.stats.train.n',
        'artifacts.sampling.stats.train.n',
        'sampling.stats.train',
        'artifacts.sampling.stats.train',
    )
    return pick_numeric(*(run.get(key) for key in candidate_keys))
| 818 | + |
| 819 | + |
def build_run_summary(run):
    """Project a full run record down to the compact fields consumers need.

    Normalizes the target regime, keeps only non-empty raw-data URLs, and
    attaches labeled/train counts when they can be recovered from the run.
    Empty entries are pruned from the returned dict via prune_empty.
    """
    fields = (
        'run_id',
        'method_id',
        'dataset_id',
        'paradigm',
        'modality',
        'target_regime',
        'test_accuracy',
        'test_macro_f1',
        'val.accuracy',
        'val.macro_f1',
        'duration_s',
        'seed',
        'status',
        'exit_code',
        'error',
    )
    summary = {name: run.get(name) for name in fields}
    summary['target_regime'] = normalize_target_regime(summary.get('target_regime'))

    # Keep only URLs with truthy values; anything non-dict collapses to {}.
    urls = run.get('raw_data_urls')
    if isinstance(urls, dict):
        summary['raw_data_urls'] = {key: val for key, val in urls.items() if val}
    else:
        summary['raw_data_urls'] = {}

    labeled = extract_labeled_count(run)
    if labeled is not None:
        summary['train_labeled_n'] = labeled

    total = extract_train_count(run)
    if total is not None:
        summary['train_n'] = total

    return prune_empty(summary)
| 857 | + |
| 858 | + |
def infer_regime_label_count(runs):
    """Infer the dominant labeled-set size across runs by majority vote.

    Each run contributes one vote for its (rounded) labeled count. The most
    frequent count wins; ties break toward the smaller numeric value.
    Returns None when no run exposes a labeled count.
    """
    votes = {}
    for run in runs:
        labeled = extract_labeled_count(run)
        if labeled is not None:
            key = str(int(round(labeled)))
            votes[key] = votes.get(key, 0) + 1

    if not votes:
        return None
    # Highest frequency first; among equal frequencies, the smaller count
    # (larger negated value) wins — matching a strict-majority-then-min rule.
    winner, _ = max(votes.items(), key=lambda item: (item[1], -int(item[0])))
    return int(winner)
| 876 | + |
| 877 | + |
def write_compact_results(all_runs, output_dir):
    """Write compact run summaries plus per-regime chunk files and a manifest.

    Emits, under ``output_dir``:
      * ``results.json``          — all compact runs, sorted (legacy single file).
      * ``results/<REGIME>.json`` — one chunk per normalized target regime.
      * ``results-manifest.json`` — chunk index with run counts and label sizes.

    Returns the sorted list of compact run summaries.
    """
    summaries = [build_run_summary(run) for run in all_runs]
    summaries = [summary for summary in summaries if summary.get('run_id')]

    def ordering(run):
        # Regime first (numeric where valid), then dataset/method/run id.
        return (
            regime_sort_key(run.get('target_regime')),
            str(run.get('dataset_id') or ''),
            str(run.get('method_id') or ''),
            str(run.get('run_id') or ''),
        )

    summaries.sort(key=ordering)

    # Backward-compatible single file (now compact).
    with open(output_dir / 'results.json', 'w', encoding='utf-8') as fh:
        json.dump(summaries, fh, indent=2)

    results_dir = output_dir / 'results'
    results_dir.mkdir(parents=True, exist_ok=True)
    # Remove chunks from a previous invocation so dropped regimes disappear.
    for old_chunk in results_dir.glob('*.json'):
        old_chunk.unlink()

    grouped = {}
    for summary in summaries:
        regime = normalize_target_regime(summary.get('target_regime'))
        if regime:
            grouped.setdefault(regime, []).append(summary)

    # If nothing carried a usable regime, still emit one catch-all chunk.
    if not grouped and summaries:
        grouped['UNKNOWN'] = summaries

    chunks = []
    for regime in sorted(grouped, key=regime_sort_key):
        members = grouped[regime]
        with open(results_dir / f'{regime}.json', 'w', encoding='utf-8') as fh:
            json.dump(members, fh, indent=2)

        chunks.append(
            {
                'regime': regime,
                'path': f'data/results/{regime}.json',
                'run_count': len(members),
                'label_count': infer_regime_label_count(members),
            }
        )

    manifest = {
        'schema_version': 1,
        'total_runs': sum(chunk['run_count'] for chunk in chunks),
        'default_regime': chunks[0]['regime'] if chunks else None,
        'chunks': chunks,
    }

    with open(output_dir / 'results-manifest.json', 'w', encoding='utf-8') as fh:
        json.dump(manifest, fh, indent=2)

    return summaries
| 937 | + |
| 938 | + |
773 | 939 | def extract_run_data_from_artifact_dir(artifact_dir): |
774 | 940 | artifact_dir = Path(artifact_dir) |
775 | 941 | if not artifact_dir.is_dir(): |
@@ -925,11 +1091,10 @@ def main(): |
925 | 1091 | all_runs.append(run_data) |
926 | 1092 | seen_run_ids.add(run_id) |
927 | 1093 |
|
| 1094 | + compact_runs = write_compact_results(all_runs, output_dir) |
928 | 1095 | output_file = output_dir / 'results.json' |
929 | | - with open(output_file, 'w', encoding='utf-8') as f: |
930 | | - json.dump(all_runs, f, indent=2) |
931 | 1096 |
|
932 | | - print(f"Successfully processed {len(all_runs)} runs. Data saved to {output_file}") |
| 1097 | + print(f"Successfully processed {len(compact_runs)} runs. Data saved to {output_file}") |
933 | 1098 |
|
934 | 1099 |
|
935 | 1100 | if __name__ == '__main__': |
|
0 commit comments