diff --git a/docs/anchor-evaluations.adoc b/docs/anchor-evaluations.adoc
index f99a734..7ed8ea6 100644
--- a/docs/anchor-evaluations.adoc
+++ b/docs/anchor-evaluations.adoc
@@ -407,45 +407,63 @@ For Claude, this means Sonnet 4.6 (not Opus).
 For GPT and Gemini, the mid-tier variants are not yet clearly established, so we test the current flagship (GPT-5, Gemini 2.5 Pro) and add smaller variants when they become available.
 A follow-up round with the cheapest variants (Haiku, GPT-5 mini, Gemini Flash) would reveal the lower boundary of anchor activation.
 
+IMPORTANT: Always record the *exact model identifier with date suffix* (e.g., `mistral-large-2512`, not `mistral-large-latest`).
+Aliases such as `mistral-large-latest` can be repointed to a newer model without notice, which makes results impossible to reproduce.
+
 *Commercial models (API cost per call):*
 
-[cols="1,2"]
+[cols="2,1,2"]
 |===
-|Model |Rationale
+|Model |API ID |Rationale
 
 |Claude Sonnet 4.6
+|`claude-sonnet-4-20250514`
 |Our primary development model. Serves as the baseline.
 
-|GPT-5
-|Largest market share, OpenAI ecosystem.
+|GPT-4o / GPT-5
+|`gpt-4o` / `gpt-5`
+|OpenAI ecosystem. GPT-4o as mid-tier, GPT-5 as flagship.
+
+|Mistral Large 3
+|`mistral-large-2512`
+|European flagship. Already tested (96%).
+
+|Mistral Medium 3.1
+|`mistral-medium-2508`
+|European mid-tier. Frontier-class multimodal.
+
+|Mistral Small 4
+|`mistral-small-2603`
+|European small model. Hybrid reasoning+coding (March 2026).
+
+|Devstral 2
+|`devstral-2512`
+|Code-specialized model. Tests whether software-engineering-focused training improves anchor recognition.
 
 |Gemini 2.5 Pro
+|TBD
 |Google, different training approach.
 |===
 
-*Open-weight models (available as open-source):*
+*Open-weight models (run locally via Ollama):*
 
-[cols="1,2,1"]
+[cols="2,1,2"]
 |===
-|Model |Rationale |Local?
+|Model |Local? |Rationale
 
 |Llama 4 Maverick
-|Largest open-weight model. Shows whether anchors work without proprietary training -- relevant for self-hosted setups.
 |Yes (Ollama)
-
-|Mistral Large
-|European model with a different training focus. Interesting because the anchor catalog is heavily influenced by English-language software engineering literature.
-|No (too large, use La Plateforme API)
+|Largest open-weight model. Shows whether anchors work without proprietary training.
 
 |DeepSeek V3
+|Yes (Ollama)
 |Chinese model. Tests whether anchors work across cultural and training-data boundaries.
+
+|Ministral 3 8B
 |Yes (Ollama)
+|Mistral's tiny model. Lower boundary test.
 |===
 
-Llama and DeepSeek can run locally (e.g., via Ollama) at no API cost.
-Mistral Large requires the Mistral API -- it is open-weight but too large for local inference.
-This means 4 models have API costs (Claude, GPT, Gemini, Mistral) and 2 run locally for free.
-
 === Effort Estimate
 
 Each question runs 4 times (randomized option order) to control for position bias.
diff --git a/evaluations/.gitignore b/evaluations/.gitignore
index 0d20b64..541aa26 100644
--- a/evaluations/.gitignore
+++ b/evaluations/.gitignore
@@ -1 +1,3 @@
 *.pyc
+# Full results with raw responses (large, reproducible via pilot.py)
+results/
diff --git a/evaluations/generate-report.py b/evaluations/generate-report.py
index a6be43e..35c9ef3 100644
--- a/evaluations/generate-report.py
+++ b/evaluations/generate-report.py
@@ -19,35 +19,58 @@
 from collections import defaultdict
 from pathlib import Path
 
-RESULTS_DIR = Path(__file__).parent / "results"
+RESULTS_DIR = Path(__file__).parent / "summaries"
 SPECS_DIR = Path(__file__).parent / "specs"
 
-# Models to include and display order
-MODEL_DISPLAY = {
+# Fallback display names (used when config doesn't have exact model ID)
+MODEL_DISPLAY_FALLBACK = {
     "claude": "Claude Sonnet",
     "claude-cli": "Claude Sonnet (CLI)",
     "claude-haiku": "Claude Haiku",
     "openai": "GPT-4o",
-    "mistral": "Mistral Large",
+    "mistral": "Mistral",
     "ollama": "Ollama (local)",
 }
 
+
+def get_model_display(backend, config):
+    """Return the exact model ID for a backend: from config where set, else a hard-coded ID or fallback name."""
+    if backend == "openai" and config.get("openai_model"):
+        return config["openai_model"]
+    if backend == "mistral" and config.get("mistral_model"):
+        return config["mistral_model"]
+    if backend == "deepseek" and config.get("deepseek_model"):
+        return config["deepseek_model"]
+    if backend == "ollama" and config.get("ollama_model"):
+        return f"ollama/{config['ollama_model']}"
+    if backend == "claude":
+        return "claude-sonnet-4-20250514"
+    if backend == "claude-cli":
+        return "claude-sonnet-4 (CLI)"
+    if backend == "claude-haiku":
+        return "claude-haiku-4-5"
+    return MODEL_DISPLAY_FALLBACK.get(backend, backend)
+
 CONTROL_ANCHORS = {"sanity-check", "negative-control"}
 
 
 def load_best_results():
-    """Load the latest result with the most questions per model."""
+    """Load, per unique model identifier, the result with the most questions (later file wins ties)."""
     results = {}
     for f in sorted(RESULTS_DIR.glob("pilot-*.json")):
         d = json.load(open(f, encoding="utf-8"))
+        config = d.get("config", {})
         for m, r in d["models"].items():
-            if m not in results or len(r) >= len(results[m]["data"]):
-                results[m] = {
+            # Use exact model ID as key instead of backend alias
+            exact_id = get_model_display(m, config)
+            if exact_id not in results or len(r) >= len(results[exact_id]["data"]):
+                results[exact_id] = {
                     "data": r,
                     "file": f.name,
-                    "config": d.get("config", {}),
+                    "config": config,
                     "duration": d.get("duration_seconds", 0),
                     "timestamp": d.get("timestamp", ""),
+                    "backend": m,
                 }
     return results
 
@@ -71,19 +94,15 @@ def score_bg(score):
 
 
 def generate_html(results, output_path):
+    # Keys are already exact model IDs (e.g. "mistral-large-2512")
+    display_names = {m: m for m in results}
+
     # Collect all anchors and questions
     all_questions = defaultdict(dict)  # anchor/label -> {model: score}
-    model_names = []
-
-    # Prefer full runs (75 questions) over pilot runs
-    for m in ["claude", "openai", "mistral"]:
-        if m in results and len(results[m]["data"]) >= 60:
-            model_names.append(m)
 
-    # Add smaller runs if no full run exists
-    for m in ["claude-cli", "claude-haiku", "ollama"]:
-        if m in results and m not in model_names:
-            model_names.append(m)
+    # Sort models: most questions first, then alphabetically
+    model_names = sorted(results.keys(),
+                         key=lambda m: (-len(results[m]["data"]), m))
 
     for m in model_names:
         for q in results[m]["data"]:
@@ -162,7 +181,7 @@ def generate_html(results, output_path):
 
     for m in model_names:
         avg = model_avgs.get(m, 0)
-        display = MODEL_DISPLAY.get(m, m)
+        display = display_names.get(m, m)
         n = len([1 for l in anchor_questions if anchor_questions[l].get(m) is not None])
         info = results[m]
         html += f"""  <div class="summary-card">
@@ -181,7 +200,7 @@ def generate_html(results, output_path):
 """
 
     for m in model_names:
-        html += f"  <th style='text-align:center'>{MODEL_DISPLAY.get(m, m)}</th>\n"
+        html += f"  <th style='text-align:center'>{display_names.get(m, m)}</th>\n"
     html += "</tr></thead>\n<tbody>\n"
 
     for anchor_id in sorted(anchor_groups.keys()):
@@ -224,7 +243,7 @@ def generate_html(results, output_path):
     if control_questions:
         html += '<h2>Control Questions</h2>\n<table class="controls">\n<thead><tr><th>Control</th>'
         for m in model_names:
-            html += f"<th style='text-align:center'>{MODEL_DISPLAY.get(m, m)}</th>"
+            html += f"<th style='text-align:center'>{display_names.get(m, m)}</th>"
         html += "</tr></thead>\n<tbody>\n"
         for label in sorted(control_questions.keys()):
             short = label.replace("/recognition", "")
@@ -246,9 +265,9 @@ def generate_html(results, output_path):
         fails = [(q["label"], q["score"]) for q in results[m]["data"]
                  if q["score"] < 1.0 and not any(q["label"].startswith(c) for c in CONTROL_ANCHORS)]
         if not fails:
-            html += f"<h3>{MODEL_DISPLAY.get(m, m)}: no failures</h3>\n"
+            html += f"<h3>{display_names.get(m, m)}: no failures</h3>\n"
         else:
-            html += f'<h3>{MODEL_DISPLAY.get(m, m)}: {len(fails)} failures</h3>\n<div class="fail-list">\n'
+            html += f'<h3>{display_names.get(m, m)}: {len(fails)} failures</h3>\n<div class="fail-list">\n'
             for label, score in sorted(fails):
                 html += f'<div class="fail-item"><span>{h(label)}</span><span style="color:{score_color(score)};font-weight:600">{score:.0%}</span></div>\n'
             html += "</div>\n"
@@ -262,7 +281,7 @@ def generate_html(results, output_path):
     for m in model_names:
         info = results[m]
         dur = info["duration"]
-        html += f"<dt>{MODEL_DISPLAY.get(m, m)}:</dt><dd>{info['file']} · {int(dur//60)}m {int(dur%60)}s · {info['timestamp'][:19]}</dd><br>"
+        html += f"<dt>{display_names.get(m, m)}:</dt><dd>{info['file']} · {int(dur//60)}m {int(dur%60)}s · {info['timestamp'][:19]}</dd><br>"
 
     html += """
 </dl>
diff --git a/evaluations/pilot.py b/evaluations/pilot.py
index c921ad0..bfc640d 100644
--- a/evaluations/pilot.py
+++ b/evaluations/pilot.py
@@ -312,7 +312,16 @@ def run_pilot(models, dry_run=False, verbose=False, ollama_model="qwen3:4b", no_
     print()
 
     ts = datetime.now().strftime("%Y%m%d-%H%M%S")
-    out_file = RESULTS_DIR / f"pilot-{ts}.json"
+    # Include exact model IDs in the filename so runs started in the same second don't overwrite each other
+    model_ids = []
+    for m in models:
+        if m == "openai": model_ids.append(openai_model)
+        elif m == "mistral": model_ids.append(mistral_model)
+        elif m == "deepseek": model_ids.append(deepseek_model)
+        elif m == "ollama": model_ids.append(f"ollama-{ollama_model}")
+        else: model_ids.append(m)
+    model_suffix = "_".join(model_ids).replace(":", "-").replace("/", "-")
+    out_file = RESULTS_DIR / f"pilot-{ts}_{model_suffix}.json"
 
     all_results = {
         "timestamp": datetime.now(timezone.utc).isoformat(),
@@ -449,6 +458,18 @@ def append_and_save(r):
         save_results(all_results, out_file)
         print(f"\nResults saved to {out_file}")
 
+        # Also save a stripped summary (scores only, no raw responses)
+        summary_dir = RESULTS_DIR.parent / "summaries"
+        summary_dir.mkdir(parents=True, exist_ok=True)
+        summary = json.loads(json.dumps(all_results))  # deep copy
+        for m_results in summary.get("models", {}).values():
+            for r in m_results:
+                r.pop("results", None)
+        summary_file = summary_dir / out_file.name
+        with open(summary_file, "w", encoding="utf-8") as fh:
+            json.dump(summary, fh, indent=2, ensure_ascii=False)
+        print(f"Summary saved to {summary_file}")
+
         # Summary
         print("\n=== SUMMARY ===")
         print(f"Models: {', '.join(models)}")
diff --git a/evaluations/report.html b/evaluations/report.html
index 0fd9fb2..584a172 100644
--- a/evaluations/report.html
+++ b/evaluations/report.html
@@ -37,7 +37,7 @@
 </head>
 <body>
 <h1>Semantic Anchor Evaluation Report</h1>
-<p class="subtitle">Multiple-choice recognition test across 3 LLMs — 191 questions, 61 anchors</p>
+<p class="subtitle">Multiple-choice recognition test across 6 LLMs — 191 questions, 61 anchors</p>
 
 <div class="legend">
   <div class="legend-item"><div class="legend-dot" style="background:#dcfce7"></div> &ge;80%</div>
@@ -48,298 +48,336 @@ <h1>Semantic Anchor Evaluation Report</h1>
 <h2>Model Summary</h2>
 <div class="summary-grid">
   <div class="summary-card">
-    <div class="model-name">Claude Sonnet</div>
+    <div class="model-name">claude-sonnet-4-20250514</div>
     <div class="score" style="color: #22c55e">99%</div>
     <div class="detail">191 questions · pilot-20260324-174404.json</div>
   </div>
   <div class="summary-card">
-    <div class="model-name">GPT-4o</div>
+    <div class="model-name">devstral-2512</div>
+    <div class="score" style="color: #22c55e">96%</div>
+    <div class="detail">191 questions · pilot-20260326-073241_devstral-2512.json</div>
+  </div>
+  <div class="summary-card">
+    <div class="model-name">gpt-4o</div>
     <div class="score" style="color: #22c55e">98%</div>
     <div class="detail">191 questions · pilot-20260324-192413.json</div>
   </div>
   <div class="summary-card">
-    <div class="model-name">Mistral Large</div>
+    <div class="model-name">mistral-large-2512</div>
     <div class="score" style="color: #22c55e">96%</div>
     <div class="detail">191 questions · pilot-20260324-190600.json</div>
   </div>
+  <div class="summary-card">
+    <div class="model-name">mistral-medium-2508</div>
+    <div class="score" style="color: #22c55e">85%</div>
+    <div class="detail">191 questions · pilot-20260326-070127_mistral-medium-2508.json</div>
+  </div>
+  <div class="summary-card">
+    <div class="model-name">mistral-small-2603</div>
+    <div class="score" style="color: #eab308">74%</div>
+    <div class="detail">191 questions · pilot-20260326-074132_mistral-small-2603.json</div>
+  </div>
 </div>
 
 <h2>Heatmap: Anchor × Model</h2>
 <table>
 <thead><tr>
   <th>Anchor / Question</th>
-  <th style='text-align:center'>Claude Sonnet</th>
-  <th style='text-align:center'>GPT-4o</th>
-  <th style='text-align:center'>Mistral Large</th>
+  <th style='text-align:center'>claude-sonnet-4-20250514</th>
+  <th style='text-align:center'>devstral-2512</th>
+  <th style='text-align:center'>gpt-4o</th>
+  <th style='text-align:center'>mistral-large-2512</th>
+  <th style='text-align:center'>mistral-medium-2508</th>
+  <th style='text-align:center'>mistral-small-2603</th>
 </tr></thead>
 <tbody>
-<tr class="anchor-group"><td>adr-according-to-nygard</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr class="anchor-group"><td>arc42</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-language</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-variant-1</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-variant-2</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-variant-3</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>atam</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>bdd-given-when-then</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>bem-methodology</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>bluf</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>c4-diagrams</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>chain-of-thought</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>clean-architecture</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>control-chart-shewhart</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>conventional-commits</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>cqrs</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>cynefin-framework</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>definition-of-done</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>devils-advocate</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>diataxis-framework</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>docs-as-code</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>domain-driven-design</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>ears-requirements</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr class="anchor-group"><td>event-driven-architecture</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>fagan-inspection</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>feynman-technique</td><td class="score-cell" style="background:#fef9c3">67%</td><td class="score-cell" style="background:#fef9c3">67%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>five-whys</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>fowler-patterns</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>gherkin</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>github-flow</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>gutes-deutsch-wolf-schneider</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>hexagonal-architecture</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>iec-61508-sil-levels</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>impact-mapping</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>invest</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>iso-25010</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>jobs-to-be-done</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>lasr</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
-<tr class="anchor-group"><td>linddun</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>llm-evaluations</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>madr</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>mece</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>morphological-box</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>moscow</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>mutation-testing</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>nelson-rules</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>owasp-top-10</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>plain-english-strunk-white</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>prd</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">0%</td></tr>
-<tr class="anchor-group"><td>problem-space-nvc</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>property-based-testing</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>pyramid-principle</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>semantic-versioning</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>socratic-method</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>sota</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>spc</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>stride</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>swot</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>tdd-chicago-school</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>tdd-london-school</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">89%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-language</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-variant-1</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-variant-2</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-variant-3</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>testing-pyramid</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>timtowtdi</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>todotxt-flavoured-markdown</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
-<tr class="anchor-group"><td>user-story-mapping</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>wardley-mapping</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>adr-according-to-nygard</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>arc42</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">46%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">consistency-language</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">consistency-variant-1</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">consistency-variant-2</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">consistency-variant-3</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>atam</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">58%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>bdd-given-when-then</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fee2e2">33%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>bem-methodology</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">42%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>bluf</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>c4-diagrams</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>chain-of-thought</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">58%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>clean-architecture</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>control-chart-shewhart</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>conventional-commits</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>cqrs</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>cynefin-framework</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>definition-of-done</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>devils-advocate</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>diataxis-framework</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr class="anchor-group"><td>docs-as-code</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>domain-driven-design</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>ears-requirements</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">17%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td></tr>
+<tr class="anchor-group"><td>event-driven-architecture</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>fagan-inspection</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>feynman-technique</td><td class="score-cell" style="background:#fef9c3">67%</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">67%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>five-whys</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>fowler-patterns</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>gherkin</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>github-flow</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">42%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>gutes-deutsch-wolf-schneider</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>hexagonal-architecture</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>iec-61508-sil-levels</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fee2e2">33%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>impact-mapping</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>invest</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr class="anchor-group"><td>iso-25010</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>jobs-to-be-done</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>lasr</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>linddun</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>llm-evaluations</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>madr</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>mece</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>morphological-box</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>moscow</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>mutation-testing</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>nelson-rules</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>owasp-top-10</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>plain-english-strunk-white</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>prd</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr class="anchor-group"><td>problem-space-nvc</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>property-based-testing</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>pyramid-principle</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>semantic-versioning</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>socratic-method</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr class="anchor-group"><td>sota</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>spc</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>stride</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>swot</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>tdd-chicago-school</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>tdd-london-school</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">89%</td><td class="score-cell" style="background:#dcfce7">89%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">71%</td><td class="score-cell" style="background:#fef9c3">54%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">consistency-language</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">consistency-variant-1</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">consistency-variant-2</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">consistency-variant-3</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>testing-pyramid</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>timtowtdi</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>todotxt-flavoured-markdown</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>user-story-mapping</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>wardley-mapping</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">42%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
 </tbody></table>
 <h2>Control Questions</h2>
 <table class="controls">
-<thead><tr><th>Control</th><th style='text-align:center'>Claude Sonnet</th><th style='text-align:center'>GPT-4o</th><th style='text-align:center'>Mistral Large</th></tr></thead>
+<thead><tr><th>Control</th><th style='text-align:center'>claude-sonnet-4-20250514</th><th style='text-align:center'>devstral-2512</th><th style='text-align:center'>gpt-4o</th><th style='text-align:center'>mistral-large-2512</th><th style='text-align:center'>mistral-medium-2508</th><th style='text-align:center'>mistral-small-2603</th></tr></thead>
 <tbody>
-<tr><td>negative-control</td><td class="score-cell" style="background:#dcfce7">100%</td><td class="score-cell" style="background:#dcfce7">100%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td>sanity-check</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td></tr>
+<tr><td>negative-control</td><td class="score-cell" style="background:#dcfce7">100%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">100%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">100%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td>sanity-check</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td></tr>
 </tbody></table>
 <h2>Failures Detail</h2>
-<h3>Claude Sonnet: 2 failures</h3>
+<h3>claude-sonnet-4-20250514: 2 failures</h3>
 <div class="fail-list">
 <div class="fail-item"><span>feynman-technique/application-paraphrase</span><span style="color:#ef4444;font-weight:600">0%</span></div>
 <div class="fail-item"><span>github-flow/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
 </div>
-<h3>GPT-4o: 13 failures</h3>
+<h3>devstral-2512: 17 failures</h3>
+<div class="fail-list">
+<div class="fail-item"><span>atam/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>bdd-given-when-then/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>bdd-given-when-then/recognition</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>ears-requirements/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>ears-requirements/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>feynman-technique/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>feynman-technique/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>github-flow/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>iec-61508-sil-levels/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>iec-61508-sil-levels/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>lasr/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>madr/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>prd/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>property-based-testing/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-chicago-school/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-variant-3</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>todotxt-flavoured-markdown/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+</div>
+<h3>gpt-4o: 13 failures</h3>
 <div class="fail-list">
 <div class="fail-item"><span>control-chart-shewhart/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
 <div class="fail-item"><span>ears-requirements/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
@@ -355,7 +393,7 @@ <h3>GPT-4o: 13 failures</h3>
 <div class="fail-item"><span>tdd-london-school/consistency-variant-3</span><span style="color:#eab308;font-weight:600">50%</span></div>
 <div class="fail-item"><span>tdd-london-school/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
 </div>
-<h3>Mistral Large: 17 failures</h3>
+<h3>mistral-large-2512: 17 failures</h3>
 <div class="fail-list">
 <div class="fail-item"><span>adr-according-to-nygard/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
 <div class="fail-item"><span>bdd-given-when-then/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
@@ -375,11 +413,209 @@ <h3>Mistral Large: 17 failures</h3>
 <div class="fail-item"><span>semantic-versioning/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
 <div class="fail-item"><span>todotxt-flavoured-markdown/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
 </div>
+<h3>mistral-medium-2508: 77 failures</h3>
+<div class="fail-list">
+<div class="fail-item"><span>adr-according-to-nygard/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>adr-according-to-nygard/application-paraphrase</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>adr-according-to-nygard/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>arc42/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>arc42/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>arc42/consistency-language</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>arc42/consistency-variant-1</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>arc42/consistency-variant-2</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>arc42/consistency-variant-3</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>arc42/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>atam/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>atam/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>atam/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>bdd-given-when-then/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>bdd-given-when-then/application-paraphrase</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>bdd-given-when-then/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>bem-methodology/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>bem-methodology/application-paraphrase</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>bem-methodology/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>bluf/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>bluf/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>bluf/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>c4-diagrams/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>c4-diagrams/application-paraphrase</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>c4-diagrams/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>chain-of-thought/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>chain-of-thought/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>chain-of-thought/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>control-chart-shewhart/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>cqrs/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>cynefin-framework/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>definition-of-done/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>definition-of-done/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>diataxis-framework/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>docs-as-code/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>event-driven-architecture/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>event-driven-architecture/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fagan-inspection/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fagan-inspection/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>feynman-technique/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fowler-patterns/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>gutes-deutsch-wolf-schneider/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>gutes-deutsch-wolf-schneider/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>hexagonal-architecture/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>iec-61508-sil-levels/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>jobs-to-be-done/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>linddun/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>madr/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>mece/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>morphological-box/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>moscow/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>mutation-testing/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>nelson-rules/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>owasp-top-10/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>prd/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>property-based-testing/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>pyramid-principle/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>semantic-versioning/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>semantic-versioning/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>socratic-method/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>sota/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>sota/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>spc/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>stride/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>swot/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>swot/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-language</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-variant-1</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-variant-3</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>tdd-london-school/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>testing-pyramid/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>testing-pyramid/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>timtowtdi/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>timtowtdi/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>todotxt-flavoured-markdown/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>user-story-mapping/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+</div>
+<h3>mistral-small-2603: 115 failures</h3>
+<div class="fail-list">
+<div class="fail-item"><span>adr-according-to-nygard/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>adr-according-to-nygard/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>atam/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>bdd-given-when-then/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>conventional-commits/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>conventional-commits/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>cqrs/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>cqrs/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>definition-of-done/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>definition-of-done/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>definition-of-done/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>devils-advocate/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>devils-advocate/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>devils-advocate/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>diataxis-framework/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>diataxis-framework/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>diataxis-framework/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>docs-as-code/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>domain-driven-design/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>domain-driven-design/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>ears-requirements/application-anchor</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>ears-requirements/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>ears-requirements/recognition</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>event-driven-architecture/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>event-driven-architecture/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>event-driven-architecture/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fagan-inspection/application-anchor</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>fagan-inspection/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fagan-inspection/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>feynman-technique/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>feynman-technique/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>five-whys/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fowler-patterns/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fowler-patterns/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>gherkin/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>gherkin/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>github-flow/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>github-flow/application-paraphrase</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>github-flow/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>gutes-deutsch-wolf-schneider/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>hexagonal-architecture/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>iec-61508-sil-levels/application-anchor</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>iec-61508-sil-levels/application-paraphrase</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>impact-mapping/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>impact-mapping/application-paraphrase</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>impact-mapping/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>invest/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>iso-25010/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>iso-25010/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>jobs-to-be-done/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>lasr/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>lasr/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>lasr/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>linddun/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>llm-evaluations/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>llm-evaluations/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>madr/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>madr/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>madr/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>mece/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>morphological-box/application-anchor</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>morphological-box/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>moscow/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>moscow/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>mutation-testing/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>mutation-testing/application-paraphrase</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>mutation-testing/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>nelson-rules/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>owasp-top-10/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>owasp-top-10/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>owasp-top-10/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>plain-english-strunk-white/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>plain-english-strunk-white/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>plain-english-strunk-white/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>prd/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>prd/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>problem-space-nvc/application-anchor</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>problem-space-nvc/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>problem-space-nvc/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>property-based-testing/application-anchor</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>property-based-testing/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>pyramid-principle/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>pyramid-principle/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>semantic-versioning/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>socratic-method/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>socratic-method/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>sota/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>sota/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>spc/application-anchor</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>spc/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>spc/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>stride/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>stride/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>swot/application-anchor</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>swot/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-chicago-school/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>tdd-chicago-school/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>tdd-london-school/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>tdd-london-school/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-language</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-variant-1</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-variant-2</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-variant-3</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>tdd-london-school/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>testing-pyramid/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>timtowtdi/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>timtowtdi/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>todotxt-flavoured-markdown/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>todotxt-flavoured-markdown/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>todotxt-flavoured-markdown/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>user-story-mapping/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>user-story-mapping/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>wardley-mapping/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>wardley-mapping/application-paraphrase</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>wardley-mapping/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+</div>
 
 <div class="meta">
 <h3>Run Metadata</h3>
 <dl>
-<dt>Claude Sonnet:</dt><dd>pilot-20260324-174404.json · 81m 2s · 2026-03-24T17:44:04</dd><br><dt>GPT-4o:</dt><dd>pilot-20260324-192413.json · 15m 38s · 2026-03-24T19:24:13</dd><br><dt>Mistral Large:</dt><dd>pilot-20260324-190600.json · 16m 58s · 2026-03-24T19:06:00</dd><br>
+<dt>claude-sonnet-4-20250514:</dt><dd>pilot-20260324-174404.json · 81m 2s · 2026-03-24T17:44:04</dd><br><dt>devstral-2512:</dt><dd>pilot-20260326-073241_devstral-2512.json · 15m 19s · 2026-03-26T07:32:41</dd><br><dt>gpt-4o:</dt><dd>pilot-20260324-192413.json · 15m 38s · 2026-03-24T19:24:13</dd><br><dt>mistral-large-2512:</dt><dd>pilot-20260324-190600.json · 16m 58s · 2026-03-24T19:06:00</dd><br><dt>mistral-medium-2508:</dt><dd>pilot-20260326-070127_mistral-medium-2508.json · 31m 12s · 2026-03-26T07:01:27</dd><br><dt>mistral-small-2603:</dt><dd>pilot-20260326-074132_mistral-small-2603.json · 18m 9s · 2026-03-26T07:41:32</dd><br>
 </dl>
 <p style="margin-top:0.75rem">Generated by <code>evaluations/generate-report.py</code> · Position bias mitigation: 4 permutations per question · Scoring: deterministic MC (no LLM judge)</p>
 </div>
diff --git a/evaluations/results/pilot-20260324-174404.json b/evaluations/results/pilot-20260324-174404.json
deleted file mode 100644
index 110f33a..0000000
--- a/evaluations/results/pilot-20260324-174404.json
+++ /dev/null
@@ -1,10442 +0,0 @@
-{
-  "timestamp": "2026-03-24T17:44:04.891380+00:00",
-  "config": {
-    "models": [
-      "claude"
-    ],
-    "openai_model": null,
-    "mistral_model": null,
-    "deepseek_model": null,
-    "ollama_model": null,
-    "ollama_url": null,
-    "no_think": null,
-    "temperature": 0.0
-  },
-  "models": {
-    "claude": [
-      {
-        "label": "adr-according-to-nygard/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "adr-according-to-nygard/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "adr-according-to-nygard/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          }
-        ]
-      },
-      {
-        "label": "arc42/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/consistency-variant-1",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/consistency-variant-2",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/consistency-variant-3",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/consistency-language",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "atam/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "atam/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "atam/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bdd-given-when-then/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bdd-given-when-then/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bdd-given-when-then/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bem-methodology/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bem-methodology/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bem-methodology/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bluf/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bluf/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bluf/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "c4-diagrams/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "c4-diagrams/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "c4-diagrams/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "chain-of-thought/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "chain-of-thought/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "chain-of-thought/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "clean-architecture/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "clean-architecture/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "clean-architecture/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "control-chart-shewhart/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "control-chart-shewhart/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "control-chart-shewhart/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "conventional-commits/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "conventional-commits/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "conventional-commits/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cqrs/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cqrs/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cqrs/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cynefin-framework/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cynefin-framework/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cynefin-framework/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "definition-of-done/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "definition-of-done/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "definition-of-done/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "devils-advocate/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "devils-advocate/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "devils-advocate/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "diataxis-framework/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "diataxis-framework/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "diataxis-framework/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "docs-as-code/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "docs-as-code/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "docs-as-code/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "domain-driven-design/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "domain-driven-design/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "domain-driven-design/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "ears-requirements/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "ears-requirements/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "ears-requirements/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "event-driven-architecture/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "event-driven-architecture/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "event-driven-architecture/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fagan-inspection/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fagan-inspection/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fagan-inspection/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "feynman-technique/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "feynman-technique/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "feynman-technique/application-paraphrase",
-        "score": 0.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "C",
-            "correct": false,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "B",
-            "correct": false,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "D",
-            "correct": false,
-            "raw_response": "D"
-          }
-        ]
-      },
-      {
-        "label": "five-whys/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "five-whys/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "five-whys/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fowler-patterns/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fowler-patterns/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fowler-patterns/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "gherkin/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          }
-        ]
-      },
-      {
-        "label": "gherkin/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "gherkin/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "github-flow/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "github-flow/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "github-flow/application-paraphrase",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "gutes-deutsch-wolf-schneider/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "gutes-deutsch-wolf-schneider/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "gutes-deutsch-wolf-schneider/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "hexagonal-architecture/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "hexagonal-architecture/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "hexagonal-architecture/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iec-61508-sil-levels/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iec-61508-sil-levels/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iec-61508-sil-levels/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "impact-mapping/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "impact-mapping/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "impact-mapping/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "invest/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "invest/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "invest/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iso-25010/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iso-25010/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iso-25010/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "jobs-to-be-done/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "jobs-to-be-done/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "jobs-to-be-done/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "lasr/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "lasr/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "lasr/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "linddun/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "linddun/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "linddun/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "llm-evaluations/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "llm-evaluations/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "llm-evaluations/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "madr/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "madr/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "madr/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mece/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mece/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mece/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "morphological-box/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "morphological-box/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "morphological-box/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "moscow/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "moscow/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "moscow/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mutation-testing/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mutation-testing/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mutation-testing/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "negative-control/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          }
-        ]
-      },
-      {
-        "label": "nelson-rules/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "nelson-rules/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "nelson-rules/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "owasp-top-10/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          }
-        ]
-      },
-      {
-        "label": "owasp-top-10/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "owasp-top-10/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "plain-english-strunk-white/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "plain-english-strunk-white/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "plain-english-strunk-white/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "prd/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "prd/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "prd/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "problem-space-nvc/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "problem-space-nvc/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "problem-space-nvc/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "property-based-testing/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "property-based-testing/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "property-based-testing/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "pyramid-principle/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "pyramid-principle/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "pyramid-principle/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "sanity-check/recognition",
-        "score": 0.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "X",
-            "answer": "B",
-            "correct": false,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "X",
-            "answer": null,
-            "correct": false,
-            "raw_response": "None of the options provided match the famous answer from"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "X",
-            "answer": null,
-            "correct": false,
-            "raw_response": "None of the options provided match the correct answer from"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "X",
-            "answer": null,
-            "correct": false,
-            "raw_response": "None of the options provided match the famous answer from"
-          }
-        ]
-      },
-      {
-        "label": "semantic-versioning/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "semantic-versioning/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "semantic-versioning/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "socratic-method/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "socratic-method/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "socratic-method/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "sota/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "sota/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "sota/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "spc/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "spc/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "spc/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "stride/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "stride/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "stride/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "swot/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "swot/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "swot/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-chicago-school/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-chicago-school/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-chicago-school/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/consistency-variant-1",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/consistency-variant-2",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/consistency-variant-3",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/consistency-language",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "testing-pyramid/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "testing-pyramid/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "testing-pyramid/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "timtowtdi/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          }
-        ]
-      },
-      {
-        "label": "timtowtdi/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "timtowtdi/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "todotxt-flavoured-markdown/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "todotxt-flavoured-markdown/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "todotxt-flavoured-markdown/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "user-story-mapping/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "user-story-mapping/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "user-story-mapping/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "wardley-mapping/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "wardley-mapping/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "wardley-mapping/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      }
-    ]
-  },
-  "duration_seconds": 4862.1
-}
\ No newline at end of file
diff --git a/evaluations/results/pilot-20260324-190600.json b/evaluations/results/pilot-20260324-190600.json
deleted file mode 100644
index 613af48..0000000
--- a/evaluations/results/pilot-20260324-190600.json
+++ /dev/null
@@ -1,10442 +0,0 @@
-{
-  "timestamp": "2026-03-24T19:06:00.394684+00:00",
-  "config": {
-    "models": [
-      "mistral"
-    ],
-    "openai_model": null,
-    "mistral_model": "mistral-large-latest",
-    "deepseek_model": null,
-    "ollama_model": null,
-    "ollama_url": null,
-    "no_think": null,
-    "temperature": 0.0
-  },
-  "models": {
-    "mistral": [
-      {
-        "label": "adr-according-to-nygard/recognition",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "adr-according-to-nygard/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "adr-according-to-nygard/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          }
-        ]
-      },
-      {
-        "label": "arc42/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/consistency-variant-1",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/consistency-variant-2",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/consistency-variant-3",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/consistency-language",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "atam/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "atam/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "atam/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bdd-given-when-then/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bdd-given-when-then/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bdd-given-when-then/application-paraphrase",
-        "score": 0.5,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": null,
-            "correct": false,
-            "raw_response": "ERROR: Error code: 429 - {'object': 'error', 'message': 'Rate limit exceeded', 'type': 'rate_limited', 'param': None, 'code': '1300', 'raw_status_code': 429}"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": null,
-            "correct": false,
-            "raw_response": "ERROR: Error code: 429 - {'object': 'error', 'message': 'Rate limit exceeded', 'type': 'rate_limited', 'param': None, 'code': '1300', 'raw_status_code': 429}"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bem-methodology/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bem-methodology/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bem-methodology/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bluf/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bluf/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bluf/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "c4-diagrams/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "c4-diagrams/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "c4-diagrams/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "chain-of-thought/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "chain-of-thought/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "chain-of-thought/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "clean-architecture/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "clean-architecture/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "clean-architecture/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "control-chart-shewhart/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "control-chart-shewhart/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "control-chart-shewhart/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "conventional-commits/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "conventional-commits/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "conventional-commits/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cqrs/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cqrs/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cqrs/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cynefin-framework/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cynefin-framework/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cynefin-framework/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "definition-of-done/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "definition-of-done/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "definition-of-done/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "devils-advocate/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "devils-advocate/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "devils-advocate/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "diataxis-framework/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "diataxis-framework/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "diataxis-framework/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "docs-as-code/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "docs-as-code/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "docs-as-code/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "domain-driven-design/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "domain-driven-design/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "domain-driven-design/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "ears-requirements/recognition",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "C",
-            "correct": false,
-            "raw_response": "C)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "ears-requirements/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "ears-requirements/application-paraphrase",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "B",
-            "correct": false,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "event-driven-architecture/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "event-driven-architecture/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "event-driven-architecture/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fagan-inspection/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fagan-inspection/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fagan-inspection/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "feynman-technique/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "feynman-technique/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "feynman-technique/application-paraphrase",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "B",
-            "correct": false,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "five-whys/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "five-whys/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "five-whys/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fowler-patterns/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fowler-patterns/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fowler-patterns/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "gherkin/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          }
-        ]
-      },
-      {
-        "label": "gherkin/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "gherkin/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "github-flow/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "github-flow/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "github-flow/application-paraphrase",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "gutes-deutsch-wolf-schneider/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "gutes-deutsch-wolf-schneider/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "gutes-deutsch-wolf-schneider/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "hexagonal-architecture/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "hexagonal-architecture/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "hexagonal-architecture/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iec-61508-sil-levels/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iec-61508-sil-levels/application-anchor",
-        "score": 0.5,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "B",
-            "correct": false,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "D",
-            "correct": false,
-            "raw_response": "D"
-          }
-        ]
-      },
-      {
-        "label": "iec-61508-sil-levels/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "impact-mapping/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "impact-mapping/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "impact-mapping/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "invest/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "invest/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "invest/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iso-25010/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iso-25010/application-anchor",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "B",
-            "correct": false,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iso-25010/application-paraphrase",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "B",
-            "correct": false,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "jobs-to-be-done/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "jobs-to-be-done/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "jobs-to-be-done/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "lasr/recognition",
-        "score": 0.25,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "C",
-            "correct": false,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          }
-        ]
-      },
-      {
-        "label": "lasr/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "lasr/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "linddun/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "linddun/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "linddun/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "llm-evaluations/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "llm-evaluations/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "llm-evaluations/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "madr/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "madr/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "madr/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mece/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mece/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mece/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "morphological-box/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "morphological-box/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "morphological-box/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "moscow/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "moscow/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "moscow/application-paraphrase",
-        "score": 0.25,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "D",
-            "correct": false,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "B",
-            "correct": false,
-            "raw_response": "B"
-          }
-        ]
-      },
-      {
-        "label": "mutation-testing/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mutation-testing/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mutation-testing/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "negative-control/recognition",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "D",
-            "answer": "B",
-            "correct": false,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          }
-        ]
-      },
-      {
-        "label": "nelson-rules/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "nelson-rules/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "nelson-rules/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "owasp-top-10/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          }
-        ]
-      },
-      {
-        "label": "owasp-top-10/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "owasp-top-10/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "plain-english-strunk-white/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "plain-english-strunk-white/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "plain-english-strunk-white/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "prd/recognition",
-        "score": 0.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "D",
-            "correct": false,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "C",
-            "correct": false,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "B",
-            "correct": false,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          }
-        ]
-      },
-      {
-        "label": "prd/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "prd/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "problem-space-nvc/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "problem-space-nvc/application-anchor",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "C",
-            "correct": false,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "problem-space-nvc/application-paraphrase",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "C",
-            "correct": false,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "property-based-testing/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "property-based-testing/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "property-based-testing/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "pyramid-principle/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "pyramid-principle/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "pyramid-principle/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "sanity-check/recognition",
-        "score": 0.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "X",
-            "answer": null,
-            "correct": false,
-            "raw_response": "None of the above options is correct, but the"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "X",
-            "answer": null,
-            "correct": false,
-            "raw_response": "None of the options provided is correct, but the"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "X",
-            "answer": null,
-            "correct": false,
-            "raw_response": "None of the options provided is correct, but the"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "X",
-            "answer": "C",
-            "correct": false,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "semantic-versioning/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "semantic-versioning/application-anchor",
-        "score": 0.5,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "C",
-            "correct": false,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "D",
-            "correct": false,
-            "raw_response": "D"
-          }
-        ]
-      },
-      {
-        "label": "semantic-versioning/application-paraphrase",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "D",
-            "correct": false,
-            "raw_response": "D"
-          }
-        ]
-      },
-      {
-        "label": "socratic-method/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "socratic-method/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "socratic-method/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "sota/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "sota/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "sota/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "spc/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "spc/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "spc/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "stride/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "None of the options perfectly describe the **STRIDE"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "stride/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "stride/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "swot/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "swot/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) SWOT Analysis\nB) Value Chain"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "swot/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-chicago-school/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-chicago-school/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-chicago-school/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/consistency-variant-1",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/consistency-variant-2",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/consistency-variant-3",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/consistency-language",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "testing-pyramid/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "testing-pyramid/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "testing-pyramid/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "timtowtdi/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          }
-        ]
-      },
-      {
-        "label": "timtowtdi/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "timtowtdi/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "todotxt-flavoured-markdown/recognition",
-        "score": 0.5,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "D",
-            "correct": false,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "todotxt-flavoured-markdown/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "todotxt-flavoured-markdown/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "user-story-mapping/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "user-story-mapping/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "user-story-mapping/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "wardley-mapping/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "wardley-mapping/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "wardley-mapping/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      }
-    ]
-  },
-  "duration_seconds": 1018.4
-}
\ No newline at end of file
diff --git a/evaluations/results/pilot-20260324-192413.json b/evaluations/results/pilot-20260324-192413.json
deleted file mode 100644
index 8c5f85e..0000000
--- a/evaluations/results/pilot-20260324-192413.json
+++ /dev/null
@@ -1,10442 +0,0 @@
-{
-  "timestamp": "2026-03-24T19:24:13.551875+00:00",
-  "config": {
-    "models": [
-      "openai"
-    ],
-    "openai_model": "gpt-4o",
-    "mistral_model": null,
-    "deepseek_model": null,
-    "ollama_model": null,
-    "ollama_url": null,
-    "no_think": null,
-    "temperature": 0.0
-  },
-  "models": {
-    "openai": [
-      {
-        "label": "adr-according-to-nygard/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "adr-according-to-nygard/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "adr-according-to-nygard/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "arc42/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) A 12-section template for standardized software"
-          }
-        ]
-      },
-      {
-        "label": "arc42/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) External interfaces in Section 3 (Context"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "arc42/consistency-variant-1",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Gernot Starke"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Gernot Starke"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Gernot Starke"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Gernot Starke"
-          }
-        ]
-      },
-      {
-        "label": "arc42/consistency-variant-2",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Gernot Starke"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Gernot Starke"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Gernot Starke"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Gernot Starke"
-          }
-        ]
-      },
-      {
-        "label": "arc42/consistency-variant-3",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Gernot Starke"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Gernot Starke"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Gernot Starke"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Gernot Starke"
-          }
-        ]
-      },
-      {
-        "label": "arc42/consistency-language",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "atam/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "atam/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "atam/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Build a utility tree to prioritize quality scenarios"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Build a utility tree to prioritize quality scenarios"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Build a utility tree to prioritize quality scenarios"
-          }
-        ]
-      },
-      {
-        "label": "bdd-given-when-then/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bdd-given-when-then/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bdd-given-when-then/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Organize discovery workshops with the three amigos"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bem-methodology/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bem-methodology/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Use structured class names like `.navbar`,"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Use structured class names like `.navbar`,"
-          }
-        ]
-      },
-      {
-        "label": "bem-methodology/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Use structured class names like `.navbar`,"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Use structured class names like `.navbar`,"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Use structured class names like `.navbar`,"
-          }
-        ]
-      },
-      {
-        "label": "bluf/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bluf/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "bluf/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "c4-diagrams/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Four levels of abstraction; : system in"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Four levels of abstraction; : system in"
-          }
-        ]
-      },
-      {
-        "label": "c4-diagrams/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "c4-diagrams/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "chain-of-thought/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "chain-of-thought/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "chain-of-thought/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "clean-architecture/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "clean-architecture/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Define payment processing use cases in the core"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "clean-architecture/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "control-chart-shewhart/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Measured value plotted over time; process"
-          }
-        ]
-      },
-      {
-        "label": "control-chart-shewhart/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "control-chart-shewhart/application-paraphrase",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "conventional-commits/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "conventional-commits/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "conventional-commits/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cqrs/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cqrs/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cqrs/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Create separate optimized data models: a normalized"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Create separate optimized data models: a normalized"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Create separate optimized data models: a normalized"
-          }
-        ]
-      },
-      {
-        "label": "cynefin-framework/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Five domains; : best practices apply,"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Five domains; : best practices apply,"
-          }
-        ]
-      },
-      {
-        "label": "cynefin-framework/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "cynefin-framework/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "definition-of-done/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "definition-of-done/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "definition-of-done/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Collaboratively create a single, team-wide"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "devils-advocate/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Present opposing viewpoints even if not personally held"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "devils-advocate/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "devils-advocate/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Systematically argue against your own design by"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "diataxis-framework/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Four documentation types; : learning-oriented,"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "diataxis-framework/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Develop four distinct documentation sections: beginner tutorials"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "diataxis-framework/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Develop four distinct documentation sections: beginner tutorials"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Develop four distinct documentation sections: beginner tutorials"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "docs-as-code/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "docs-as-code/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "docs-as-code/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "domain-driven-design/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "domain-driven-design/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "domain-driven-design/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Establish a ubiquitous language by working closely with"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "ears-requirements/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "ears-requirements/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Structure requirements using specific templates: 'The"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Structure requirements using specific templates: 'The"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "ears-requirements/application-paraphrase",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Structure requirements using specific templates: 'The"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "B",
-            "correct": false,
-            "raw_response": "B) Create a comprehensive requirements specification document with functional"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Structure requirements using specific templates: 'The"
-          }
-        ]
-      },
-      {
-        "label": "event-driven-architecture/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "event-driven-architecture/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "event-driven-architecture/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "fagan-inspection/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fagan-inspection/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fagan-inspection/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "feynman-technique/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "feynman-technique/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "feynman-technique/application-paraphrase",
-        "score": 0.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "C",
-            "correct": false,
-            "raw_response": "C)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "B",
-            "correct": false,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          }
-        ]
-      },
-      {
-        "label": "five-whys/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "five-whys/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "five-whys/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fowler-patterns/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "fowler-patterns/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "fowler-patterns/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Use a Domain Model pattern for complex business"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Use a Domain Model pattern for complex business"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Use a Domain Model pattern for complex business"
-          }
-        ]
-      },
-      {
-        "label": "gherkin/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          }
-        ]
-      },
-      {
-        "label": "gherkin/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "gherkin/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "github-flow/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Workflow steps"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Workflow steps"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Workflow steps"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "github-flow/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "github-flow/application-paraphrase",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "gutes-deutsch-wolf-schneider/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "gutes-deutsch-wolf-schneider/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "gutes-deutsch-wolf-schneider/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "hexagonal-architecture/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "hexagonal-architecture/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "hexagonal-architecture/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iec-61508-sil-levels/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iec-61508-sil-levels/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iec-61508-sil-levels/application-paraphrase",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "D",
-            "correct": false,
-            "raw_response": "D"
-          }
-        ]
-      },
-      {
-        "label": "impact-mapping/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Goal → Actors → Impacts → Deliver"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "impact-mapping/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Map the retention goal to key actors ("
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Map the retention goal to key actors ("
-          }
-        ]
-      },
-      {
-        "label": "impact-mapping/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Map the retention goal to key actors ("
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "invest/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "invest/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "invest/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Split this into multiple smaller stories with specific"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Split this into multiple smaller stories with specific"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "iso-25010/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "iso-25010/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "iso-25010/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "jobs-to-be-done/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "jobs-to-be-done/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Interview users about the specific circumstances that led"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "jobs-to-be-done/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Interview users about the specific circumstances that led"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Interview users about the specific circumstances that led"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Interview users about the specific circumstances that led"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Interview users about the specific circumstances that led"
-          }
-        ]
-      },
-      {
-        "label": "lasr/recognition",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          }
-        ]
-      },
-      {
-        "label": "lasr/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "lasr/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "linddun/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "linddun/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "linddun/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Systematically analyze the system against seven privacy"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Systematically analyze the system against seven privacy"
-          }
-        ]
-      },
-      {
-        "label": "llm-evaluations/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "llm-evaluations/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "llm-evaluations/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "madr/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "madr/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Document the decision with sections for context,"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Document the decision with sections for context,"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "madr/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Document the decision with sections for context,"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "mece/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Structuring categories so they do not overlap"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mece/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Organize by business capability: User Service"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Organize by business capability: User Service"
-          }
-        ]
-      },
-      {
-        "label": "mece/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Organize by business capability: User Service"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Organize by business capability: User Service"
-          }
-        ]
-      },
-      {
-        "label": "morphological-box/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Break complex problem into independent parameters/dim"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Break complex problem into independent parameters/dim"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Break complex problem into independent parameters/dim"
-          }
-        ]
-      },
-      {
-        "label": "morphological-box/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "morphological-box/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "moscow/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "moscow/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "moscow/application-paraphrase",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "C",
-            "correct": false,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mutation-testing/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mutation-testing/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "mutation-testing/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "negative-control/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          }
-        ]
-      },
-      {
-        "label": "nelson-rules/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "nelson-rules/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "nelson-rules/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "owasp-top-10/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          }
-        ]
-      },
-      {
-        "label": "owasp-top-10/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "owasp-top-10/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "plain-english-strunk-white/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "plain-english-strunk-white/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "plain-english-strunk-white/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "prd/recognition",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          }
-        ]
-      },
-      {
-        "label": "prd/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "prd/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Write a comprehensive document that defines the problem"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Write a comprehensive document that defines the problem"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "problem-space-nvc/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Concrete, objective facts without evaluation or judgment"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "problem-space-nvc/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "problem-space-nvc/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "property-based-testing/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Invariants that should always hold; automatic"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "property-based-testing/application-anchor",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Define mathematical invariants like 'interest calculations"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "property-based-testing/application-paraphrase",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "D",
-            "correct": false,
-            "raw_response": "D) Write comprehensive unit tests covering typical financial scenarios"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Define mathematical invariants like 'interest calculations"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "pyramid-principle/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "pyramid-principle/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "pyramid-principle/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "sanity-check/recognition",
-        "score": 0.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "X",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "None of the options provided are correct. The Answer"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "X",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "None of the options provided correspond to the Answer to"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "X",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "None of the options provided correspond to the Answer to"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "X",
-            "answer": "A",
-            "correct": false,
-            "raw_response": "None of the options provided correspond to the Answer to"
-          }
-        ]
-      },
-      {
-        "label": "semantic-versioning/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "semantic-versioning/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) 3.0.0 - because"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "semantic-versioning/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "socratic-method/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "socratic-method/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "socratic-method/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "sota/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "sota/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "sota/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "spc/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "spc/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "spc/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "stride/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "None of the options provided accurately describe the STRIDE"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "stride/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "stride/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "swot/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "swot/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Analyze internal strengths and weaknesses of your current"
-          }
-        ]
-      },
-      {
-        "label": "swot/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Analyze internal strengths and weaknesses of your current"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "tdd-chicago-school/recognition",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "D",
-            "correct": false,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-chicago-school/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "tdd-chicago-school/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Begin with tests for the core pricing calculations"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/recognition",
-        "score": 0.75,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "D",
-            "correct": false,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Write a test that mocks PaymentGateway and"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/consistency-variant-1",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Steve Freeman"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Steve Freeman"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Steve Freeman"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Steve Freeman"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/consistency-variant-2",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Steve Freeman"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Steve Freeman"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Steve Freeman"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Steve Freeman"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/consistency-variant-3",
-        "score": 0.5,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "C",
-            "correct": false,
-            "raw_response": "C) Dan North"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "B",
-            "correct": false,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Steve Freeman"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C) Steve Freeman"
-          }
-        ]
-      },
-      {
-        "label": "tdd-london-school/consistency-language",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D) Steve Freeman"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "testing-pyramid/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Three layers; more unit tests, fewer"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A) Three layers; more unit tests, fewer"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "testing-pyramid/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "testing-pyramid/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "timtowtdi/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          }
-        ]
-      },
-      {
-        "label": "timtowtdi/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "timtowtdi/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "todotxt-flavoured-markdown/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "todotxt-flavoured-markdown/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "todotxt-flavoured-markdown/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "user-story-mapping/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "user-story-mapping/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "user-story-mapping/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A)"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "wardley-mapping/recognition",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C)"
-          }
-        ]
-      },
-      {
-        "label": "wardley-mapping/application-anchor",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B)"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      },
-      {
-        "label": "wardley-mapping/application-paraphrase",
-        "score": 1.0,
-        "results": [
-          {
-            "permutation": [
-              "A",
-              "B",
-              "C",
-              "D"
-            ],
-            "expected": "B",
-            "answer": "B",
-            "correct": true,
-            "raw_response": "B) Map the payment processing component's position on"
-          },
-          {
-            "permutation": [
-              "B",
-              "C",
-              "D",
-              "A"
-            ],
-            "expected": "A",
-            "answer": "A",
-            "correct": true,
-            "raw_response": "A"
-          },
-          {
-            "permutation": [
-              "C",
-              "D",
-              "A",
-              "B"
-            ],
-            "expected": "D",
-            "answer": "D",
-            "correct": true,
-            "raw_response": "D)"
-          },
-          {
-            "permutation": [
-              "D",
-              "A",
-              "B",
-              "C"
-            ],
-            "expected": "C",
-            "answer": "C",
-            "correct": true,
-            "raw_response": "C"
-          }
-        ]
-      }
-    ]
-  },
-  "duration_seconds": 938.4
-}
\ No newline at end of file
diff --git a/evaluations/summaries/pilot-20260324-174404.json b/evaluations/summaries/pilot-20260324-174404.json
new file mode 100644
index 0000000..c66b5d9
--- /dev/null
+++ b/evaluations/summaries/pilot-20260324-174404.json
@@ -0,0 +1,792 @@
+{
+  "timestamp": "2026-03-24T17:44:04.891380+00:00",
+  "config": {
+    "models": [
+      "claude"
+    ],
+    "openai_model": null,
+    "mistral_model": null,
+    "deepseek_model": null,
+    "ollama_model": null,
+    "ollama_url": null,
+    "no_think": null,
+    "temperature": 0.0
+  },
+  "models": {
+    "claude": [
+      {
+        "label": "adr-according-to-nygard/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "adr-according-to-nygard/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "adr-according-to-nygard/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-1",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-2",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-3",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-language",
+        "score": 1.0
+      },
+      {
+        "label": "atam/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "atam/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "atam/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "bdd-given-when-then/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bdd-given-when-then/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bdd-given-when-then/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "bem-methodology/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bem-methodology/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bem-methodology/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "ears-requirements/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "ears-requirements/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "ears-requirements/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "event-driven-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "event-driven-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "event-driven-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "fagan-inspection/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "fagan-inspection/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "fagan-inspection/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/application-paraphrase",
+        "score": 0.0
+      },
+      {
+        "label": "five-whys/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "five-whys/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "five-whys/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "iec-61508-sil-levels/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "iec-61508-sil-levels/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "iec-61508-sil-levels/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "impact-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "impact-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "impact-mapping/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "invest/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "invest/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "invest/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "lasr/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "lasr/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "lasr/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "madr/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "madr/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "madr/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "mece/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "mece/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "mece/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "mutation-testing/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "mutation-testing/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "mutation-testing/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "negative-control/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "nelson-rules/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "nelson-rules/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "nelson-rules/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "prd/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "prd/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "prd/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "property-based-testing/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "property-based-testing/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "property-based-testing/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "sanity-check/recognition",
+        "score": 0.0
+      },
+      {
+        "label": "semantic-versioning/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "semantic-versioning/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "semantic-versioning/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "socratic-method/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "socratic-method/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "socratic-method/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "sota/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "sota/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "sota/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "spc/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "spc/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "spc/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "stride/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "stride/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "stride/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "swot/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "swot/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "swot/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-chicago-school/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-chicago-school/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-chicago-school/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-1",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-2",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-3",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-language",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "timtowtdi/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "timtowtdi/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "timtowtdi/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "todotxt-flavoured-markdown/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "todotxt-flavoured-markdown/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "todotxt-flavoured-markdown/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/application-paraphrase",
+        "score": 1.0
+      }
+    ]
+  },
+  "duration_seconds": 4862.1
+}
\ No newline at end of file
diff --git a/evaluations/summaries/pilot-20260324-190600.json b/evaluations/summaries/pilot-20260324-190600.json
new file mode 100644
index 0000000..075b200
--- /dev/null
+++ b/evaluations/summaries/pilot-20260324-190600.json
@@ -0,0 +1,792 @@
+{
+  "timestamp": "2026-03-24T19:06:00.394684+00:00",
+  "config": {
+    "models": [
+      "mistral"
+    ],
+    "openai_model": null,
+    "mistral_model": "mistral-large-2512",
+    "deepseek_model": null,
+    "ollama_model": null,
+    "ollama_url": null,
+    "no_think": null,
+    "temperature": 0.0
+  },
+  "models": {
+    "mistral": [
+      {
+        "label": "adr-according-to-nygard/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "adr-according-to-nygard/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "adr-according-to-nygard/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-1",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-2",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-3",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-language",
+        "score": 1.0
+      },
+      {
+        "label": "atam/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "atam/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "atam/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "bdd-given-when-then/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bdd-given-when-then/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bdd-given-when-then/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "bem-methodology/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bem-methodology/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bem-methodology/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "ears-requirements/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "ears-requirements/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "ears-requirements/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "event-driven-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "event-driven-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "event-driven-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "fagan-inspection/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "fagan-inspection/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "fagan-inspection/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "five-whys/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "five-whys/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "five-whys/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "iec-61508-sil-levels/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "iec-61508-sil-levels/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "iec-61508-sil-levels/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "impact-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "impact-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "impact-mapping/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "invest/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "invest/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "invest/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "iso-25010/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "jobs-to-be-done/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "lasr/recognition",
+        "score": 0.25
+      },
+      {
+        "label": "lasr/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "lasr/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "madr/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "madr/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "madr/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "mece/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "mece/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "mece/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/application-paraphrase",
+        "score": 0.25
+      },
+      {
+        "label": "mutation-testing/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "mutation-testing/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "mutation-testing/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "negative-control/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "nelson-rules/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "nelson-rules/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "nelson-rules/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "prd/recognition",
+        "score": 0.0
+      },
+      {
+        "label": "prd/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "prd/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "problem-space-nvc/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "property-based-testing/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "property-based-testing/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "property-based-testing/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "sanity-check/recognition",
+        "score": 0.0
+      },
+      {
+        "label": "semantic-versioning/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "semantic-versioning/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "semantic-versioning/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "socratic-method/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "socratic-method/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "socratic-method/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "sota/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "sota/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "sota/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "spc/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "spc/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "spc/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "stride/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "stride/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "stride/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "swot/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "swot/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "swot/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-chicago-school/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-chicago-school/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-chicago-school/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-1",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-2",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-3",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-language",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "timtowtdi/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "timtowtdi/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "timtowtdi/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "todotxt-flavoured-markdown/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "todotxt-flavoured-markdown/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "todotxt-flavoured-markdown/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/application-paraphrase",
+        "score": 1.0
+      }
+    ]
+  },
+  "duration_seconds": 1018.4
+}
\ No newline at end of file
diff --git a/evaluations/summaries/pilot-20260324-192413.json b/evaluations/summaries/pilot-20260324-192413.json
new file mode 100644
index 0000000..5312994
--- /dev/null
+++ b/evaluations/summaries/pilot-20260324-192413.json
@@ -0,0 +1,792 @@
+{
+  "timestamp": "2026-03-24T19:24:13.551875+00:00",
+  "config": {
+    "models": [
+      "openai"
+    ],
+    "openai_model": "gpt-4o",
+    "mistral_model": null,
+    "deepseek_model": null,
+    "ollama_model": null,
+    "ollama_url": null,
+    "no_think": null,
+    "temperature": 0.0
+  },
+  "models": {
+    "openai": [
+      {
+        "label": "adr-according-to-nygard/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "adr-according-to-nygard/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "adr-according-to-nygard/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-1",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-2",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-3",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-language",
+        "score": 1.0
+      },
+      {
+        "label": "atam/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "atam/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "atam/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "bdd-given-when-then/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bdd-given-when-then/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bdd-given-when-then/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "bem-methodology/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bem-methodology/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bem-methodology/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "conventional-commits/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "ears-requirements/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "ears-requirements/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "ears-requirements/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "event-driven-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "event-driven-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "event-driven-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "fagan-inspection/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "fagan-inspection/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "fagan-inspection/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/application-paraphrase",
+        "score": 0.0
+      },
+      {
+        "label": "five-whys/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "five-whys/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "five-whys/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "iec-61508-sil-levels/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "iec-61508-sil-levels/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "iec-61508-sil-levels/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "impact-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "impact-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "impact-mapping/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "invest/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "invest/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "invest/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "lasr/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "lasr/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "lasr/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "madr/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "madr/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "madr/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "mece/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "mece/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "mece/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "mutation-testing/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "mutation-testing/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "mutation-testing/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "negative-control/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "nelson-rules/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "nelson-rules/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "nelson-rules/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "prd/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "prd/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "prd/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "property-based-testing/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "property-based-testing/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "property-based-testing/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "pyramid-principle/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "sanity-check/recognition",
+        "score": 0.0
+      },
+      {
+        "label": "semantic-versioning/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "semantic-versioning/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "semantic-versioning/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "socratic-method/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "socratic-method/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "socratic-method/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "sota/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "sota/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "sota/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "spc/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "spc/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "spc/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "stride/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "stride/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "stride/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "swot/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "swot/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "swot/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-chicago-school/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "tdd-chicago-school/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-chicago-school/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "tdd-london-school/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-1",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-2",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-3",
+        "score": 0.5
+      },
+      {
+        "label": "tdd-london-school/consistency-language",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "timtowtdi/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "timtowtdi/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "timtowtdi/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "todotxt-flavoured-markdown/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "todotxt-flavoured-markdown/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "todotxt-flavoured-markdown/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/application-paraphrase",
+        "score": 1.0
+      }
+    ]
+  },
+  "duration_seconds": 938.4
+}
\ No newline at end of file
diff --git a/evaluations/summaries/pilot-20260326-070127_mistral-medium-2508.json b/evaluations/summaries/pilot-20260326-070127_mistral-medium-2508.json
new file mode 100644
index 0000000..3b26758
--- /dev/null
+++ b/evaluations/summaries/pilot-20260326-070127_mistral-medium-2508.json
@@ -0,0 +1,792 @@
+{
+  "timestamp": "2026-03-26T07:01:27.544539+00:00",
+  "config": {
+    "models": [
+      "mistral"
+    ],
+    "openai_model": null,
+    "mistral_model": "mistral-medium-2508",
+    "deepseek_model": null,
+    "ollama_model": null,
+    "ollama_url": null,
+    "no_think": null,
+    "temperature": 0.0
+  },
+  "models": {
+    "mistral": [
+      {
+        "label": "adr-according-to-nygard/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "adr-according-to-nygard/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "adr-according-to-nygard/application-paraphrase",
+        "score": 0.25
+      },
+      {
+        "label": "arc42/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "arc42/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "arc42/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "arc42/consistency-variant-1",
+        "score": 0.25
+      },
+      {
+        "label": "arc42/consistency-variant-2",
+        "score": 0.5
+      },
+      {
+        "label": "arc42/consistency-variant-3",
+        "score": 0.5
+      },
+      {
+        "label": "arc42/consistency-language",
+        "score": 0.0
+      },
+      {
+        "label": "atam/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "atam/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "atam/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "bdd-given-when-then/recognition",
+        "score": 0.25
+      },
+      {
+        "label": "bdd-given-when-then/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "bdd-given-when-then/application-paraphrase",
+        "score": 0.25
+      },
+      {
+        "label": "bem-methodology/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "bem-methodology/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "bem-methodology/application-paraphrase",
+        "score": 0.25
+      },
+      {
+        "label": "bluf/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "bluf/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "bluf/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "c4-diagrams/recognition",
+        "score": 0.25
+      },
+      {
+        "label": "c4-diagrams/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "c4-diagrams/application-paraphrase",
+        "score": 0.0
+      },
+      {
+        "label": "chain-of-thought/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "chain-of-thought/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "chain-of-thought/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "clean-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "control-chart-shewhart/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "cqrs/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "cynefin-framework/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "definition-of-done/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "devils-advocate/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "diataxis-framework/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "domain-driven-design/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "ears-requirements/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "ears-requirements/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "ears-requirements/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "event-driven-architecture/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "event-driven-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "event-driven-architecture/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "fagan-inspection/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "fagan-inspection/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "fagan-inspection/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "feynman-technique/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "five-whys/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "five-whys/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "five-whys/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "fowler-patterns/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "iec-61508-sil-levels/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "iec-61508-sil-levels/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "iec-61508-sil-levels/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "impact-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "impact-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "impact-mapping/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "invest/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "invest/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "invest/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "jobs-to-be-done/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "lasr/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "lasr/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "lasr/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "llm-evaluations/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "madr/recognition",
+        "score": 0.25
+      },
+      {
+        "label": "madr/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "madr/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "mece/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "mece/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "mece/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "morphological-box/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "mutation-testing/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "mutation-testing/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "mutation-testing/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "negative-control/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "nelson-rules/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "nelson-rules/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "nelson-rules/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "owasp-top-10/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "prd/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "prd/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "prd/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "property-based-testing/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "property-based-testing/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "property-based-testing/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "pyramid-principle/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "sanity-check/recognition",
+        "score": 0.0
+      },
+      {
+        "label": "semantic-versioning/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "semantic-versioning/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "semantic-versioning/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "socratic-method/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "socratic-method/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "socratic-method/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "sota/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "sota/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "sota/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "spc/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "spc/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "spc/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "stride/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "stride/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "stride/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "swot/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "swot/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "swot/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "tdd-chicago-school/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-chicago-school/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-chicago-school/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "tdd-london-school/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-1",
+        "score": 0.75
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-2",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-3",
+        "score": 0.0
+      },
+      {
+        "label": "tdd-london-school/consistency-language",
+        "score": 0.75
+      },
+      {
+        "label": "testing-pyramid/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "testing-pyramid/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "timtowtdi/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "timtowtdi/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "timtowtdi/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "todotxt-flavoured-markdown/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "todotxt-flavoured-markdown/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "todotxt-flavoured-markdown/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "user-story-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/application-paraphrase",
+        "score": 1.0
+      }
+    ]
+  },
+  "duration_seconds": 1872.8
+}
\ No newline at end of file
diff --git a/evaluations/summaries/pilot-20260326-073241_devstral-2512.json b/evaluations/summaries/pilot-20260326-073241_devstral-2512.json
new file mode 100644
index 0000000..e22ba86
--- /dev/null
+++ b/evaluations/summaries/pilot-20260326-073241_devstral-2512.json
@@ -0,0 +1,792 @@
+{
+  "timestamp": "2026-03-26T07:32:41.051735+00:00",
+  "config": {
+    "models": [
+      "mistral"
+    ],
+    "openai_model": null,
+    "mistral_model": "devstral-2512",
+    "deepseek_model": null,
+    "ollama_model": null,
+    "ollama_url": null,
+    "no_think": null,
+    "temperature": 0.0
+  },
+  "models": {
+    "mistral": [
+      {
+        "label": "adr-according-to-nygard/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "adr-according-to-nygard/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "adr-according-to-nygard/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-1",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-2",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-3",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-language",
+        "score": 1.0
+      },
+      {
+        "label": "atam/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "atam/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "atam/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "bdd-given-when-then/recognition",
+        "score": 0.0
+      },
+      {
+        "label": "bdd-given-when-then/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "bdd-given-when-then/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "bem-methodology/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bem-methodology/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bem-methodology/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "devils-advocate/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "diataxis-framework/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "ears-requirements/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "ears-requirements/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "ears-requirements/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "event-driven-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "event-driven-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "event-driven-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "fagan-inspection/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "fagan-inspection/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "fagan-inspection/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "feynman-technique/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "five-whys/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "five-whys/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "five-whys/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "iec-61508-sil-levels/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "iec-61508-sil-levels/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "iec-61508-sil-levels/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "impact-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "impact-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "impact-mapping/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "invest/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "invest/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "invest/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "lasr/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "lasr/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "lasr/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "llm-evaluations/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "madr/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "madr/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "madr/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "mece/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "mece/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "mece/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "mutation-testing/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "mutation-testing/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "mutation-testing/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "negative-control/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "nelson-rules/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "nelson-rules/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "nelson-rules/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "plain-english-strunk-white/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "prd/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "prd/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "prd/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "property-based-testing/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "property-based-testing/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "property-based-testing/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "sanity-check/recognition",
+        "score": 0.0
+      },
+      {
+        "label": "semantic-versioning/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "semantic-versioning/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "semantic-versioning/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "socratic-method/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "socratic-method/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "socratic-method/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "sota/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "sota/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "sota/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "spc/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "spc/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "spc/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "stride/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "stride/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "stride/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "swot/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "swot/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "swot/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-chicago-school/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "tdd-chicago-school/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-chicago-school/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-1",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-2",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-3",
+        "score": 0.25
+      },
+      {
+        "label": "tdd-london-school/consistency-language",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "timtowtdi/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "timtowtdi/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "timtowtdi/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "todotxt-flavoured-markdown/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "todotxt-flavoured-markdown/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "todotxt-flavoured-markdown/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "wardley-mapping/application-paraphrase",
+        "score": 1.0
+      }
+    ]
+  },
+  "duration_seconds": 919.6
+}
\ No newline at end of file
diff --git a/evaluations/summaries/pilot-20260326-074132_mistral-small-2603.json b/evaluations/summaries/pilot-20260326-074132_mistral-small-2603.json
new file mode 100644
index 0000000..d7d851e
--- /dev/null
+++ b/evaluations/summaries/pilot-20260326-074132_mistral-small-2603.json
@@ -0,0 +1,792 @@
+{
+  "timestamp": "2026-03-26T07:41:32.011654+00:00",
+  "config": {
+    "models": [
+      "mistral"
+    ],
+    "openai_model": null,
+    "mistral_model": "mistral-small-2603",
+    "deepseek_model": null,
+    "ollama_model": null,
+    "ollama_url": null,
+    "no_think": null,
+    "temperature": 0.0
+  },
+  "models": {
+    "mistral": [
+      {
+        "label": "adr-according-to-nygard/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "adr-according-to-nygard/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "adr-according-to-nygard/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-1",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-2",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-variant-3",
+        "score": 1.0
+      },
+      {
+        "label": "arc42/consistency-language",
+        "score": 1.0
+      },
+      {
+        "label": "atam/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "atam/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "atam/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "bdd-given-when-then/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bdd-given-when-then/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bdd-given-when-then/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "bem-methodology/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bem-methodology/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bem-methodology/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "bluf/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "c4-diagrams/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "chain-of-thought/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "clean-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "control-chart-shewhart/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "conventional-commits/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "conventional-commits/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "cqrs/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "cqrs/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "cqrs/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "cynefin-framework/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "cynefin-framework/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "definition-of-done/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "definition-of-done/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "definition-of-done/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "devils-advocate/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "devils-advocate/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "devils-advocate/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "diataxis-framework/recognition",
+        "score": 0.25
+      },
+      {
+        "label": "diataxis-framework/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "diataxis-framework/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "docs-as-code/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "docs-as-code/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "domain-driven-design/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "domain-driven-design/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "domain-driven-design/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "ears-requirements/recognition",
+        "score": 0.0
+      },
+      {
+        "label": "ears-requirements/application-anchor",
+        "score": 0.0
+      },
+      {
+        "label": "ears-requirements/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "event-driven-architecture/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "event-driven-architecture/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "event-driven-architecture/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "fagan-inspection/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "fagan-inspection/application-anchor",
+        "score": 0.25
+      },
+      {
+        "label": "fagan-inspection/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "feynman-technique/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "feynman-technique/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "feynman-technique/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "five-whys/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "five-whys/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "five-whys/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "fowler-patterns/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "fowler-patterns/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "fowler-patterns/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "gherkin/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "gherkin/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "gherkin/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "github-flow/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "github-flow/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "github-flow/application-paraphrase",
+        "score": 0.0
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "gutes-deutsch-wolf-schneider/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "hexagonal-architecture/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "hexagonal-architecture/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "iec-61508-sil-levels/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "iec-61508-sil-levels/application-anchor",
+        "score": 0.0
+      },
+      {
+        "label": "iec-61508-sil-levels/application-paraphrase",
+        "score": 0.0
+      },
+      {
+        "label": "impact-mapping/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "impact-mapping/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "impact-mapping/application-paraphrase",
+        "score": 0.25
+      },
+      {
+        "label": "invest/recognition",
+        "score": 0.25
+      },
+      {
+        "label": "invest/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "invest/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "iso-25010/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "iso-25010/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "jobs-to-be-done/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "jobs-to-be-done/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "jobs-to-be-done/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "lasr/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "lasr/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "lasr/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "linddun/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "linddun/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "llm-evaluations/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "llm-evaluations/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "llm-evaluations/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "madr/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "madr/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "madr/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "mece/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "mece/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "mece/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "morphological-box/application-anchor",
+        "score": 0.25
+      },
+      {
+        "label": "morphological-box/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "moscow/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "moscow/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "moscow/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "mutation-testing/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "mutation-testing/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "mutation-testing/application-paraphrase",
+        "score": 0.25
+      },
+      {
+        "label": "negative-control/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "nelson-rules/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "nelson-rules/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "nelson-rules/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "owasp-top-10/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "owasp-top-10/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "owasp-top-10/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "plain-english-strunk-white/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "plain-english-strunk-white/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "plain-english-strunk-white/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "prd/recognition",
+        "score": 0.25
+      },
+      {
+        "label": "prd/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "prd/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "problem-space-nvc/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "problem-space-nvc/application-anchor",
+        "score": 0.25
+      },
+      {
+        "label": "problem-space-nvc/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "property-based-testing/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "property-based-testing/application-anchor",
+        "score": 0.25
+      },
+      {
+        "label": "property-based-testing/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "pyramid-principle/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "pyramid-principle/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "sanity-check/recognition",
+        "score": 0.0
+      },
+      {
+        "label": "semantic-versioning/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "semantic-versioning/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "semantic-versioning/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "socratic-method/recognition",
+        "score": 0.25
+      },
+      {
+        "label": "socratic-method/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "socratic-method/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "sota/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "sota/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "sota/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "spc/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "spc/application-anchor",
+        "score": 0.25
+      },
+      {
+        "label": "spc/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "stride/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "stride/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "stride/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "swot/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "swot/application-anchor",
+        "score": 0.25
+      },
+      {
+        "label": "swot/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "tdd-chicago-school/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "tdd-chicago-school/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "tdd-chicago-school/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "tdd-london-school/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "tdd-london-school/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "tdd-london-school/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-1",
+        "score": 0.5
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-2",
+        "score": 0.75
+      },
+      {
+        "label": "tdd-london-school/consistency-variant-3",
+        "score": 0.25
+      },
+      {
+        "label": "tdd-london-school/consistency-language",
+        "score": 0.5
+      },
+      {
+        "label": "testing-pyramid/recognition",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "testing-pyramid/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "timtowtdi/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "timtowtdi/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "timtowtdi/application-paraphrase",
+        "score": 1.0
+      },
+      {
+        "label": "todotxt-flavoured-markdown/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "todotxt-flavoured-markdown/application-anchor",
+        "score": 0.75
+      },
+      {
+        "label": "todotxt-flavoured-markdown/application-paraphrase",
+        "score": 0.75
+      },
+      {
+        "label": "user-story-mapping/recognition",
+        "score": 0.75
+      },
+      {
+        "label": "user-story-mapping/application-anchor",
+        "score": 1.0
+      },
+      {
+        "label": "user-story-mapping/application-paraphrase",
+        "score": 0.5
+      },
+      {
+        "label": "wardley-mapping/recognition",
+        "score": 0.5
+      },
+      {
+        "label": "wardley-mapping/application-anchor",
+        "score": 0.5
+      },
+      {
+        "label": "wardley-mapping/application-paraphrase",
+        "score": 0.25
+      }
+    ]
+  },
+  "duration_seconds": 1089.2
+}
\ No newline at end of file
diff --git a/website/public/evaluation-report.html b/website/public/evaluation-report.html
index 0fd9fb2..584a172 100644
--- a/website/public/evaluation-report.html
+++ b/website/public/evaluation-report.html
@@ -37,7 +37,7 @@
 </head>
 <body>
 <h1>Semantic Anchor Evaluation Report</h1>
-<p class="subtitle">Multiple-choice recognition test across 3 LLMs — 191 questions, 61 anchors</p>
+<p class="subtitle">Multiple-choice recognition test across 6 LLMs — 191 questions, 61 anchors</p>
 
 <div class="legend">
   <div class="legend-item"><div class="legend-dot" style="background:#dcfce7"></div> &ge;80%</div>
@@ -48,298 +48,336 @@ <h1>Semantic Anchor Evaluation Report</h1>
 <h2>Model Summary</h2>
 <div class="summary-grid">
   <div class="summary-card">
-    <div class="model-name">Claude Sonnet</div>
+    <div class="model-name">claude-sonnet-4-20250514</div>
     <div class="score" style="color: #22c55e">99%</div>
     <div class="detail">191 questions · pilot-20260324-174404.json</div>
   </div>
   <div class="summary-card">
-    <div class="model-name">GPT-4o</div>
+    <div class="model-name">devstral-2512</div>
+    <div class="score" style="color: #22c55e">96%</div>
+    <div class="detail">191 questions · pilot-20260326-073241_devstral-2512.json</div>
+  </div>
+  <div class="summary-card">
+    <div class="model-name">gpt-4o</div>
     <div class="score" style="color: #22c55e">98%</div>
     <div class="detail">191 questions · pilot-20260324-192413.json</div>
   </div>
   <div class="summary-card">
-    <div class="model-name">Mistral Large</div>
+    <div class="model-name">mistral-large-2512</div>
     <div class="score" style="color: #22c55e">96%</div>
     <div class="detail">191 questions · pilot-20260324-190600.json</div>
   </div>
+  <div class="summary-card">
+    <div class="model-name">mistral-medium-2508</div>
+    <div class="score" style="color: #22c55e">85%</div>
+    <div class="detail">191 questions · pilot-20260326-070127_mistral-medium-2508.json</div>
+  </div>
+  <div class="summary-card">
+    <div class="model-name">mistral-small-2603</div>
+    <div class="score" style="color: #eab308">74%</div>
+    <div class="detail">191 questions · pilot-20260326-074132_mistral-small-2603.json</div>
+  </div>
 </div>
 
 <h2>Heatmap: Anchor × Model</h2>
 <table>
 <thead><tr>
   <th>Anchor / Question</th>
-  <th style='text-align:center'>Claude Sonnet</th>
-  <th style='text-align:center'>GPT-4o</th>
-  <th style='text-align:center'>Mistral Large</th>
+  <th style='text-align:center'>claude-sonnet-4-20250514</th>
+  <th style='text-align:center'>devstral-2512</th>
+  <th style='text-align:center'>gpt-4o</th>
+  <th style='text-align:center'>mistral-large-2512</th>
+  <th style='text-align:center'>mistral-medium-2508</th>
+  <th style='text-align:center'>mistral-small-2603</th>
 </tr></thead>
 <tbody>
-<tr class="anchor-group"><td>adr-according-to-nygard</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr class="anchor-group"><td>arc42</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-language</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-variant-1</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-variant-2</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-variant-3</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>atam</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>bdd-given-when-then</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>bem-methodology</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>bluf</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>c4-diagrams</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>chain-of-thought</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>clean-architecture</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>control-chart-shewhart</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>conventional-commits</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>cqrs</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>cynefin-framework</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>definition-of-done</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>devils-advocate</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>diataxis-framework</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>docs-as-code</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>domain-driven-design</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>ears-requirements</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr class="anchor-group"><td>event-driven-architecture</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>fagan-inspection</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>feynman-technique</td><td class="score-cell" style="background:#fef9c3">67%</td><td class="score-cell" style="background:#fef9c3">67%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>five-whys</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>fowler-patterns</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>gherkin</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>github-flow</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>gutes-deutsch-wolf-schneider</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>hexagonal-architecture</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>iec-61508-sil-levels</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>impact-mapping</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>invest</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>iso-25010</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>jobs-to-be-done</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>lasr</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
-<tr class="anchor-group"><td>linddun</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>llm-evaluations</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>madr</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>mece</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>morphological-box</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>moscow</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>mutation-testing</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>nelson-rules</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>owasp-top-10</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>plain-english-strunk-white</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>prd</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">0%</td></tr>
-<tr class="anchor-group"><td>problem-space-nvc</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>property-based-testing</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>pyramid-principle</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>semantic-versioning</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>socratic-method</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>sota</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>spc</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>stride</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>swot</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>tdd-chicago-school</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>tdd-london-school</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">89%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-language</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-variant-1</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-variant-2</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">consistency-variant-3</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>testing-pyramid</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>timtowtdi</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>todotxt-flavoured-markdown</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
-<tr class="anchor-group"><td>user-story-mapping</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr class="anchor-group"><td>wardley-mapping</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
-<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>adr-according-to-nygard</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>arc42</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">46%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">consistency-language</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">consistency-variant-1</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">consistency-variant-2</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">consistency-variant-3</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>atam</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">58%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>bdd-given-when-then</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fee2e2">33%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>bem-methodology</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">42%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>bluf</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>c4-diagrams</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>chain-of-thought</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">58%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>clean-architecture</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>control-chart-shewhart</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>conventional-commits</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>cqrs</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>cynefin-framework</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>definition-of-done</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>devils-advocate</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>diataxis-framework</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr class="anchor-group"><td>docs-as-code</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>domain-driven-design</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>ears-requirements</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">17%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td></tr>
+<tr class="anchor-group"><td>event-driven-architecture</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>fagan-inspection</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>feynman-technique</td><td class="score-cell" style="background:#fef9c3">67%</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">67%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>five-whys</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>fowler-patterns</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>gherkin</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>github-flow</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">42%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>gutes-deutsch-wolf-schneider</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>hexagonal-architecture</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>iec-61508-sil-levels</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fee2e2">33%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>impact-mapping</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>invest</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr class="anchor-group"><td>iso-25010</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>jobs-to-be-done</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>lasr</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>linddun</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>llm-evaluations</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>madr</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>mece</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>morphological-box</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>moscow</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>mutation-testing</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>nelson-rules</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>owasp-top-10</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>plain-english-strunk-white</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>prd</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">58%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr class="anchor-group"><td>problem-space-nvc</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>property-based-testing</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>pyramid-principle</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>semantic-versioning</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>socratic-method</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr class="anchor-group"><td>sota</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>spc</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>stride</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>swot</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>tdd-chicago-school</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">67%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>tdd-london-school</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">89%</td><td class="score-cell" style="background:#dcfce7">89%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">71%</td><td class="score-cell" style="background:#fef9c3">54%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">consistency-language</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">consistency-variant-1</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">consistency-variant-2</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">consistency-variant-3</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">0%</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr class="anchor-group"><td>testing-pyramid</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr class="anchor-group"><td>timtowtdi</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">83%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>todotxt-flavoured-markdown</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">83%</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>user-story-mapping</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">92%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
+<tr class="anchor-group"><td>wardley-mapping</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">42%</td></tr>
+<tr><td class="question-label">application-anchor</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td class="question-label">application-paraphrase</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fee2e2">25%</td></tr>
+<tr><td class="question-label">recognition</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#dcfce7">✓</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
 </tbody></table>
 <h2>Control Questions</h2>
 <table class="controls">
-<thead><tr><th>Control</th><th style='text-align:center'>Claude Sonnet</th><th style='text-align:center'>GPT-4o</th><th style='text-align:center'>Mistral Large</th></tr></thead>
+<thead><tr><th>Control</th><th style='text-align:center'>claude-sonnet-4-20250514</th><th style='text-align:center'>devstral-2512</th><th style='text-align:center'>gpt-4o</th><th style='text-align:center'>mistral-large-2512</th><th style='text-align:center'>mistral-medium-2508</th><th style='text-align:center'>mistral-small-2603</th></tr></thead>
 <tbody>
-<tr><td>negative-control</td><td class="score-cell" style="background:#dcfce7">100%</td><td class="score-cell" style="background:#dcfce7">100%</td><td class="score-cell" style="background:#fef9c3">75%</td></tr>
-<tr><td>sanity-check</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td></tr>
+<tr><td>negative-control</td><td class="score-cell" style="background:#dcfce7">100%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">100%</td><td class="score-cell" style="background:#fef9c3">75%</td><td class="score-cell" style="background:#dcfce7">100%</td><td class="score-cell" style="background:#fef9c3">50%</td></tr>
+<tr><td>sanity-check</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td><td class="score-cell" style="background:#dcfce7">0%</td></tr>
 </tbody></table>
 <h2>Failures Detail</h2>
-<h3>Claude Sonnet: 2 failures</h3>
+<h3>claude-sonnet-4-20250514: 2 failures</h3>
 <div class="fail-list">
 <div class="fail-item"><span>feynman-technique/application-paraphrase</span><span style="color:#ef4444;font-weight:600">0%</span></div>
 <div class="fail-item"><span>github-flow/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
 </div>
-<h3>GPT-4o: 13 failures</h3>
+<h3>devstral-2512: 17 failures</h3>
+<div class="fail-list">
+<div class="fail-item"><span>atam/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>bdd-given-when-then/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>bdd-given-when-then/recognition</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>ears-requirements/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>ears-requirements/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>feynman-technique/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>feynman-technique/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>github-flow/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>iec-61508-sil-levels/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>iec-61508-sil-levels/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>lasr/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>madr/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>prd/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>property-based-testing/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-chicago-school/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-variant-3</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>todotxt-flavoured-markdown/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+</div>
+<h3>gpt-4o: 13 failures</h3>
 <div class="fail-list">
 <div class="fail-item"><span>control-chart-shewhart/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
 <div class="fail-item"><span>ears-requirements/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
@@ -355,7 +393,7 @@ <h3>GPT-4o: 13 failures</h3>
 <div class="fail-item"><span>tdd-london-school/consistency-variant-3</span><span style="color:#eab308;font-weight:600">50%</span></div>
 <div class="fail-item"><span>tdd-london-school/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
 </div>
-<h3>Mistral Large: 17 failures</h3>
+<h3>mistral-large-2512: 17 failures</h3>
 <div class="fail-list">
 <div class="fail-item"><span>adr-according-to-nygard/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
 <div class="fail-item"><span>bdd-given-when-then/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
@@ -375,11 +413,209 @@ <h3>Mistral Large: 17 failures</h3>
 <div class="fail-item"><span>semantic-versioning/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
 <div class="fail-item"><span>todotxt-flavoured-markdown/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
 </div>
+<h3>mistral-medium-2508: 77 failures</h3>
+<div class="fail-list">
+<div class="fail-item"><span>adr-according-to-nygard/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>adr-according-to-nygard/application-paraphrase</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>adr-according-to-nygard/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>arc42/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>arc42/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>arc42/consistency-language</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>arc42/consistency-variant-1</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>arc42/consistency-variant-2</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>arc42/consistency-variant-3</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>arc42/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>atam/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>atam/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>atam/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>bdd-given-when-then/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>bdd-given-when-then/application-paraphrase</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>bdd-given-when-then/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>bem-methodology/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>bem-methodology/application-paraphrase</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>bem-methodology/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>bluf/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>bluf/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>bluf/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>c4-diagrams/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>c4-diagrams/application-paraphrase</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>c4-diagrams/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>chain-of-thought/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>chain-of-thought/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>chain-of-thought/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>control-chart-shewhart/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>cqrs/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>cynefin-framework/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>definition-of-done/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>definition-of-done/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>diataxis-framework/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>docs-as-code/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>event-driven-architecture/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>event-driven-architecture/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fagan-inspection/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fagan-inspection/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>feynman-technique/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fowler-patterns/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>gutes-deutsch-wolf-schneider/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>gutes-deutsch-wolf-schneider/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>hexagonal-architecture/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>iec-61508-sil-levels/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>jobs-to-be-done/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>linddun/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>madr/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>mece/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>morphological-box/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>moscow/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>mutation-testing/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>nelson-rules/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>owasp-top-10/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>prd/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>property-based-testing/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>pyramid-principle/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>semantic-versioning/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>semantic-versioning/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>socratic-method/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>sota/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>sota/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>spc/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>stride/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>swot/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>swot/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-language</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-variant-1</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-variant-3</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>tdd-london-school/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>testing-pyramid/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>testing-pyramid/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>timtowtdi/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>timtowtdi/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>todotxt-flavoured-markdown/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>user-story-mapping/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+</div>
+<h3>mistral-small-2603: 115 failures</h3>
+<div class="fail-list">
+<div class="fail-item"><span>adr-according-to-nygard/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>adr-according-to-nygard/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>atam/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>bdd-given-when-then/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>conventional-commits/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>conventional-commits/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>cqrs/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>cqrs/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>definition-of-done/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>definition-of-done/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>definition-of-done/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>devils-advocate/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>devils-advocate/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>devils-advocate/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>diataxis-framework/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>diataxis-framework/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>diataxis-framework/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>docs-as-code/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>domain-driven-design/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>domain-driven-design/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>ears-requirements/application-anchor</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>ears-requirements/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>ears-requirements/recognition</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>event-driven-architecture/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>event-driven-architecture/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>event-driven-architecture/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fagan-inspection/application-anchor</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>fagan-inspection/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fagan-inspection/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>feynman-technique/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>feynman-technique/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>five-whys/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fowler-patterns/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>fowler-patterns/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>gherkin/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>gherkin/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>github-flow/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>github-flow/application-paraphrase</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>github-flow/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>gutes-deutsch-wolf-schneider/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>hexagonal-architecture/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>iec-61508-sil-levels/application-anchor</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>iec-61508-sil-levels/application-paraphrase</span><span style="color:#ef4444;font-weight:600">0%</span></div>
+<div class="fail-item"><span>impact-mapping/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>impact-mapping/application-paraphrase</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>impact-mapping/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>invest/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>iso-25010/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>iso-25010/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>jobs-to-be-done/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>lasr/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>lasr/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>lasr/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>linddun/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>llm-evaluations/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>llm-evaluations/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>madr/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>madr/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>madr/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>mece/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>morphological-box/application-anchor</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>morphological-box/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>moscow/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>moscow/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>mutation-testing/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>mutation-testing/application-paraphrase</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>mutation-testing/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>nelson-rules/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>owasp-top-10/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>owasp-top-10/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>owasp-top-10/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>plain-english-strunk-white/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>plain-english-strunk-white/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>plain-english-strunk-white/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>prd/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>prd/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>problem-space-nvc/application-anchor</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>problem-space-nvc/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>problem-space-nvc/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>property-based-testing/application-anchor</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>property-based-testing/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>pyramid-principle/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>pyramid-principle/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>semantic-versioning/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>socratic-method/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>socratic-method/recognition</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>sota/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>sota/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>spc/application-anchor</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>spc/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>spc/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>stride/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>stride/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>swot/application-anchor</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>swot/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-chicago-school/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>tdd-chicago-school/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>tdd-london-school/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>tdd-london-school/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-language</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-variant-1</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-variant-2</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>tdd-london-school/consistency-variant-3</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>tdd-london-school/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>testing-pyramid/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>timtowtdi/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>timtowtdi/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>todotxt-flavoured-markdown/application-anchor</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>todotxt-flavoured-markdown/application-paraphrase</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>todotxt-flavoured-markdown/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>user-story-mapping/application-paraphrase</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>user-story-mapping/recognition</span><span style="color:#eab308;font-weight:600">75%</span></div>
+<div class="fail-item"><span>wardley-mapping/application-anchor</span><span style="color:#eab308;font-weight:600">50%</span></div>
+<div class="fail-item"><span>wardley-mapping/application-paraphrase</span><span style="color:#ef4444;font-weight:600">25%</span></div>
+<div class="fail-item"><span>wardley-mapping/recognition</span><span style="color:#eab308;font-weight:600">50%</span></div>
+</div>
 
 <div class="meta">
 <h3>Run Metadata</h3>
 <dl>
-<dt>Claude Sonnet:</dt><dd>pilot-20260324-174404.json · 81m 2s · 2026-03-24T17:44:04</dd><br><dt>GPT-4o:</dt><dd>pilot-20260324-192413.json · 15m 38s · 2026-03-24T19:24:13</dd><br><dt>Mistral Large:</dt><dd>pilot-20260324-190600.json · 16m 58s · 2026-03-24T19:06:00</dd><br>
+<dt>claude-sonnet-4-20250514:</dt><dd>pilot-20260324-174404.json · 81m 2s · 2026-03-24T17:44:04</dd><br><dt>devstral-2512:</dt><dd>pilot-20260326-073241_devstral-2512.json · 15m 19s · 2026-03-26T07:32:41</dd><br><dt>gpt-4o:</dt><dd>pilot-20260324-192413.json · 15m 38s · 2026-03-24T19:24:13</dd><br><dt>mistral-large-2512:</dt><dd>pilot-20260324-190600.json · 16m 58s · 2026-03-24T19:06:00</dd><br><dt>mistral-medium-2508:</dt><dd>pilot-20260326-070127_mistral-medium-2508.json · 31m 12s · 2026-03-26T07:01:27</dd><br><dt>mistral-small-2603:</dt><dd>pilot-20260326-074132_mistral-small-2603.json · 18m 9s · 2026-03-26T07:41:32</dd><br>
 </dl>
 <p style="margin-top:0.75rem">Generated by <code>evaluations/generate-report.py</code> · Position bias mitigation: 4 permutations per question · Scoring: deterministic MC (no LLM judge)</p>
 </div>