
Commit 453762b: merge pull request #2044 from Hack23/copilot/auto-fetch-full-text-documents
(2 parents: a63366f + 862da67)

feat: Auto-fetch full text for top-N documents per analysis run (`--auto-full-text-top-n`)

8 files changed: 691 additions, 22 deletions

.github/prompts/05-analysis-gate.md

Lines changed: 27 additions & 3 deletions
````diff
@@ -31,10 +31,13 @@ This is the **only** gate separating analysis from article generation. If it fai
 - `forward-indicators.md` declares **≥ 10 dated indicators** (bullet or table rows matching a date pattern across the four horizon sections).
 - `coalition-mathematics.md` contains a seat-count table (≥ 1 table row with `Ja`/`Nej`/`Avstår` or a party-to-seats mapping).
 - `implementation-feasibility.md` — when it names a recognised agency (Kriminalvården, Polismyndigheten, Försäkringskassan, Skatteverket, Migrationsverket, Arbetsförmedlingen, Socialstyrelsen, Transportstyrelsen, Trafikverket, Naturvårdsverket, Energimyndigheten) — contains a `statskontoret.se` URL citation **or** the literal phrase `none found` in the `Statskontoret relevance` row.
+9. **PIR status sidecar** — `pir-status.json` is present and valid so open PIRs can roll forward to the next cycle.
+10. **Top-2 full-text availability** — when `data-download-manifest.md` contains a `## Full-Text Fetch Outcomes` table (written by `download-parliamentary-data.ts --auto-full-text-top-n`), at least 2 top documents must have `full_text_available=true`. Add `<!-- full-text-fallback: <reason> -->` to the manifest to bypass (e.g. when full text is genuinely unavailable from the MCP server or the flag was not used).
+11. **Supplementary artifacts** — see §Supplementary checks below (blocking for aggregation/Tier-C/multi-run).
 
 ## Implementation
 
-No dedicated validator script exists yet — implement the checks as an inline bash gate. Full implementation (covers checks 1–9, plus conditional check 9b where applicable):
+No dedicated validator script exists yet — implement the checks as an inline bash gate. Full implementation (covers checks 1–11, plus conditional check 9b where applicable):
 
 ```bash
 set -Eeuo pipefail
````
```diff
@@ -238,9 +241,10 @@ fi
 # populate the `| **Statskontoret relevance** | ... |` row with either a
 # statskontoret.se URL or the literal `none found` when no relevant coverage exists.
 AGENCY_RE='Kriminalvård(en)?|Polismyndigheten|Försäkringskassan|Skatteverket|Migrationsverket|Arbetsförmedlingen|Socialstyrelsen|Transportstyrelsen|Trafikverket|Naturvårdsverket|Energimyndigheten'
+STATSKONTORET_RELEVANCE_RE='^\|[[:space:]]*\*\*Statskontoret relevance\*\*[[:space:]]*\|[[:space:]]*([^|]*statskontoret\.se[^|]*|[^|]*none found[^|]*)\|'
 if [ -s "$ANALYSIS_DIR/implementation-feasibility.md" ]; then
   if grep -qE "$AGENCY_RE" "$ANALYSIS_DIR/implementation-feasibility.md"; then
-    grep -qiE '^\|[[:space:]]*\*\*Statskontoret relevance\*\*[[:space:]]*\|[[:space:]]*([^|]*statskontoret\.se[^|]*|[^|]*none found[^|]*)\|' "$ANALYSIS_DIR/implementation-feasibility.md" \
+    grep -qiE "$STATSKONTORET_RELEVANCE_RE" "$ANALYSIS_DIR/implementation-feasibility.md" \
       || { echo "❌ implementation-feasibility.md: names a recognised agency but the Statskontoret relevance row lacks a statskontoret.se URL or 'none found'"; FAIL=1; }
   fi
 fi
```
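As a quick sanity check, the extracted pattern can be exercised in a JavaScript-flavoured form (an assumption on my part: `[[:space:]]` is POSIX ERE syntax for `grep -E` and becomes `\s` in JS; the sample row values below are invented):

```typescript
// JS-flavoured equivalent of STATSKONTORET_RELEVANCE_RE (assumption: \s
// replaces POSIX [[:space:]]; the /i flag mirrors grep -qiE).
const relevanceRow =
  /^\|\s*\*\*Statskontoret relevance\*\*\s*\|\s*([^|]*statskontoret\.se[^|]*|[^|]*none found[^|]*)\|/i;

// Invented sample rows:
console.log(relevanceRow.test('| **Statskontoret relevance** | statskontoret.se/placeholder |')); // → true
console.log(relevanceRow.test('| **Statskontoret relevance** | none found |'));                   // → true
console.log(relevanceRow.test('| **Statskontoret relevance** | pending |'));                      // → false
```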
````diff
@@ -319,6 +323,26 @@ except Exception as e:
 " 2>&1 || FAIL=1
 fi
 
+# Check 10 — top-2 full-text availability (auto-full-text-top-n gate)
+# When the manifest contains a "Full-Text Fetch Outcomes" table (written by
+# download-parliamentary-data.ts --auto-full-text-top-n), verify that at least
+# 2 top documents have full_text_available=true. A fallback annotation
+# <!-- full-text-fallback: <reason> --> anywhere in the manifest bypasses
+# this check so that runs without the flag, or runs where full text is
+# genuinely unavailable from the MCP server, are not blocked.
+if [ -s "$ANALYSIS_DIR/data-download-manifest.md" ]; then
+  if grep -q "## Full-Text Fetch Outcomes" "$ANALYSIS_DIR/data-download-manifest.md"; then
+    if grep -q "full-text-fallback:" "$ANALYSIS_DIR/data-download-manifest.md"; then
+      : # Fallback annotation present — bypass check
+    else
+      FT_SUCCESS=$(grep -cE '^\|[[:space:]]*[A-Za-z0-9_-]+[[:space:]]*\|[[:space:]]*true' \
+        "$ANALYSIS_DIR/data-download-manifest.md" || true)
+      [ "${FT_SUCCESS:-0}" -ge 2 ] \
+        || { echo "❌ data-download-manifest.md: Full-Text Fetch Outcomes table present but fewer than 2 top documents have full_text_available=true (found ${FT_SUCCESS:-0}). Add <!-- full-text-fallback: <reason> --> to the manifest to bypass."; FAIL=1; }
+    fi
+  fi
+fi
+
 [ "$FAIL" -eq 0 ] || exit 1
 ```
````
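For illustration, the same check-10 decision can be sketched in TypeScript (a sketch under assumptions, not repo code: `fullTextGatePasses` is a hypothetical name and the `dok_id` values are invented):

```typescript
// Mirrors the bash gate: table absent → not applicable (pass); fallback
// annotation → bypass (pass); otherwise require ≥ 2 rows with
// full_text_available=true.
function fullTextGatePasses(manifest: string): boolean {
  if (!manifest.includes('## Full-Text Fetch Outcomes')) return true;
  if (manifest.includes('full-text-fallback:')) return true;
  const successRows = manifest
    .split('\n')
    .filter(line => /^\|\s*[A-Za-z0-9_-]+\s*\|\s*true/.test(line));
  return successRows.length >= 2;
}

// Invented sample manifest fragment:
const sample = [
  '## Full-Text Fetch Outcomes',
  '',
  '| dok_id | full_text_available | chars | notes |',
  '|--------|--------------------:|------:|-------|',
  '| HB01JuU1 | true | 41230 | persisted: full-text/HB01JuU1.md |',
  '| HB01JuU2 | true | 38815 | persisted: full-text/HB01JuU2.md |',
  '| HB01JuU3 | false | 0 | no content returned |',
].join('\n');

console.log(fullTextGatePasses(sample)); // → true (two rows are true)
```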

````diff
@@ -351,7 +375,7 @@ Non-blocking for `standard` / `deep` runs; **blocking for `comprehensive` / Tier
 Inline bash probe — append to the main block after `FAIL=0` bookkeeping completes. Supplementary artifacts have **three independent blocking triggers**, not a single tier-only rule: **aggregation article types** (`weekly-review`, `monthly-review`) require the aggregation artifacts; any run whose **tier** is `comprehensive` (the Tier-C run mode) requires the Tier-C supplementary set; and `cross-run-diff.md` is blocking whenever the workflow has **≥ 2 production runs** of the same article type, including `standard` and `deep` runs. `ARTICLE_TYPE` encodes the workflow family; `ANALYSIS_TIER` (when set) encodes the depth tier (`standard` | `deep` | `comprehensive`); `ANALYSIS_RUN_COUNT` (when set) is the numeric count of runs for the same article-generation cycle (if unset or non-numeric, treated as `1`).
 
 ```bash
-# Check 10 — supplementary artifacts (blocking for aggregation types, any Tier-C run, and S5 when run-count >= 2)
+# Check 11 — supplementary artifacts (blocking for aggregation types, any Tier-C run, and S5 when run-count >= 2)
 IS_AGGREGATION=0
 IS_TIER_C=0
 IS_MULTI_RUN=0
````

analysis/methodologies/ai-driven-analysis-guide.md

Lines changed: 6 additions & 4 deletions
```diff
@@ -16,7 +16,7 @@
   <a href="#"><img src="https://img.shields.io/badge/Classification-Public-green?style=for-the-badge" alt="Classification"/></a>
 </p>
 
-**📋 Document Owner:** CEO | **📄 Version:** 6.6 | **📅 Last Updated:** 2026-04-25 (UTC)
+**📋 Document Owner:** CEO | **📄 Version:** 6.7 | **📅 Last Updated:** 2026-04-27 (UTC)
 **🔄 Review Cycle:** Quarterly | **⏰ Next Review:** 2026-07-21
 **🏢 Owner:** Hack23 AB (Org.nr 5595347807) | **🏷️ Classification:** Public
```

````diff
@@ -87,11 +87,13 @@ Scripts run the download. Example:
 
 ```bash
 npx tsx scripts/download-parliamentary-data.ts \
   --date ${ARTICLE_DATE} \
-  --scope ${DOC_TYPE} \
-  --out analysis/daily/${ARTICLE_DATE}/${DOC_TYPE}/data/
+  --doc-type ${DOC_TYPE} \
+  --auto-full-text-top-n 2
 ```
 
-**Write `data-download-manifest.md`** using the [manifest template](../templates/data-download-manifest.md). It records what arrived, from which MCP tools, with what data-depth distribution (FULL-TEXT / SUMMARY / METADATA-ONLY).
+**`--auto-full-text-top-n 2`** (recommended for L2/L3 runs): after the bulk download, the script calls `get_dokument_innehall` with `include_full_text=true` for the top-2 documents (by order in the downloaded batch) and persists the retrieved content to `analysis/daily/${ARTICLE_DATE}/${DOC_TYPE}/full-text/{dok_id}.md`. Accept the extra 30–60 s as a documented quality investment. The manifest's `## Full-Text Fetch Outcomes` table records `full_text_available` per `dok_id`; the analysis gate (check 10) enforces that ≥ 2 succeed or a `<!-- full-text-fallback: <reason> -->` annotation is present.
+
+**Write `data-download-manifest.md`** using the [manifest template](../templates/data-download-manifest.md). It records what arrived, from which MCP tools, with what data-depth distribution (FULL-TEXT / SUMMARY / METADATA-ONLY) and — when `--auto-full-text-top-n` is used — the `## Full-Text Fetch Outcomes` table.
 
 After `download-parliamentary-data.ts` completes for `committeeReports`, also run the voting-records script to capture party-level vote counts and defector detection for each betänkande:
````
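The persistence layout described in this section can be captured as a tiny pure helper (hypothetical, for illustration only; the repo derives this path inside `download-parliamentary-data.ts`, and the `dok_id` value is invented):

```typescript
// Hypothetical helper: where --auto-full-text-top-n persists retrieved content.
function fullTextPath(articleDate: string, docType: string, dokId: string): string {
  return `analysis/daily/${articleDate}/${docType}/full-text/${dokId}.md`;
}

console.log(fullTextPath('2026-04-27', 'committeeReports', 'HB01JuU1'));
// → analysis/daily/2026-04-27/committeeReports/full-text/HB01JuU1.md
```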
scripts/download-parliamentary-data.ts

Lines changed: 46 additions & 6 deletions
```diff
@@ -34,8 +34,9 @@ import {
   flattenDocuments,
   subtractBusinessDays,
   MAX_LOOKBACK_BUSINESS_DAYS,
+  fetchFullTextForTopN,
 } from './parliamentary-data/data-downloader.js';
-import type { DocumentTypeKey } from './parliamentary-data/data-downloader.js';
+import type { DocumentTypeKey, FullTextFetchOutcome } from './parliamentary-data/data-downloader.js';
 
 import { persistDownloadedData, sanitizeDokId } from './parliamentary-data/data-persistence.js';
```

```diff
@@ -148,10 +149,11 @@ export function parseArgs(argv: string[]): {
       })
     : [];
 
-  // --auto-full-text-top-n: Override the per-type full-text enrichment limit.
-  // When set, only the top N documents per type receive fetchDocumentDetails
-  // (full-text) enrichment, enabling more targeted significance-scoring input.
-  // Defaults to MAX_ENRICHMENT_PER_TYPE when omitted (null → caller uses default).
+  // --auto-full-text-top-n: Override the per-type full-text enrichment limit and
+  // persist full text outcomes for the first N documents in the current filtered
+  // array order. Defaults to null when omitted so downloadAllDocuments uses
+  // MAX_ENRICHMENT_PER_TYPE; explicit 0 disables per-type enrichment and
+  // persisted full-text fetching. No DIW significance ranking is applied here.
  const autoFullTextTopNArg = get('--auto-full-text-top-n');
  let autoFullTextTopN: number | null = null;
  if (autoFullTextTopNArg !== null) {
```
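The parsing semantics the revised comment describes can be sketched as follows (illustrative: `parseAutoFullTextTopN` is a hypothetical name, not the repo's function, and the rejection behaviour for bad input is my assumption):

```typescript
// Omitted flag → null (caller falls back to MAX_ENRICHMENT_PER_TYPE);
// explicit 0 → disables per-type enrichment and persisted full-text fetching;
// N > 0 → limit to the first N documents in the filtered array order.
function parseAutoFullTextTopN(raw: string | null): number | null {
  if (raw === null) return null; // flag omitted → use default limit
  const n = Number(raw);
  if (!Number.isInteger(n) || n < 0) {
    throw new Error(`--auto-full-text-top-n expects a non-negative integer, got: ${raw}`);
  }
  return n;
}
```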
```diff
@@ -235,6 +237,7 @@ function serializeDataManifest(
   docCounts: Record<string, number>,
   dateFilteredTotal: number,
   dataFreshness: string | null,
+  fullTextOutcomes?: FullTextFetchOutcome[],
 ): string {
   const totalDocs = Object.values(docCounts).reduce((a, b) => a + b, 0);
   const lines: string[] = [
```
```diff
@@ -267,6 +270,21 @@ function serializeDataManifest(
     lines.push(`Data sourced from ${dataFreshness} via lookback fallback — check freshness indicators.`);
   }
 
+  // Append full-text fetch outcomes when --auto-full-text-top-n was used.
+  if (fullTextOutcomes && fullTextOutcomes.length > 0) {
+    lines.push('', '## Full-Text Fetch Outcomes', '');
+    lines.push('| dok_id | full_text_available | chars | notes |');
+    lines.push('|--------|--------------------:|------:|-------|');
+    for (const o of fullTextOutcomes) {
+      const available = o.success ? 'true' : 'false';
+      const chars = o.chars > 0 ? String(o.chars) : '0';
+      const notes = o.reason ?? (o.filePath ? `persisted: ${o.filePath}` : '');
+      lines.push(`| ${o.dokId} | ${available} | ${chars} | ${notes} |`);
+    }
+    const successCount = fullTextOutcomes.filter(o => o.success).length;
+    lines.push('', `**Full-text retrieved**: ${successCount}/${fullTextOutcomes.length} top documents`);
+  }
+
   return lines.join('\n');
 }
```
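Condensed into a runnable standalone form, the appended table logic behaves like this (shape taken from the diff above; the `Outcome` interface name and the sample values are invented):

```typescript
// Minimal re-rendering of the manifest fragment the diff appends.
interface Outcome { dokId: string; success: boolean; chars: number; reason?: string; filePath?: string }

function renderOutcomes(outcomes: Outcome[]): string {
  const lines: string[] = ['## Full-Text Fetch Outcomes', ''];
  lines.push('| dok_id | full_text_available | chars | notes |');
  lines.push('|--------|--------------------:|------:|-------|');
  for (const o of outcomes) {
    const notes = o.reason ?? (o.filePath ? `persisted: ${o.filePath}` : '');
    lines.push(`| ${o.dokId} | ${o.success ? 'true' : 'false'} | ${o.chars > 0 ? o.chars : 0} | ${notes} |`);
  }
  const ok = outcomes.filter(o => o.success).length;
  lines.push('', `**Full-text retrieved**: ${ok}/${outcomes.length} top documents`);
  return lines.join('\n');
}

console.log(renderOutcomes([
  { dokId: 'HB01JuU1', success: true, chars: 41230, filePath: 'full-text/HB01JuU1.md' },
  { dokId: 'HB01JuU2', success: false, chars: 0, reason: 'no content returned' },
]));
```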

```diff
@@ -514,10 +532,27 @@ async function runPreArticleAnalysis(opts: {
   const persistResult = persistDownloadedData(data, resolvedRm);
   console.log(` 🗄️ Persisted data for ${persistResult.written} documents to ${path.relative(REPO_ROOT, persistResult.dataRoot)}/ (${persistResult.skipped} skipped)`);
 
+  // ── Step 2b: Auto-fetch full text for top-N documents ────────────────────
+  let fullTextOutcomes: FullTextFetchOutcome[] | undefined;
+  if (autoFullTextTopN !== null && autoFullTextTopN > 0 && allDocs.length > 0) {
+    console.log(`\n📄 Step 2b: Auto-fetching full text for top-${autoFullTextTopN} documents (--auto-full-text-top-n=${autoFullTextTopN})...`);
+    console.log(' ⏱️ This may take 30–60 s — documented quality investment for deep-analysis tiers.');
+    fullTextOutcomes = await fetchFullTextForTopN(client, allDocs, autoFullTextTopN, outputDir);
+    const successCount = fullTextOutcomes.filter(o => o.success).length;
+    console.log(` ✅ Full text retrieved for ${successCount}/${fullTextOutcomes.length} document(s)`);
+    for (const o of fullTextOutcomes) {
+      if (o.success) {
+        console.log(`   ✅ ${o.dokId}: ${o.chars} chars → ${o.filePath}`);
+      } else {
+        console.warn(`   ⚠️ ${o.dokId}: ${o.reason}`);
+      }
+    }
+  }
+
   // Write data-download-manifest.md (factual download summary — NOT analysis)
   const manifestContent = serializeDataManifest(
     date, generatedAt, manifest.dataSources, manifest.docCounts,
-    allDocs.length, dataFreshness,
+    allDocs.length, dataFreshness, fullTextOutcomes,
   );
   const manifestPath = path.join(outputDir, 'data-download-manifest.md');
   fs.writeFileSync(manifestPath, manifestContent, 'utf8');
```
```diff
@@ -553,6 +588,11 @@ async function runPreArticleAnalysis(opts: {
   console.log(`\n✅ Data download complete! Results in: ${path.relative(REPO_ROOT, outputDir)}/`);
   console.log(` 📄 ${totalFiles} total files written (1 manifest + ${storedCount} documents)`);
   console.log(` 📊 ${allDocs.length} documents available for AI analysis`);
+  if (autoFullTextTopN !== null && autoFullTextTopN > 0) {
+    const successCount = fullTextOutcomes?.filter(o => o.success).length ?? 0;
+    const attempted = fullTextOutcomes?.length ?? 0;
+    console.log(` 📄 Full text: ${successCount}/${attempted} top-${autoFullTextTopN} documents (see full-text/ sub-folder)`);
+  }
   if (docType) {
     console.log(` 📋 Scoped to: ${docType}`);
   }
```

scripts/fetch-statskontoret.ts

Lines changed: 2 additions & 2 deletions
```diff
@@ -180,8 +180,8 @@ export async function fetchStatskontoretCached(
 
   try {
     links = await client.discoverDownloads(sourceKey);
-    // Stamp provenance after the fetch completes so `fetchedAt` reflects when
-    // the data was actually retrieved, not when the request was issued.
+    // Stamp provenance after discovery completes so `fetchedAt` reflects the
+    // cache completion time, not when the request was issued.
     fetchedAt = new Date().toISOString();
     writeCacheEntry(filePath, { fetchedAt, sourceKey, links });
   } catch (error) {
```
