diff --git a/.github/labeler.yml b/.github/labeler.yml index d5afc752fe..0a0d70d027 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -145,7 +145,6 @@ data-pipeline: - changed-files: - any-glob-to-any-file: - ".github/workflows/data-pipeline.yml" - - "scripts/pipeline-*.js" - "scripts/fetch-*.js" - "scripts/validate-*.js" - title: "pipeline:*" @@ -176,7 +175,6 @@ i18n: - "sitemap_*.xml" - "news/**/*_*.html" - "TRANSLATION_GUIDE.md" - - "scripts/extract-vocabulary.js" - title: "i18n:*" - title: "localization:*" - title: "l10n:*" diff --git a/.github/workflows/SHARED_PROMPT_PATTERNS.md b/.github/workflows/SHARED_PROMPT_PATTERNS.md index bd8b06234e..57f65f6447 100644 --- a/.github/workflows/SHARED_PROMPT_PATTERNS.md +++ b/.github/workflows/SHARED_PROMPT_PATTERNS.md @@ -371,29 +371,56 @@ Scripts MUST NEVER generate any of these — this is the AI agent's exclusive re The following script directories and functions previously generated analysis content and are now **DEPRECATED** — their analysis functions are replaced by AI agent analysis in workflow prompts: +> ⚠️ **IMPORTANT DISTINCTION**: The table below lists **analysis-generating** functions that are deprecated. The **HTML rendering** functions (`generateSwotSection()`, `generateDashboardSection()`, `generateMindmapSection()`) are **NOT deprecated** — they are active HTML renderers that take structured data and produce formatted HTML sections. AI agents produce analysis content in markdown files; scripts then render that content into HTML using these renderer functions. See §HTML RENDERER FUNCTIONS below. + | Directory/Function | Status | Replacement | |-----------|--------|-------------| | `scripts/ai-analysis/` | ⚠️ DEPRECATED for analysis generation | AI agent performs analysis per workflow prompts | | `scripts/analysis-framework/` | ⚠️ DEPRECATED for analysis generation | AI agent uses methodology guides directly | -| `scripts/data-transformers/content-generators/ai-swot-analyzer.ts` | ⚠️ DEPRECATED | AI agent generates SWOT per political-swot-framework.md | -| `scripts/data-transformers/content-generators/stakeholder-swot-section.ts` | ⚠️ DEPRECATED | AI agent generates stakeholder analysis per stakeholder-impact.md | -| `scripts/generate-news-enhanced/ai-analysis-pipeline.ts` → `buildDynamicSwot()` | ⚠️ DEPRECATED | AI prompt: "Generate SWOT for all 8 stakeholder groups with dok_id evidence" | -| `scripts/generate-news-enhanced/ai-analysis-pipeline.ts` → `buildStrategicImplications()` | ⚠️ DEPRECATED | AI prompt: "Write strategic implications citing specific policy signals" | -| `scripts/generate-news-enhanced/ai-analysis-pipeline.ts` → `buildKeyTakeaways()` | ⚠️ DEPRECATED | AI prompt: "Extract 5 key takeaways with confidence levels" | -| `scripts/generate-news-enhanced/ai-analysis-pipeline.ts` → `buildLegislativeImpact()` | ⚠️ DEPRECATED | AI prompt: "Assess legislative impact using committee + vote data" | -| `scripts/generate-news-enhanced/ai-analysis-pipeline.ts` → `buildCrossPartyImplications()` | ⚠️ DEPRECATED | AI prompt: "Analyze cross-party dynamics from voting records" | +| `scripts/generate-news-enhanced/swot-analyzer.ts` | ⚠️ DEPRECATED | AI agent generates SWOT per political-swot-framework.md | +| `scripts/data-transformers/content-generators/index.ts` → `generateStakeholderSwotSection()` | ⚠️ DEPRECATED | AI agent generates stakeholder analysis per stakeholder-impact.md | +| `scripts/generate-news-enhanced/ai-analysis-pipeline.ts` → `AIAnalysisPipeline` class | ⚠️ DEPRECATED | AI agent performs the primary analysis; class still runs as a deprecated/stub runtime pipeline | | `scripts/data-transformers/content-generators/shared.ts` → `generateDeepAnalysisSection()` | ⚠️ DEPRECATED | AI prompt: "Write 5W deep analysis (Who/What/When/Why/Winners)" | -| `scripts/data-transformers/content-generators/shared.ts` → `generateTimelineContext()` | 🔴 REMOVED | Now outputs `AI_MUST_REPLACE` marker — AI MUST write specific timeline analysis | -| `scripts/data-transformers/content-generators/shared.ts` → `broadAgendaText()`, `focusedAgendaText()`, `defaultWhyText()` | 🔴 REMOVED | Now outputs `AI_MUST_REPLACE` marker — AI MUST write specific "Why This Matters" analysis | -| `scripts/data-transformers/content-generators/shared.ts` → `genericImpactText()`, `propImpactText()`, `betImpactText()`, `motImpactText()` | 🔴 REMOVED | Now outputs `AI_MUST_REPLACE` marker — AI MUST write specific political impact analysis | -| `scripts/data-transformers/content-generators/shared.ts` → `genericConsequencesText()`, `propConsequencesText()`, `motConsequencesText()` | 🔴 REMOVED | Now outputs `AI_MUST_REPLACE` marker — AI MUST write specific consequences analysis | -| `scripts/data-transformers/content-generators/shared.ts` → `defaultCriticalText()` | 🔴 REMOVED | Now outputs `AI_MUST_REPLACE` marker — AI MUST write specific critical assessment | -| `scripts/editorial-pillars.ts` → `INTER_PILLAR_TRANSITIONS` | 🔴 REMOVED | Transitions now empty — AI MUST write article-specific connective prose or omit | -| `scripts/data-transformers/content-generators/newsworthiness.ts` → `scoreNewsworthiness()` | ⚠️ DEPRECATED | AI prompt: "Score newsworthiness 0-100 with dimension breakdown" | +| `scripts/data-transformers/content-generators/shared.ts` → `generateTimelineContext()` | 🔴 STUB | Now outputs `AI_MUST_REPLACE` marker — AI MUST write specific timeline analysis | +| `scripts/data-transformers/content-generators/shared.ts` → `broadAgendaText()`, `focusedAgendaText()`, `defaultWhyText()` | 🔴 STUB | Now outputs `AI_MUST_REPLACE` marker — AI MUST write specific "Why This Matters" analysis | +| `scripts/data-transformers/content-generators/shared.ts` → `genericImpactText()`, `propImpactText()`, `betImpactText()`, `motImpactText()` | 🔴 STUB | Now outputs `AI_MUST_REPLACE` marker — AI MUST write specific political impact analysis | +| `scripts/data-transformers/content-generators/shared.ts` → `genericConsequencesText()`, `propConsequencesText()`, `motConsequencesText()` | 🔴 STUB | Now outputs `AI_MUST_REPLACE` marker — AI MUST write specific consequences analysis | +| `scripts/data-transformers/content-generators/shared.ts` → `defaultCriticalText()` | 🔴 STUB | Now outputs `AI_MUST_REPLACE` marker — AI MUST write specific critical assessment | +| `scripts/editorial-pillars.ts` → `INTER_PILLAR_TRANSITIONS` | 🔴 EMPTY | Transitions now return empty strings — AI MUST write article-specific connective prose or omit | +| `scripts/data-transformers/content-generators/newsworthiness.ts` → `scoreNewsworthiness()` | ✅ ACTIVE (data utility) | Heuristic scoring retained for routing/experimentation/tests; AI MUST independently assess editorial significance | | `scripts/data-transformers/content-generators/shared.ts` → all `*Text()` templates | ⚠️ DEPRECATED | AI prompt: "Write editorial analysis from actual document data" | **These scripts may still be called for data downloading and HTML formatting functions**, but their analysis output (SWOT entries, risk scores, classifications, titles, descriptions, editorial judgments) MUST be treated as stubs that the AI agent MUST overwrite with real template-compliant analysis. +#### HTML Renderer Functions (NOT Deprecated — Active Utilities) + +The following functions are **HTML renderers**, not analysis generators. They take structured data and produce formatted HTML. They are used by `generate-news-enhanced` to build article sections and are **actively maintained**: + +| Function | Module | Purpose | AI Agent Relationship | +|----------|--------|---------|----------------------| +| `generateSwotSection({ data, lang })` | `swot-section.ts` | Renders SWOT quadrant HTML from `SwotData` | **Current implementation:** AI writes SWOT analysis in markdown → script reads it → extracts SWOT data → calls this function to render HTML | +| `generateDashboardSection({ data, lang })` | `dashboard-section.ts` | Renders Chart.js canvas HTML from chart config | Renderer utility: used when structured chart data is provided by the pipeline; automatic extraction from AI analysis markdown is not currently implemented as a standard flow | +| `generateMindmapSection({ topic, branches, lang })` | `mindmap-section.ts` | Renders CSS mindmap HTML from branch data | Renderer utility: used when structured mindmap branch data is provided by the pipeline; analysis-to-mindmap extraction is workflow-specific or future-facing | +| `generateMultiPanelDashboardSection(...)` | `dashboard-section.ts` | Renders multi-panel CSS dashboards | Renderer utility for pre-structured panel data; not a guaranteed current markdown-extraction path | +| `generateEconomicDashboardSection(...)` | `economic-dashboard-section.ts` | Renders economic indicator dashboard | Renderer utility for structured economic dashboard data when available; automated extraction from agent analysis should be treated as planned or workflow-specific unless separately implemented | + +> **How AI agents interact with these**: AI agents do NOT call these TypeScript functions directly. These utilities are renderers only. Unless a specific workflow explicitly implements and validates a machine-readable input format, `generate-news-enhanced` should be treated as consuming final article HTML/section-ready content rather than generically extracting SWOT entries, chart data, or mindmap structures from markdown analysis files. Currently, only SWOT data extraction from analysis markdown is implemented as a standard flow. Workflow authors MUST NOT assume a supported structured-parse step exists for dashboard, mindmap, or other renderer inputs; if a renderer is used, the workflow must explicitly define how its input data is produced and validated. + +#### Minor TypeScript/Script Corrections Policy + +> **Agentic workflows MAY make minor corrections** to TypeScript code and scripts when necessary to complete their mission, but MUST use AI prompts for all important analysis and content creation. + +| Allowed Minor Corrections | Prohibited Changes | +|--------------------------|-------------------| +| Fix broken file paths or import statements | Rewrite analysis-generating logic | +| Correct typos in template strings | Add new analysis functions | +| Fix date format bugs in scripts | Change article quality thresholds | +| Update stale configuration values | Modify editorial framework scoring | +| Fix syntax errors blocking article generation | Change MCP tool invocation patterns | +| Adjust HTML template structure for validation | Remove or bypass quality gates | + +**Rule**: If a correction affects analysis content quality, it MUST be done via AI prompt analysis — not by editing TypeScript code. + --- ## 🏆 AI ANALYSIS QUALITY HIERARCHY — AI Always Wins @@ -1321,6 +1348,9 @@ Before generating articles, consult these skills: 7. **`scripts/prompts/v2/political-analysis.md`** — Core political analysis framework (6 analytical lenses) 8. **`scripts/prompts/v2/stakeholder-perspectives.md`** — Multi-perspective analysis instructions 9. **`scripts/prompts/v2/quality-criteria.md`** — Quality self-assessment rubric (minimum 7/10) +10. **`scripts/prompts/v2/per-file-intelligence-analysis.md`** — Per-file AI analysis protocol +11. **`analysis/methodologies/ai-driven-analysis-guide.md`** — Master methodology guide (v5.0): analysis-driven article decisions, policy domain inference, empty analysis fallback, Election 2026 lens +12. **`analysis/templates/per-file-political-intelligence.md`** — Per-file analysis output template (SWOT, risk matrix, threat taxonomy, Mermaid diagrams) ``` ## 🧠 Repo Memory — Persistent Cross-Workflow Context (copy into every workflow) @@ -2124,6 +2154,8 @@ Read these methodology documents to guide your analysis: - **`analysis/templates/stakeholder-impact.md`** — Stakeholder impact template - **`analysis/templates/significance-scoring.md`** — Significance scoring template - **`scripts/prompts/v2/per-file-intelligence-analysis.md`** — Detailed analysis prompt +- **`analysis/templates/per-file-political-intelligence.md`** — Per-file analysis output template (SWOT, risk, threat, Mermaid) +- **`analysis/methodologies/ai-driven-analysis-guide.md`** — Master methodology guide (v5.0) #### Protocol 1. **Catalog:** Run `npx tsx scripts/catalog-downloaded-data.ts --pending-only` to list files needing analysis diff --git a/.github/workflows/news-evening-analysis.md b/.github/workflows/news-evening-analysis.md index bf19d41dd9..98be8b7128 100644 --- a/.github/workflows/news-evening-analysis.md +++ b/.github/workflows/news-evening-analysis.md @@ -240,12 +240,12 @@ Every analysis MUST include an **Election 2026 Implications** section assessing: ### Phase 1 — Data Collection & Initial Analysis 1. Fetch today's activity from MCP (`search_anforanden` — filter by `datum`, `get_betankanden` — filter by `publicerad`, `search_voteringar` — filter by `datum`, `get_sync_status`) -2. Score newsworthiness of each item using `scoreNewsworthiness()` logic +2. Assess newsworthiness of each item using AI analysis (score 0-100 with dimension breakdown: political significance, public impact, timeliness, unexpectedness) 3. Build initial outline: day-in-review lede, top stories, votes summary, tonight's context ### Phase 2 — Depth Enhancement (for `deep`/`comprehensive` depth) 1. **Quick SWOT**: 1-paragraph SWOT overview of the day's political balance -2. **Activity Dashboard**: Generate `generateDashboardSection()` with ≥1 chart (today's activity breakdown) +2. **Activity Dashboard**: Include a concise activity breakdown by type/party as a Markdown table or bullet list; do not assume an automatic dashboard rendering step unless a workflow-specific validated input format is defined. 3. **Quality Gate**: - Verify article covers events from today's date (not yesterday or tomorrow) - Verify all Swedish API text is translated diff --git a/.github/workflows/news-interpellations.md b/.github/workflows/news-interpellations.md index 5eb0fe0da0..d3ae7d1384 100644 --- a/.github/workflows/news-interpellations.md +++ b/.github/workflows/news-interpellations.md @@ -255,7 +255,7 @@ Every analysis MUST include an **Election 2026 Implications** section assessing: ### Phase 2 — Iterative Depth Enhancement (repeat per `analysis_depth`) For each AI iteration: 1. **SWOT Analysis**: Generate multi-stakeholder SWOT with ALL 8 groups (Citizens, Government Coalition, Opposition Bloc, Business/Industry, Civil Society, International/EU, Judiciary/Constitutional, Media/Public Opinion). Use structured evidence tables with columns: `#`, `Statement`, `Evidence (frs ID/dok_id)`, `Confidence`, `Impact`, `Entry Date`. Every entry MUST cite specific interpellation frs ID, minister name, and policy area. -2. **Accountability Dashboard**: Generate `generateDashboardSection()` with ≥1 chart (interpellations by minister or party) +2. **Accountability Dashboard**: Include at least one chart-ready summary (interpellations by minister or party), formatted as a clear Markdown table or bullet list; do not assume automatic dashboard rendering unless a separate workflow step explicitly parses and renders it. 3. **Quality Gate** (check before next iteration): - Verify ministerial accountability section names specific ministers and their policy areas - Verify no identical "Why It Matters" text across entries — each must reference the specific minister and policy context diff --git a/.github/workflows/news-month-ahead.md b/.github/workflows/news-month-ahead.md index ccf0e5f321..528e644690 100644 --- a/.github/workflows/news-month-ahead.md +++ b/.github/workflows/news-month-ahead.md @@ -222,8 +222,8 @@ Every analysis MUST include an **Election 2026 Implications** section assessing: ### Phase 2 — Iterative Depth Enhancement (repeat per `analysis_depth`) For each AI iteration: 1. **Full SWOT Analysis**: Generate multi-stakeholder SWOT with ALL 8 groups (Citizens, Government Coalition, Opposition Bloc, Business/Industry, Civil Society, International/EU, Judiciary/Constitutional, Media/Public Opinion) focusing on upcoming legislative priorities. Use structured evidence tables with columns: `#`, `Statement`, `Evidence (dok_id)`, `Confidence`, `Impact`, `Entry Date`. Every entry MUST cite specific scheduled debate, committee meeting, or expected vote. -2. **Strategic Dashboard**: Generate `generateDashboardSection()` with ≥2 charts (documents by week, policy domain distribution) -3. **Policy Mindmap**: Generate `generateMindmapSection()` showing inter-connected policy areas +2. **Strategic Dashboard Summary**: Provide concise comparative summaries for at least 2 analytical views (for example, documents by week and policy domain distribution) using prose and/or markdown tables that can be included directly in the article without requiring any undocumented rendering pipeline. +3. **Policy Relationship Outline**: Describe inter-connected policy areas as a clear hierarchical outline (central topic, major branches, and sub-items) in standard markdown so the relationships are explicit without assuming automated mindmap rendering. 4. **Quality Gate** (check before next iteration): - Verify forward-looking watch-points reference specific scheduled events - Verify all Swedish API text is translated diff --git a/.github/workflows/news-monthly-review.md b/.github/workflows/news-monthly-review.md index 76fb746c80..f4ac8c3b66 100644 --- a/.github/workflows/news-monthly-review.md +++ b/.github/workflows/news-monthly-review.md @@ -208,10 +208,10 @@ Every analysis MUST include an **Election 2026 Implications** section assessing: ### Phase 2 — Iterative Depth Enhancement (3 iterations for `deep`/`comprehensive`) For each AI iteration: -1. **Full SWOT**: Generate `generateSwotSection()` with ≥5 stakeholder perspectives per quadrant (government coalition, opposition parties, affected citizens, EU/Nordic context, media/civil society, business sector, academic/think-tanks) -2. **Monthly Dashboard**: Generate `generateEconomicDashboardSection()` with ≥4 charts (monthly trends, party activity ranking, policy domain heatmap, legislative pipeline) -3. **Policy Mindmap**: Generate `generateMindmapSection()` showing the month's cross-cutting policy themes -4. **Stakeholder SWOT**: Generate `generateStakeholderSwotSection()` with ≥7 perspectives for comprehensive depth +1. **Full SWOT**: Write a clearly structured SWOT analysis with ≥5 stakeholder perspectives per quadrant (government coalition, opposition parties, affected citizens, EU/Nordic context, media/civil society, business sector, academic/think-tanks). Format it as publication-ready markdown with explicit `Strengths`, `Weaknesses`, `Opportunities`, and `Threats` headings. +2. **Monthly Dashboard Summary**: Provide a dashboard-style analytical summary covering at least 4 evidence-based views: monthly trends, party activity ranking, policy domain heatmap summary, and legislative pipeline status. Present the underlying figures and comparisons directly in markdown text and bullet lists or tables; do not assume any machine-readable chart schema or automatic rendering step. +3. **Policy Theme Map**: Describe the month's cross-cutting policy themes as a hierarchical outline with one central theme and clearly labelled subthemes. Use readable markdown headings or nested bullet lists rather than implying a structured mindmap payload or CSS-rendered component. +4. **Stakeholder SWOT**: Write a stakeholder-focused SWOT with ≥7 perspectives for comprehensive depth, and cite specific `dok_id` evidence for each entry. 5. **Quality Gate** (check before next iteration): - Verify trend comparison uses actual previous-month data from MCP - Verify party rankings section covers all 8 Riksdag parties diff --git a/.github/workflows/news-motions.md b/.github/workflows/news-motions.md index 003a0c5974..c8fd16ce59 100644 --- a/.github/workflows/news-motions.md +++ b/.github/workflows/news-motions.md @@ -241,7 +241,7 @@ Every analysis MUST include an **Election 2026 Implications** section assessing: ### Phase 2 — Iterative Depth Enhancement (repeat per `analysis_depth`) For each AI iteration: 1. **SWOT Analysis**: Generate multi-stakeholder SWOT with ALL 8 groups (Citizens, Government Coalition, Opposition Bloc, Business/Industry, Civil Society, International/EU, Judiciary/Constitutional, Media/Public Opinion). Use structured evidence tables with columns: `#`, `Statement`, `Evidence (mot. ID/dok_id)`, `Confidence`, `Impact`, `Entry Date`. Every entry MUST cite specific motion number, party origin, and policy area. -2. **Coalition Dashboard**: Generate `generateDashboardSection()` with ≥1 chart (party motion count) +2. **Coalition Dashboard**: Include at least one chart-ready summary in the article output (for example, party motion counts or thematic distribution), formatted as a clear Markdown table or bullet list; do not assume automatic dashboard rendering unless a separate workflow step explicitly parses and renders it. 3. **Quality Gate** (check before next iteration): - Verify opposition strategy section is substantive (not just party counts) - Verify no identical "Why It Matters" text across entries diff --git a/.github/workflows/news-propositions.md b/.github/workflows/news-propositions.md index ca81e25c65..eaca7e4ecc 100644 --- a/.github/workflows/news-propositions.md +++ b/.github/workflows/news-propositions.md @@ -232,9 +232,9 @@ Every analysis MUST include an **Election 2026 Implications** section assessing: ### Phase 2 — Iterative Depth Enhancement (repeat per `analysis_depth`) For each AI iteration: -1. **SWOT Analysis**: Generate `generateSwotSection()` with ≥3 stakeholder perspectives (≥5 when `analysis_depth` is `deep` or `comprehensive`) -2. **Policy Dashboard**: Generate `generateDashboardSection()` with ≥1 chart (≥2 for `deep`/`comprehensive`) -3. **Mindmap**: Generate `generateMindmapSection()` showing policy impact connections (only for `deep`/`comprehensive`) +1. **SWOT Analysis**: Write SWOT analysis with ≥3 stakeholder perspectives (≥5 when `analysis_depth` is `deep` or `comprehensive`) as publication-ready prose and bullet points +2. **Policy Comparison Summary**: Provide a concise markdown table or bullet list with ≥1 comparative policy metric set (≥2 for `deep`/`comprehensive`) suitable for later manual visualization if needed; do not assume any automatic chart rendering +3. **Impact Map**: For `deep`/`comprehensive`, describe policy impact connections as a nested markdown bullet list (mindmap-style) that can be published as text without requiring a renderer 4. **Quality Gate** (check before next iteration): - Verify legislative timeline is included per proposition - Verify no identical "Why It Matters" text across entries diff --git a/.github/workflows/news-realtime-monitor.md b/.github/workflows/news-realtime-monitor.md index 80258296ee..73880b22fe 100644 --- a/.github/workflows/news-realtime-monitor.md +++ b/.github/workflows/news-realtime-monitor.md @@ -1045,7 +1045,7 @@ For breaking news, this workflow uses the `breaking` profile (from `scripts/edit ### Phase 2 — Depth Enhancement (per `analysis_depth`) When `analysis_depth` is `deep` or `comprehensive`: 1. Add **Quick SWOT** paragraph for each major article -2. Add **Activity Chart** using `generateDashboardSection()` +2. Add **Activity Summary** — include a concise trend summary as prose or a simple Markdown bullet list/table (for example, recent item counts by time period or source). Do not emit a standalone machine-readable chart payload here unless the workflow explicitly defines the schema and downstream consumption step. 3. **Quality Gate**: word count ≥ 400, no identical why-it-matters, all Swedish text translated ### Phase 3 — Final Quality Gate Before PR diff --git a/.github/workflows/news-week-ahead.md b/.github/workflows/news-week-ahead.md index 906673769f..4259bcc9c9 100644 --- a/.github/workflows/news-week-ahead.md +++ b/.github/workflows/news-week-ahead.md @@ -205,7 +205,7 @@ Every analysis MUST include an **Election 2026 Implications** section assessing: ### Phase 2 — Depth Enhancement (for `deep`/`comprehensive` depth only) 1. **Quick SWOT**: 1-paragraph SWOT overview of the week's political balance -2. **Event Dashboard**: Generate `generateDashboardSection()` with ≥2 charts (committee meeting density, event type breakdown) +2. **Event Dashboard**: Provide concise summary data for ≥2 analytical views (committee meeting density, event type breakdown) as prose or markdown tables that can be included directly in the article without requiring any undocumented rendering pipeline 3. **Quality Gate**: - Verify watch-points are specific and actionable (not just event titles) - Verify all Swedish API text is translated diff --git a/.github/workflows/news-weekly-review.md b/.github/workflows/news-weekly-review.md index c6c6fbe14e..27a790c92b 100644 --- a/.github/workflows/news-weekly-review.md +++ b/.github/workflows/news-weekly-review.md @@ -207,9 +207,9 @@ Every analysis MUST include an **Election 2026 Implications** section assessing: ### Phase 2 — Iterative Depth Enhancement (repeat per `analysis_depth`) For each AI iteration: -1. **Condensed SWOT**: Generate `generateSwotSection()` with ≥3 stakeholder perspectives on the week's balance of power -2. **Week-in-Review Dashboard**: Generate `generateDashboardSection()` with ≥2 charts (activity by day, document type breakdown) -3. **Policy Mindmap**: Generate `generateMindmapSection()` showing how the week's stories interconnect +1. **Condensed SWOT**: Write SWOT analysis with ≥3 stakeholder perspectives on the week's balance of power, using clear markdown headings and bullets suitable for the standard SWOT extraction flow +2. **Week-in-Review Dashboard**: Provide ≥2 visualization-ready summaries (for example: activity by day and document type breakdown) with explicit labels, values, and short interpretation text; do not assume an interactive dashboard renderer unless a workflow-specific validated input format is defined +3. **Policy Mindmap**: Provide a structured outline showing how the week's stories interconnect (central topic + branches + sub-branches) in nested markdown bullets; do not assume a mindmap render pipeline unless a workflow-specific validated input format is defined 4. **Quality Gate** (check before next iteration): - Verify the article covers the actual past week (Mon–Fri), not a forecast - Verify voting analysis section includes specific vote outcomes diff --git a/knip.json b/knip.json index 15cecf6438..057f0c3ab8 100644 --- a/knip.json +++ b/knip.json @@ -5,7 +5,6 @@ "js/*.js", "dashboard/*.js", "scripts/*.ts", - "scripts/*.js", "scripts/news-types/*.ts", "scripts/committees-dashboard/*.ts", "scripts/coalition-dashboard/*.ts", @@ -19,7 +18,6 @@ "js/**/*.js", "dashboard/**/*.js", "scripts/**/*.ts", - "scripts/**/*.js", "tests/**/*.ts", "tests/**/*.js", "cypress/**/*.js" @@ -29,11 +27,9 @@ ], "ignoreDependencies": [ "htmlhint", - "js-yaml", - "playwright" + "js-yaml" ], "ignoreBinaries": [ - "playwright", "cypress", "python3", "linkinator" diff --git a/package.json b/package.json index b865edba2f..6ae6036057 100644 --- a/package.json +++ b/package.json @@ -75,7 +75,7 @@ "generate-sitemap": "node scripts/generate-sitemap.ts", "generate-sitemap-html": "npx tsx scripts/generate-sitemap-html.ts", "generate-rss": "npx tsx scripts/generate-rss.ts", - "generate-news-backport": "node scripts/generate-news-backport.ts", + "htmlhint": "sh -c 'htmlhint *.html; set -- news/*.html; if [ -e \"$1\" ]; then htmlhint \"$@\"; else echo \"No news/*.html files to lint\"; fi'", "lint": "eslint .", "lint:fix": "eslint . --fix", diff --git a/scripts/deep-inspection/index.ts b/scripts/deep-inspection/index.ts deleted file mode 100644 index d9e988f578..0000000000 --- a/scripts/deep-inspection/index.ts +++ /dev/null @@ -1,83 +0,0 @@ -/** - * @module deep-inspection - * @description DeepInspectionPipeline — thin programmatic entrypoint wrapper - * around `generateDeepInspection()`. - * - * The underlying generator performs collection, analysis, synthesis, and - * rendering internally. This class intentionally does not re-implement those - * phases; it only delegates execution and returns enriched run metadata. - * - * @example - * ```typescript - * const pipeline = new DeepInspectionPipeline(); - * const result = await pipeline.run(); - * ``` - * - * @author Hack23 AB - * @license Apache-2.0 - */ - -import { generateDeepInspection } from '../generate-news-enhanced/generators.js'; -import { analysisDepth, focusTopic } from '../generate-news-enhanced/config.js'; -import type { GenerationResult } from '../types/article.js'; - -/** - * Result produced by a pipeline run. - * Extends GenerationResult with optional depth and topic metadata. - */ -export interface DeepInspectionResult extends GenerationResult { - /** Effective analysis depth used. */ - depth: 1 | 2 | 3 | 4; - /** Focus topic if provided. */ - topic?: string; -} - -/** - * DeepInspectionPipeline delegates execution to - * `generateDeepInspection()` in generators.ts, which - * reads targeting parameters and `analysisDepth` from CLI config. When used - * programmatically via this class, those CLI values are already set at module - * load time — so `run()` simply invokes the generator and enriches the result. - * - * All targeting (document IDs, URLs) and analysis depth are controlled via CLI - * arguments parsed by `config.ts` at module load time. This class provides a - * clean programmatic entrypoint without duplicating CLI parameter handling. - */ -export class DeepInspectionPipeline { - /** - * Phase labels for logging purposes. - * @internal - */ - private phaseLabel(depth: 1 | 2 | 3 | 4): string { - const labels: Record<1 | 2 | 3 | 4, string> = { - 1: 'Surface analysis — events & actors', - 2: 'Predictive + historical context', - 3: 'Full report with executive summary & methodology', - 4: 'Full multi-iteration intelligence report', - }; - return labels[depth]; - } - - /** - * Run deep-inspection generation via the underlying generator wrapper. - * - * @returns DeepInspectionResult with success status, file count, and slug - */ - async run(): Promise { - const depth = analysisDepth; - const topic = focusTopic || undefined; - - console.log(`🔬 DeepInspectionPipeline starting — depth ${depth}: ${this.phaseLabel(depth)}`); - if (topic) console.log(` Topic: ${topic}`); - - const result = await generateDeepInspection(); - - return { - ...result, - depth, - topic, - }; - } -} - -export default DeepInspectionPipeline; diff --git a/scripts/extract-vocabulary.ts b/scripts/extract-vocabulary.ts deleted file mode 100644 index eee45f922c..0000000000 --- a/scripts/extract-vocabulary.ts +++ /dev/null @@ -1,299 +0,0 @@ -/** - * @module Intelligence/Terminology - * @category Intelligence Operations / Supporting Infrastructure - * @name Vocabulary Extraction - Political Terminology Pattern Analysis - * - * @description - * Advanced terminology extraction system analyzing translated news articles across - * all 14 supported languages to identify and catalog political terminology patterns. - * - * @author Hack23 AB (Linguistic Intelligence Team) - * @license Apache-2.0 - * @version 2.5.0 - */ - -import { readFileSync, readdirSync } from 'fs'; -import { basename, join } from 'path'; - -import type { Language } from './types/language.js'; - -// --------------------------------------------------------------------------- -// Types -// --------------------------------------------------------------------------- - -/** Map of language codes to language names */ -const LANGUAGES: Readonly> = { - en: 'English', sv: 'Swedish', da: 'Danish', no: 'Norwegian', fi: 'Finnish', - de: 'German', fr: 'French', es: 'Spanish', nl: 'Dutch', - ar: 'Arabic', he: 'Hebrew', ja: 'Japanese', ko: 'Korean', zh: 'Chinese', -}; - -interface ExtractedTerms { - titles?: string[]; - watchLabel?: string; - committeeLabel?: string; - documentLabel?: string; - mainTitle?: string; -} - -interface ArticleSample { - readonly file: string; - readonly type: string; - readonly terms: ExtractedTerms; -} - -interface LanguageResult { - readonly language: string; - readonly code: Language; - samples: ArticleSample[]; -} - -interface SkippedFile { - readonly file: string; - readonly reason: string; -} - -type AnalysisResults = Record; - -// Track skipped files for warning summary -const skippedFiles: SkippedFile[] = []; - -// --------------------------------------------------------------------------- -// Term extraction -// --------------------------------------------------------------------------- - -/** - * Extract political terms from HTML content using structure-based approach. - */ -function extractTerms(content: string, _lang: Language): ExtractedTerms { - const terms: ExtractedTerms = {}; - - // Extract titles (main political terminology) - const h3Pattern = /

(.*?)<\/h3>/g; - const h3Matches: string[] = []; - let h3Match: RegExpExecArray | null; - while ((h3Match = h3Pattern.exec(content)) !== null) { - const cleanText = h3Match[1]!.replace(/<[^>]+>/g, '').trim(); - if (cleanText) h3Matches.push(cleanText); - } - terms.titles = h3Matches.slice(0, 10); - - // Extract "What to Watch" heading (any language) - structure-based - const h2Pattern = /]*>([^<]+)<\/h2>/g; - const h2Matches: string[] = []; - let h2Match: RegExpExecArray | null; - while ((h2Match = h2Pattern.exec(content)) !== null) { - const text = h2Match[1]!.trim(); - if (text.length > 5 && text.length < 100) { - h2Matches.push(text); - } - } - if (h2Matches.length > 0) { - terms.watchLabel = h2Matches[0]; - } - - // Extract structured labels from …: (language-agnostic) - const strongLabelPattern = /\s*([^:<]+?)\s*:\s*<\/strong>/g; - const strongLabels: string[] = []; - let strongMatch: RegExpExecArray | null; - while ((strongMatch = strongLabelPattern.exec(content)) !== null) { - const label = strongMatch[1]!.trim(); - if (label.length > 0 && label.length < 50) { - strongLabels.push(label); - } - } - - if (strongLabels[0]) terms.committeeLabel = strongLabels[0]; - if (strongLabels[1]) terms.documentLabel = strongLabels[1]; - - // Extract article type from title - const titleMatch = content.match(/

([^<]+)<\/h1>/); - if (titleMatch) terms.mainTitle = titleMatch[1]!.trim(); - - return terms; -} - -// --------------------------------------------------------------------------- -// Article analysis -// --------------------------------------------------------------------------- - -/** - * Analyze all news articles. - */ -function analyzeArticles(directory: string = 'news', datePrefix: string | null = null): AnalysisResults { - const results: AnalysisResults = {} as AnalysisResults; - - for (const lang of Object.keys(LANGUAGES) as Language[]) { - results[lang] = { - language: LANGUAGES[lang], - code: lang, - samples: [], - }; - } - - try { - const files = readdirSync(directory).filter((f) => { - if (!f.endsWith('.html')) return false; - if (datePrefix && !f.includes(datePrefix)) return false; - return true; - }); - - console.log(`\nScanning ${files.length} HTML files in ${directory}/`); - if (datePrefix) { - console.log(`Filtering by date prefix: "${datePrefix}"\n`); - } - - for (const file of files) { - const match = file.match(/-([a-z]{2})\.html$/); - if (!match) { - skippedFiles.push({ file, reason: 'No language code in filename' }); - continue; - } - - const lang = match[1] as string; - if (!results[lang as Language]) { - skippedFiles.push({ file, reason: `Unknown language code: ${lang}` }); - continue; - } - - try { - const content = readFileSync(join(directory, file), 'utf-8'); - const terms = extractTerms(content, lang as Language); - - // Determine article type - let articleType = 'general'; - if (file.includes('committee')) articleType = 'committee-reports'; - else if (file.includes('proposition')) articleType = 'propositions'; - else if (file.includes('motion')) articleType = 'motions'; - else if (file.includes('evening')) articleType = 'evening-analysis'; - else if (file.includes('week-ahead')) articleType = 'week-ahead'; - - results[lang as Language].samples.push({ - file: basename(file), - type: articleType, - terms, - }); - } catch (error: unknown) { - skippedFiles.push({ file, reason: `Read error: ${(error as Error).message}` }); - } - } - } catch (error: unknown) { - console.error(`Error reading directory: ${(error as Error).message}`); - process.exit(1); - } - - return results; -} - -// --------------------------------------------------------------------------- -// Report -// --------------------------------------------------------------------------- - -/** - * Generate vocabulary report. - */ -function generateReport(results: AnalysisResults): void { - console.log('\n========================================'); - console.log('Political Vocabulary Analysis Report'); - console.log('========================================\n'); - - for (const [code, data] of Object.entries(results) as Array<[Language, LanguageResult]>) { - if (data.samples.length === 0) continue; - - console.log(`\n## ${data.language} (${code.toUpperCase()})`); - console.log(`Samples analyzed: ${data.samples.length}`); - - // Collect unique labels - const watchLabels = new Set(); - const committeeLabels = new Set(); - const documentLabels = new Set(); - const _mainTitles = new Set(); - - for (const sample of data.samples) { - if (sample.terms.watchLabel) watchLabels.add(sample.terms.watchLabel); - if (sample.terms.committeeLabel) committeeLabels.add(sample.terms.committeeLabel); - if (sample.terms.documentLabel) documentLabels.add(sample.terms.documentLabel); - if (sample.terms.mainTitle) _mainTitles.add(sample.terms.mainTitle); - } - - if (watchLabels.size > 0) console.log(` "What to Watch": ${Array.from(watchLabels).join(', ')}`); - if (committeeLabels.size > 0) console.log(` "Committee": ${Array.from(committeeLabels).join(', ')}`); - if (documentLabels.size > 0) console.log(` "Document": ${Array.from(documentLabels).join(', ')}`); - - // Show sample titles from any articles (prioritize committee reports) - const committeeReports = data.samples.filter((s) => s.type === 'committee-reports'); - const sampleWithTitles = - committeeReports.find((s) => s.terms.titles && s.terms.titles.length > 0) || - data.samples.find((s) => s.terms.titles && s.terms.titles.length > 0); - - if (sampleWithTitles && sampleWithTitles.terms.titles && sampleWithTitles.terms.titles.length > 0) { - console.log(` Sample titles: ${sampleWithTitles.terms.titles.slice(0, 3).join(', ')}`); - } - } - - // Warning summary - if (skippedFiles.length > 0) { - console.log('\n\n⚠️ WARNING: Skipped Files Summary'); - console.log('====================================='); - console.log(`Total skipped: ${skippedFiles.length}\n`); - - // Group by reason - const byReason: Record = {}; - for (const { file, reason } of skippedFiles) { - if (!byReason[reason]) byReason[reason] = []; - byReason[reason].push(file); - } - - for (const [reason, files] of Object.entries(byReason)) { - console.log(`${reason}: ${files.length} file(s)`); - if (files.length <= 5) { - files.forEach((f) => console.log(` - ${f}`)); - } else { - files.slice(0, 3).forEach((f) => console.log(` - ${f}`)); - console.log(` ... and ${files.length - 3} more`); - } - console.log(); - } - } - - console.log('\n========================================'); - console.log('Analysis complete!'); - console.log('========================================\n'); -} - -// --------------------------------------------------------------------------- -// CLI -// --------------------------------------------------------------------------- - -const args = process.argv.slice(2); -let datePrefix: string | null = null; -let directory = 'news'; - -for (let i = 0; i < args.length; i++) { - if (args[i] === '--date-prefix' && args[i + 1]) { - datePrefix = args[i + 1]!; - i++; - } else if (args[i] === '--directory' && args[i + 1]) { - directory = args[i + 1]!; - i++; - } else if (args[i] === '--help' || args[i] === '-h') { - console.log(` -Usage: node scripts/extract-vocabulary.js [options] - -Options: - --date-prefix Filter files by date prefix (e.g., "2026-02-") - --directory Directory to scan (default: "news") - --help, -h Show this help message - -Examples: - node scripts/extract-vocabulary.js - node scripts/extract-vocabulary.js --date-prefix 2026-02- - node scripts/extract-vocabulary.js --directory news --date-prefix 2026-03- -`); - process.exit(0); - } -} - -// Run analysis -const results = analyzeArticles(directory, datePrefix); -generateReport(results); diff --git a/scripts/fix-keywords-localization.ts b/scripts/fix-keywords-localization.ts deleted file mode 100644 index 2860a26aaa..0000000000 --- a/scripts/fix-keywords-localization.ts +++ /dev/null @@ -1,299 +0,0 @@ -/** - * Script to localize meta keywords in non-English news articles. - * Translates English keywords to the target language for all non-EN articles. - * - * Processes: - * - in each non-English article - * - "keywords": "..." in JSON-LD structured data - * - * Keywords that have no translation in the map (proper nouns, specific terms) - * are left as-is (English fallback is acceptable for those). - * - * Usage: npx tsx scripts/fix-keywords-localization.ts [--dry-run] - * - * @author Hack23 AB - * @license Apache-2.0 - */ - -import * as fs from 'fs'; -import * as path from 'path'; - -// SEO keyword translations — kept in sync with scripts/data-transformers/metadata.ts -// Maps English keyword strings to their localized equivalents for all 13 non-EN languages. -const SEO_KEYWORD_TRANSLATIONS: Record> = { - 'parliament': { sv: 'riksdag', da: 'parlament', no: 'parlament', fi: 'eduskunta', de: 'parlament', fr: 'parlement', es: 'parlamento', nl: 'parlement', ar: 'برلمان', he: 'פרלמנט', ja: '議会', ko: '의회', zh: '议会' }, - 'Swedish Parliament': { sv: 'Riksdagen', da: 'Svensk Parlament', no: 'Svensk Parlament', fi: 'Ruotsin Eduskunta', de: 'Schwedisches Parlament', fr: 'Parlement Suédois', es: 'Parlamento Sueco', nl: 'Zweeds Parlement', ar: 'البرلمان السويدي', he: 'הפרלמנט השבדי', ja: 'スウェーデン議会', ko: '스웨덴 의회', zh: '瑞典议会' }, - 'Sweden': { sv: 'Sverige', da: 'Sverige', no: 'Sverige', fi: 'Ruotsi', de: 'Schweden', fr: 'Suède', es: 'Suecia', nl: 'Zweden', ar: 'السويد', he: 'שבדיה', ja: 'スウェーデン', ko: '스웨덴', zh: '瑞典' }, - 'politics': { sv: 'politik', da: 'politik', no: 'politikk', fi: 'politiikka', de: 'politik', fr: 'politique', es: 'política', nl: 'politiek', ar: 'سياسة', he: 'פוליטיקה', ja: '政治', ko: '정치', zh: '政治' }, - 'week ahead': { sv: 'veckan framåt', da: 'ugen forude', no: 'uken fremover', fi: 'tuleva viikko', de: 'kommende woche', fr: 'semaine à venir', es: 'semana próxima', nl: 'week vooruit', ar: 'الأسبوع القادم', he: 'השבוע הקרוב', ja: '来週の展望', ko: '다음 주', zh: '下周展望' }, - 'month ahead': { sv: 'månaden framåt', da: 'måneden forude', no: 'måneden fremover', fi: 'tuleva kuukausi', de: 'kommender monat', fr: 'mois à venir', es: 'mes próximo', nl: 'maand vooruit', ar: 'الشهر القادم', he: 'החודש הקרוב', ja: '来月の展望', ko: '다음 달', zh: '下月展望' }, - 'calendar': { sv: 'kalender', da: 'kalender', no: 'kalender', fi: 'kalenteri', de: 'kalender', fr: 'calendrier', es: 'calendario', nl: 'kalender', ar: 'تقويم', he: 'לוח שנה', ja: 'カレンダー', ko: '일정', zh: '日历' }, - 'events': { sv: 'händelser', da: 'begivenheder', no: 'hendelser', fi: 'tapahtumat', de: 'ereignisse', fr: 'événements', es: 'eventos', nl: 'evenementen', ar: 'أحداث', he: 'אירועים', ja: '出来事', ko: '이벤트', zh: '事件' }, - 'committee': { sv: 'utskott', da: 'udvalg', no: 'komité', fi: 'valiokunta', de: 'ausschuss', fr: 'commission', es: 'comisión', nl: 'commissie', ar: 'لجنة', he: 'ועדה', ja: '委員会', ko: '위원회', zh: '委员会' }, - 'committees': { sv: 'utskott', da: 'udvalg', no: 'komiteer', fi: 'valiokunnat', de: 'ausschüsse', fr: 'commissions', es: 'comisiones', nl: 'commissies', ar: 'لجان', he: 'ועדות', ja: '委員会', ko: '위원회들', zh: '委员会' }, - 'reports': { sv: 'betänkanden', da: 'betænkninger', no: 'innstillinger', fi: 'mietinnöt', de: 'berichte', fr: 'rapports', es: 'informes', nl: 'rapporten', ar: 'تقارير', he: 'דוחות', ja: '報告書', ko: '보고서', zh: '报告' }, - 'betänkanden': { sv: 'betänkanden', da: 'betænkninger', no: 'innstillinger', fi: 'mietinnöt', de: 'parlamentsberichte', fr: 'rapports parlementaires', es: 'informes parlamentarios', nl: 'parlementaire rapporten', ar: 'تقارير برلمانية', he: 'דוחות פרלמנטריים', ja: '議会報告書', ko: '의회 보고서', zh: '议会报告' }, - 'government': { sv: 'regering', da: 'regering', no: 'regjering', fi: 'hallitus', de: 'regierung', fr: 'gouvernement', es: 'gobierno', nl: 'regering', ar: 'حكومة', he: 'ממשלה', ja: '政府', ko: '정부', zh: '政府' }, - 'propositions': { sv: 'propositioner', da: 'lovforslag', no: 'proposisjoner', fi: 'esitykset', de: 'gesetzentwürfe', fr: 'propositions de loi', es: 'proposiciones', nl: 'wetsvoorstellen', ar: 'مقترحات', he: 'הצעות חוק', ja: '法律案', ko: '법률안', zh: '提案' }, - 'legislation': { sv: 'lagstiftning', da: 'lovgivning', no: 'lovgivning', fi: 'lainsäädäntö', de: 'gesetzgebung', fr: 'législation', es: 'legislación', nl: 'wetgeving', ar: 'تشريع', he: 'חקיקה', ja: '立法', ko: '법률', zh: '立法' }, - 'motions': { sv: 'motioner', da: 'forslag', no: 'forslag', fi: 'aloitteet', de: 'anträge', fr: 'motions', es: 'mociones', nl: 'moties', ar: 'اقتراحات', he: 'הצעות', ja: '動議', ko: '동의', zh: '动议' }, - 'opposition': { sv: 'opposition', da: 'opposition', no: 'opposisjon', fi: 'oppositio', de: 'opposition', fr: 'opposition', es: 'oposición', nl: 'oppositie', ar: 'معارضة', he: 'אופוזיציה', ja: '野党', ko: '야당', zh: '反对派' }, - 'proposals': { sv: 'förslag', da: 'forslag', no: 'forslag', fi: 'ehdotukset', de: 'vorschläge', fr: 'propositions', es: 'propuestas', nl: 'voorstellen', ar: 'مقترحات', he: 'הצעות', ja: '提案', ko: '제안', zh: '提案' }, - 'outlook': { sv: 'utsikter', da: 'udsigt', no: 'utsikter', fi: 'näkymät', de: 'ausblick', fr: 'perspectives', es: 'perspectivas', nl: 'vooruitzichten', ar: 'توقعات', he: 'תחזית', ja: '見通し', ko: '전망', zh: '展望' }, - 'weekly review': { sv: 'veckans sammanfattning', da: 'ugentlig gennemgang', no: 'ukentlig gjennomgang', fi: 'viikkokatsaus', de: 'wochenbericht', fr: 'bilan hebdomadaire', es: 'revisión semanal', nl: 'wekelijks overzicht', ar: 'مراجعة أسبوعية', he: 'סקירה שבועית', ja: '週間レビュー', ko: '주간 리뷰', zh: '每周回顾' }, - 'monthly review': { sv: 'månadens sammanfattning', da: 'månedlig gennemgang', no: 'månedlig gjennomgang', fi: 'kuukausikatsaus', de: 'monatsbericht', fr: 'bilan mensuel', es: 'revisión mensual', nl: 'maandelijks overzicht', ar: 'مراجعة شهرية', he: 'סקירה חודשית', ja: '月間レビュー', ko: '월간 리뷰', zh: '每月回顾' }, - 'analysis': { sv: 'analys', da: 'analyse', no: 'analyse', fi: 'analyysi', de: 'analyse', fr: 'analyse', es: 'análisis', nl: 'analyse', ar: 'تحليل', he: 'ניתוח', ja: '分析', ko: '분석', zh: '分析' }, - 'recap': { sv: 'sammanfattning', da: 'resumé', no: 'oppsummering', fi: 'yhteenveto', de: 'zusammenfassung', fr: 'récapitulatif', es: 'resumen', nl: 'samenvatting', ar: 'ملخص', he: 'סיכום', ja: 'まとめ', ko: '요약', zh: '总结' }, - 'breaking news': { sv: 'senaste nytt', da: 'seneste nyt', no: 'siste nytt', fi: 'viimeisimmät uutiset', de: 'Eilmeldung', fr: 'dernières nouvelles', es: 'noticias de última hora', nl: 'laatste nieuws', ar: 'أخبار عاجلة', he: 'חדשות אחרונות', ja: '速報', ko: '속보', zh: '突发新闻' }, - 'urgent': { sv: 'brådskande', da: 'presserende', no: 'haster', fi: 'kiireellinen', de: 'dringend', fr: 'urgent', es: 'urgente', nl: 'dringend', ar: 'عاجل', he: 'דחוף', ja: '緊急', ko: '긴급', zh: '紧急' }, - 'alert': { sv: 'varning', da: 'advarsel', no: 'varsel', fi: 'hälytys', de: 'warnung', fr: 'alerte', es: 'alerta', nl: 'waarschuwing', ar: 'تنبيه', he: 'התראה', ja: '警告', ko: '경보', zh: '警告' }, - 'debates': { sv: 'debatter', da: 'debatter', no: 'debatter', fi: 'keskustelut', de: 'debatten', fr: 'débats', es: 'debates', nl: 'debatten', ar: 'مناقشات', he: 'דיונים', ja: '討論', ko: '토론', zh: '辩论' }, - // Additional compound keywords that appear frequently in article metadata - 'evening analysis': { sv: 'kvällsanalys', da: 'aftenanalyse', no: 'kveldsanalyse', fi: 'ilta-analyysi', de: 'abendanalyse', fr: 'analyse du soir', es: 'análisis vespertino', nl: 'avondanalyse', ar: 'تحليل مسائي', he: 'ניתוח ערב', ja: '夜の分析', ko: '저녁 분석', zh: '晚间分析' }, - 'morning briefing': { sv: 'morgonbriefing', da: 'morgenbriefing', no: 'morgenbriefing', fi: 'aamuinfo', de: 'morgenbriefing', fr: 'briefing matinal', es: 'informe matutino', nl: 'ochtendbriefing', ar: 'إحاطة صباحية', he: 'תדריך בוקר', ja: '朝のブリーフィング', ko: '아침 브리핑', zh: '早间简报' }, - 'committee reports': { sv: 'utskottsbetänkanden', da: 'udvalgsbetænkninger', no: 'komitéinnstillinger', fi: 'valiokuntamietinnöt', de: 'ausschussberichte', fr: 'rapports de commission', es: 'informes de comisión', nl: 'commissierapporten', ar: 'تقارير اللجان', he: 'דוחות ועדה', ja: '委員会報告書', ko: '위원회 보고서', zh: '委员会报告' }, - 'Government Propositions': { sv: 'Regeringspropositioner', da: 'Lovforslag fra Regeringen', no: 'Regjeringens proposisjoner', fi: 'Hallituksen esitykset', de: 'Regierungsvorlagen', fr: 'Propositions gouvernementales', es: 'Proposiciones gubernamentales', nl: 'Regeringsvoorstellen', ar: 'مقترحات الحكومة', he: 'הצעות ממשלה', ja: '政府法律案', ko: '정부 법률안', zh: '政府提案' }, - 'Opposition Motions': { sv: 'Oppositionsmotioner', da: 'Oppositionsforslag', no: 'Opposisjonsforslag', fi: 'Oppositioaloitteet', de: 'Oppositionsanträge', fr: "Motions de l'opposition", es: 'Mociones de la oposición', nl: 'Oppositiemoties', ar: 'اقتراحات المعارضة', he: 'הצעות האופוזיציה', ja: '野党動議', ko: '야당 동의', zh: '反对党动议' }, - 'parliamentary questions': { sv: 'parlamentariska frågor', da: 'parlamentariske spørgsmål', no: 'parlamentariske spørsmål', fi: 'parlamenttikyselyt', de: 'parlamentarische anfragen', fr: 'questions parlementaires', es: 'preguntas parlamentarias', nl: 'parlementaire vragen', ar: 'أسئلة برلمانية', he: 'שאלות פרלמנטריות', ja: '国会質問', ko: '의회 질문', zh: '议会质询' }, - 'interpellations': { sv: 'interpellationer', da: 'interpellationer', no: 'interpellasjoner', fi: 'interpellaatiot', de: 'interpellationen', fr: 'interpellations', es: 'interpelaciones', nl: 'interpellaties', ar: 'استجوابات', he: 'אינטרפלציות', ja: '質問主意書', ko: '대정부질문', zh: '质询' }, - 'defence': { sv: 'försvar', da: 'forsvar', no: 'forsvar', fi: 'puolustus', de: 'verteidigung', fr: 'défense', es: 'defensa', nl: 'defensie', ar: 'الدفاع', he: 'ביטחון', ja: '防衛', ko: '방위', zh: '国防' }, - 'defense': { sv: 'försvar', da: 'forsvar', no: 'forsvar', fi: 'puolustus', de: 'verteidigung', fr: 'défense', es: 'defensa', nl: 'defensie', ar: 'الدفاع', he: 'ביטחון', ja: '防衛', ko: '방위', zh: '国防' }, - 'security': { sv: 'säkerhet', da: 'sikkerhed', no: 'sikkerhet', fi: 'turvallisuus', de: 'sicherheit', fr: 'sécurité', es: 'seguridad', nl: 'veiligheid', ar: 'الأمن', he: 'ביטחון', ja: 'セキュリティ', ko: '보안', zh: '安全' }, - 'foreign policy': { sv: 'utrikespolitik', da: 'udenrigspolitik', no: 'utenrikspolitikk', fi: 'ulkopolitiikka', de: 'außenpolitik', fr: 'politique étrangère', es: 'política exterior', nl: 'buitenlands beleid', ar: 'السياسة الخارجية', he: 'מדיניות חוץ', ja: '外交政策', ko: '외교 정책', zh: '外交政策' }, - 'migration': { sv: 'migration', da: 'migration', no: 'migrasjon', fi: 'maahanmuutto', de: 'migration', fr: 'migration', es: 'migración', nl: 'migratie', ar: 'الهجرة', he: 'הגירה', ja: '移民', ko: '이민', zh: '移民' }, - 'energy': { sv: 'energi', da: 'energi', no: 'energi', fi: 'energia', de: 'energie', fr: 'énergie', es: 'energía', nl: 'energie', ar: 'الطاقة', he: 'אנרגיה', ja: 'エネルギー', ko: '에너지', zh: '能源' }, - 'healthcare': { sv: 'sjukvård', da: 'sundhed', no: 'helse', fi: 'terveydenhuolto', de: 'gesundheit', fr: 'santé', es: 'sanidad', nl: 'gezondheidszorg', ar: 'الرعاية الصحية', he: 'בריאות', ja: '医療', ko: '의료', zh: '医疗' }, - 'education': { sv: 'utbildning', da: 'uddannelse', no: 'utdanning', fi: 'koulutus', de: 'bildung', fr: 'éducation', es: 'educación', nl: 'onderwijs', ar: 'التعليم', he: 'חינוך', ja: '教育', ko: '교육', zh: '教育' }, - 'economy': { sv: 'ekonomi', da: 'økonomi', no: 'økonomi', fi: 'talous', de: 'wirtschaft', fr: 'économie', es: 'economía', nl: 'economie', ar: 'الاقتصاد', he: 'כלכלה', ja: '経済', ko: '경제', zh: '经济' }, - 'justice': { sv: 'rättsväsende', da: 'retsvæsen', no: 'rettsvesen', fi: 'oikeus', de: 'justiz', fr: 'justice', es: 'justicia', nl: 'justitie', ar: 'العدالة', he: 'משפט', ja: '司法', ko: '사법', zh: '司法' }, - 'welfare': { sv: 'välfärd', da: 'velfærd', no: 'velferd', fi: 'sosiaaliturva', de: 'sozialpolitik', fr: 'protection sociale', es: 'bienestar social', nl: 'sociale zekerheid', ar: 'الرعاية الاجتماعية', he: 'רווחה', ja: '社会保障', ko: '사회복지', zh: '社会保障' }, - 'environment': { sv: 'miljö', da: 'miljø', no: 'miljø', fi: 'ympäristö', de: 'umwelt', fr: 'environnement', es: 'medio ambiente', nl: 'milieu', ar: 'البيئة', he: 'סביבה', ja: '環境', ko: '환경', zh: '环境' }, - 'climate': { sv: 'klimat', da: 'klima', no: 'klima', fi: 'ilmasto', de: 'klima', fr: 'climat', es: 'clima', nl: 'klimaat', ar: 'المناخ', he: 'אקלים', ja: '気候', ko: '기후', zh: '气候' }, - 'housing': { sv: 'bostäder', da: 'boliger', no: 'boliger', fi: 'asuminen', de: 'wohnungsbau', fr: 'logement', es: 'vivienda', nl: 'huisvesting', ar: 'الإسكان', he: 'דיור', ja: '住宅', ko: '주택', zh: '住房' }, - 'taxation': { sv: 'beskattning', da: 'beskatning', no: 'beskatning', fi: 'verotus', de: 'besteuerung', fr: 'fiscalité', es: 'tributación', nl: 'belasting', ar: 'الضرائب', he: 'מיסוי', ja: '課税', ko: '과세', zh: '税收' }, - 'budget': { sv: 'budget', da: 'budget', no: 'budsjett', fi: 'budjetti', de: 'haushalt', fr: 'budget', es: 'presupuesto', nl: 'begroting', ar: 'الميزانية', he: 'תקציב', ja: '予算', ko: '예산', zh: '预算' }, - // Topic-specific keywords found in existing articles - 'artificial intelligence': { sv: 'artificiell intelligens', da: 'kunstig intelligens', no: 'kunstig intelligens', fi: 'tekoäly', de: 'künstliche intelligenz', fr: 'intelligence artificielle', es: 'inteligencia artificial', nl: 'kunstmatige intelligentie', ar: 'الذكاء الاصطناعي', he: 'בינה מלאכותית', ja: '人工知能', ko: '인공지능', zh: '人工智能' }, - 'bidragsreform': { sv: 'bidragsreform', da: 'bidragsreform', no: 'bidragsreform', fi: 'tukiuudistus', de: 'sozialleistungsreform', fr: 'réforme des prestations sociales', es: 'reforma de prestaciones sociales', nl: 'bijstandshervorming', ar: 'إصلاح الإعانات الاجتماعية', he: 'רפורמת הקצבאות', ja: '給付金制度改革', ko: '급여 제도 개혁', zh: '福利补贴改革' }, - 'citizenship': { sv: 'medborgarskap', da: 'statsborgerskab', no: 'statsborgerskap', fi: 'kansalaisuus', de: 'staatsbürgerschaft', fr: 'citoyenneté', es: 'ciudadanía', nl: 'burgerschap', ar: 'المواطنة', he: 'אזרחות', ja: '市民権', ko: '시민권', zh: '公民身份' }, - 'civilian defence': { sv: 'civilförsvar', da: 'civilbeskyttelse', no: 'sivil beredskap', fi: 'siviilisuojelu', de: 'zivilschutz', fr: 'défense civile', es: 'defensa civil', nl: 'civiele bescherming', ar: 'الدفاع المدني', he: 'הגנה אזרחית', ja: '民間防衛', ko: '민방위', zh: '民防' }, - 'coalition government': { sv: 'koalitionsregering', da: 'koalitionsregering', no: 'koalisjonsregjering', fi: 'koalitiohallitus', de: 'koalitionsregierung', fr: 'gouvernement de coalition', es: 'gobierno de coalición', nl: 'coalitieregering', ar: 'حكومة ائتلافية', he: 'ממשלת קואליציה', ja: '連立政権', ko: '연립정부', zh: '联合政府' }, - 'competition': { sv: 'konkurrens', da: 'konkurrence', no: 'konkurranse', fi: 'kilpailu', de: 'wettbewerb', fr: 'concurrence', es: 'competencia', nl: 'concurrentie', ar: 'المنافسة', he: 'תחרות', ja: '競争', ko: '경쟁', zh: '竞争' }, - 'consumer protection': { sv: 'konsumentskydd', da: 'forbrugerbeskyttelse', no: 'forbrukervern', fi: 'kuluttajansuoja', de: 'verbraucherschutz', fr: 'protection des consommateurs', es: 'protección al consumidor', nl: 'consumentenbescherming', ar: 'حماية المستهلك', he: 'הגנת הצרכן', ja: '消費者保護', ko: '소비자 보호', zh: '消费者保护' }, - 'criminal justice': { sv: 'rättsväsende', da: 'strafferet', no: 'strafferett', fi: 'rikosoikeus', de: 'strafjustiz', fr: 'justice pénale', es: 'justicia penal', nl: 'strafrechtpleging', ar: 'العدالة الجنائية', he: 'משפט פלילי', ja: '刑事司法', ko: '형사 사법', zh: '刑事司法' }, - 'data protection': { sv: 'dataskydd', da: 'databeskyttelse', no: 'personvern', fi: 'tietosuoja', de: 'datenschutz', fr: 'protection des données', es: 'protección de datos', nl: 'gegevensbescherming', ar: 'حماية البيانات', he: 'הגנת מידע', ja: 'データ保護', ko: '데이터 보호', zh: '数据保护' }, - 'diplomacy': { sv: 'diplomati', da: 'diplomati', no: 'diplomati', fi: 'diplomatia', de: 'diplomatie', fr: 'diplomatie', es: 'diplomacia', nl: 'diplomatie', ar: 'الدبلوماسية', he: 'דיפלומטיה', ja: '外交', ko: '외교', zh: '外交' }, - 'discrimination': { sv: 'diskriminering', da: 'diskrimination', no: 'diskriminering', fi: 'syrjintä', de: 'diskriminierung', fr: 'discrimination', es: 'discriminación', nl: 'discriminatie', ar: 'التمييز', he: 'אפליה', ja: '差別', ko: '차별', zh: '歧视' }, - 'ecosystem collapse': { sv: 'ekosystemkollaps', da: 'økosystemkollaps', no: 'økosystemkollaps', fi: 'ekosysteemin romahtaminen', de: 'ökosystemzusammenbruch', fr: "effondrement de l'écosystème", es: 'colapso del ecosistema', nl: 'ecosysteeminstorting', ar: 'انهيار النظام البيئي', he: 'קריסת המערכת האקולוגית', ja: '生態系崩壊', ko: '생태계 붕괴', zh: '生态系统崩溃' }, - 'employer contributions': { sv: 'arbetsgivaravgifter', da: 'arbejdsgiverbidrag', no: 'arbeidsgiveravgift', fi: 'työnantajamaksut', de: 'arbeitgeberbeiträge', fr: "cotisations patronales", es: 'cotizaciones empresariales', nl: 'werkgeversbijdragen', ar: 'اشتراكات أصحاب العمل', he: 'דמי ביטוח לאומי מעסיק', ja: '雇用者負担', ko: '고용주 부담금', zh: '雇主缴款' }, - 'explosives control': { sv: 'explosivkontroll', da: 'eksplosivkontrol', no: 'eksplosivkontroll', fi: 'räjähdysaineiden valvonta', de: 'sprengstoffkontrolle', fr: 'contrôle des explosifs', es: 'control de explosivos', nl: 'explosiefbeheersing', ar: 'التحكم في المتفجرات', he: 'פיקוח על חומרי נפץ', ja: '爆発物管理', ko: '폭발물 통제', zh: '爆炸物管控' }, - 'food reserves': { sv: 'livsmedelsreserver', da: 'fødevarereserver', no: 'matreserver', fi: 'elintarvikevarannot', de: 'nahrungsmittelreserven', fr: 'réserves alimentaires', es: 'reservas alimentarias', nl: 'voedselvoorraden', ar: 'احتياطيات الغذاء', he: 'מלאי מזון', ja: '食料備蓄', ko: '식량 비축', zh: '粮食储备' }, - 'foreign affairs': { sv: 'utrikesfrågor', da: 'udenrigsanliggender', no: 'utenrikssaker', fi: 'ulkoasiat', de: 'außenangelegenheiten', fr: 'affaires étrangères', es: 'asuntos exteriores', nl: 'buitenlandse zaken', ar: 'الشؤون الخارجية', he: 'ענייני חוץ', ja: '外交問題', ko: '외교 문제', zh: '外交事务' }, - 'foreign policy debate': { sv: 'utrikespolitisk debatt', da: 'udenrigspolitisk debat', no: 'utenrikspolitisk debatt', fi: 'ulkopoliittinen debatti', de: 'außenpolitische debatte', fr: 'débat de politique étrangère', es: 'debate de política exterior', nl: 'debat buitenlands beleid', ar: 'نقاش السياسة الخارجية', he: 'דיון מדיניות חוץ', ja: '外交政策論争', ko: '외교정책 토론', zh: '外交政策辩论' }, - 'gang criminalization': { sv: 'gängkriminalisering', da: 'bandekriminalitet', no: 'gjengkriminalisering', fi: 'jengirikollisuu', de: 'bandenkriminalität', fr: 'criminalité des gangs', es: 'criminalidad de bandas', nl: 'gangcriminaliteit', ar: 'تجريم العصابات', he: 'עבריינות כנופיות', ja: 'ギャング犯罪化', ko: '갱 범죄화', zh: '帮派犯罪' }, - 'government policy': { sv: 'regeringspolitik', da: 'regeringspolitik', no: 'regjeringspolitikk', fi: 'hallituspolitiikka', de: 'regierungspolitik', fr: 'politique gouvernementale', es: 'política gubernamental', nl: 'regeringsbeleid', ar: 'السياسة الحكومية', he: 'מדיניות ממשלה', ja: '政府方針', ko: '정부 정책', zh: '政府政策' }, - 'housing cooperatives': { sv: 'bostadsrättsföreningar', da: 'boligforeninger', no: 'borettslag', fi: 'asunto-osuuskunnat', de: 'wohnungsbaugenossenschaften', fr: 'coopératives de logement', es: 'cooperativas de vivienda', nl: 'woningcorporaties', ar: 'تعاونيات الإسكان', he: 'אגודות שיתופיות לדיור', ja: '住宅協同組合', ko: '주택 협동조합', zh: '住房合作社' }, - 'immigration reform': { sv: 'migrationspolitisk reform', da: 'immigrationsreform', no: 'innvandringsreform', fi: 'maahanmuuttouudistus', de: 'einwanderungsreform', fr: "réforme de l'immigration", es: 'reforma migratoria', nl: 'immigratiereform', ar: 'إصلاح الهجرة', he: 'רפורמת ההגירה', ja: '移民改革', ko: '이민 개혁', zh: '移民改革' }, - 'interpellation': { sv: 'interpellation', da: 'interpellation', no: 'interpellasjon', fi: 'interpellaatio', de: 'interpellation', fr: 'interpellation', es: 'interpelación', nl: 'interpellatie', ar: 'استجواب', he: 'אינטרפלציה', ja: '質問主意書', ko: '대정부질문', zh: '质询' }, - 'labour immigration': { sv: 'arbetskraftsinvandring', da: 'arbejdskraftsindvandring', no: 'arbeidskraftinnvandring', fi: 'työvoimamaahanmuutto', de: 'arbeitskräftemigration', fr: "immigration de main-d'œuvre", es: 'inmigración laboral', nl: 'arbeidsmigratie', ar: 'هجرة العمالة', he: 'הגירת עבודה', ja: '労働移民', ko: '노동 이민', zh: '劳动力移民' }, - 'labour reform': { sv: 'arbetsmarknadsreform', da: 'arbejdsmarkedsreform', no: 'arbeidsmarkedsreform', fi: 'työmarkkinauudistus', de: 'arbeitsmarktreform', fr: 'réforme du travail', es: 'reforma laboral', nl: 'arbeidsmarkthervorming', ar: 'إصلاح سوق العمل', he: 'רפורמת שוק העבודה', ja: '労働改革', ko: '노동 개혁', zh: '劳动改革' }, - 'legislative session': { sv: 'riksmöte', da: 'parlamentssamling', no: 'stortingssesjon', fi: 'istuntokausi', de: 'legislaturperiode', fr: 'session législative', es: 'sesión legislativa', nl: 'wetgevingssessie', ar: 'دورة تشريعية', he: 'מושב חקיקה', ja: '立法会期', ko: '입법 회기', zh: '立法会期' }, - 'military aid': { sv: 'militärt bistånd', da: 'militær støtte', no: 'militær støtte', fi: 'sotilaallinen tuki', de: 'militärhilfe', fr: 'aide militaire', es: 'ayuda militar', nl: 'militaire steun', ar: 'المساعدات العسكرية', he: 'סיוע צבאי', ja: '軍事支援', ko: '군사 지원', zh: '军事援助' }, - 'narcotics': { sv: 'narkotika', da: 'narkotika', no: 'narkotika', fi: 'huumeet', de: 'betäubungsmittel', fr: 'stupéfiants', es: 'narcóticos', nl: 'verdovende middelen', ar: 'المخدرات', he: 'סמים', ja: '麻薬', ko: '마약', zh: '麻醉药品' }, - 'national security': { sv: 'nationell säkerhet', da: 'national sikkerhed', no: 'nasjonal sikkerhet', fi: 'kansallinen turvallisuus', de: 'nationale sicherheit', fr: 'sécurité nationale', es: 'seguridad nacional', nl: 'nationale veiligheid', ar: 'الأمن الوطني', he: 'ביטחון לאומי', ja: '国家安全保障', ko: '국가 안보', zh: '国家安全' }, - 'nuclear energy': { sv: 'kärnkraft', da: 'kernekraft', no: 'kjernekraft', fi: 'ydinvoima', de: 'kernenergie', fr: 'énergie nucléaire', es: 'energía nuclear', nl: 'kernenergie', ar: 'الطاقة النووية', he: 'אנרגיה גרעינית', ja: '原子力エネルギー', ko: '핵에너지', zh: '核能' }, - 'organised crime': { sv: 'organiserad brottslighet', da: 'organiseret kriminalitet', no: 'organisert kriminalitet', fi: 'järjestäytynyt rikollisuus', de: 'organisierte kriminalität', fr: 'crime organisé', es: 'crimen organizado', nl: 'georganiseerde misdaad', ar: 'الجريمة المنظمة', he: 'פשע מאורגן', ja: '組織犯罪', ko: '조직 범죄', zh: '有组织犯罪' }, - 'parental leave': { sv: 'föräldraledighet', da: 'forældreorlov', no: 'foreldrepermisjon', fi: 'vanhempainvapaa', de: 'elternzeit', fr: 'congé parental', es: 'permiso parental', nl: 'ouderschapsverlof', ar: 'إجازة الوالدين', he: 'חופשת הורים', ja: '育児休暇', ko: '육아 휴직', zh: '育儿假' }, - 'political risk': { sv: 'politisk risk', da: 'politisk risiko', no: 'politisk risiko', fi: 'poliittinen riski', de: 'politisches risiko', fr: 'risque politique', es: 'riesgo político', nl: 'politiek risico', ar: 'المخاطر السياسية', he: 'סיכון פוליטי', ja: '政治リスク', ko: '정치적 위험', zh: '政治风险' }, - 'preparedness': { sv: 'beredskap', da: 'beredskab', no: 'beredskap', fi: 'varautuminen', de: 'krisenvorsorge', fr: 'préparation aux crises', es: 'preparación ante crisis', nl: 'crisisparaatheid', ar: 'الاستعداد', he: 'מוכנות', ja: '危機対応準備', ko: '대비 태세', zh: '应急准备' }, - 'press freedom': { sv: 'pressfrihet', da: 'pressefrihed', no: 'pressefrihet', fi: 'lehdistönvapaus', de: 'pressefreiheit', fr: 'liberté de la presse', es: 'libertad de prensa', nl: 'persvrijheid', ar: 'حرية الصحافة', he: 'חופש העיתונות', ja: '報道の自由', ko: '언론 자유', zh: '新闻自由' }, - 'psychological violence': { sv: 'psykiskt våld', da: 'psykisk vold', no: 'psykisk vold', fi: 'psyykkinen väkivalta', de: 'psychische gewalt', fr: 'violence psychologique', es: 'violencia psicológica', nl: 'psychologisch geweld', ar: 'العنف النفسي', he: 'אלימות פסיכולוגית', ja: '精神的暴力', ko: '심리적 폭력', zh: '心理暴力' }, - 'reform': { sv: 'reform', da: 'reform', no: 'reform', fi: 'uudistus', de: 'reform', fr: 'réforme', es: 'reforma', nl: 'hervorming', ar: 'إصلاح', he: 'רפורמה', ja: '改革', ko: '개혁', zh: '改革' }, - 'rural policy': { sv: 'landsbygdspolitik', da: 'landdistriktspolitik', no: 'distriktspolitikk', fi: 'maaseutupolitiikka', de: 'ländliche entwicklungspolitik', fr: 'politique rurale', es: 'política rural', nl: 'plattelandsbeleid', ar: 'السياسة الريفية', he: 'מדיניות כפרית', ja: '農村政策', ko: '농촌 정책', zh: '农村政策' }, - 'smuggling': { sv: 'smuggling', da: 'smugling', no: 'smugling', fi: 'salakuljetus', de: 'schmuggel', fr: 'contrebande', es: 'contrabando', nl: 'smokkel', ar: 'التهريب', he: 'הברחה', ja: '密輸', ko: '밀수', zh: '走私' }, - 'social dumping': { sv: 'social dumpning', da: 'social dumping', no: 'sosial dumping', fi: 'sosiaalinen dumppaus', de: 'sozialdumping', fr: 'dumping social', es: 'dumping social', nl: 'sociale dumping', ar: 'الإغراق الاجتماعي', he: 'דאמפינג סוציאלי', ja: '社会的ダンピング', ko: '사회적 덤핑', zh: '社会倾销' }, - 'social insurance': { sv: 'socialförsäkring', da: 'socialforsikring', no: 'sosialforsikring', fi: 'sosiaalivakuutus', de: 'sozialversicherung', fr: 'assurance sociale', es: 'seguro social', nl: 'sociale verzekering', ar: 'التأمين الاجتماعي', he: 'ביטוח סוציאלי', ja: '社会保険', ko: '사회 보험', zh: '社会保险' }, - 'social welfare': { sv: 'socialt välfärd', da: 'social velfærd', no: 'sosial velferd', fi: 'sosiaalinen hyvinvointi', de: 'soziale wohlfahrt', fr: 'aide sociale', es: 'bienestar social', nl: 'sociale welzijn', ar: 'الرعاية الاجتماعية', he: 'רווחה חברתית', ja: '社会福祉', ko: '사회 복지', zh: '社会福利' }, - 'supplementary budget': { sv: 'tilläggsbudget', da: 'tillægsbudget', no: 'tilleggsbudsjett', fi: 'lisätalousarvio', de: 'nachtragshaushalt', fr: 'budget supplémentaire', es: 'presupuesto suplementario', nl: 'aanvullend budget', ar: 'ميزانية تكميلية', he: 'תקציב נוסף', ja: '補正予算', ko: '추가 예산', zh: '补充预算' }, - 'surveillance': { sv: 'övervakning', da: 'overvågning', no: 'overvåkning', fi: 'valvonta', de: 'überwachung', fr: 'surveillance', es: 'vigilancia', nl: 'bewaking', ar: 'المراقبة', he: 'מעקב', ja: '監視', ko: '감시', zh: '监控' }, - 'technology': { sv: 'teknik', da: 'teknologi', no: 'teknologi', fi: 'teknologia', de: 'technologie', fr: 'technologie', es: 'tecnología', nl: 'technologie', ar: 'التكنولوجيا', he: 'טכנולוגיה', ja: 'テクノロジー', ko: '기술', zh: '技术' }, - 'utrikesdeklarationen': { sv: 'utrikesdeklarationen', da: 'udenrigserklæringen', no: 'utenrikspolitisk erklæring', fi: 'ulkopoliittinen julistus', de: 'außenpolitische erklärung', fr: 'déclaration de politique étrangère', es: 'declaración de política exterior', nl: 'verklaring buitenlands beleid', ar: 'إعلان السياسة الخارجية', he: 'הצהרת מדיניות החוץ', ja: '外交政策宣言', ko: '외교정책 선언', zh: '外交政策宣言' }, - 'vaccine': { sv: 'vaccin', da: 'vaccine', no: 'vaksine', fi: 'rokote', de: 'impfstoff', fr: 'vaccin', es: 'vacuna', nl: 'vaccin', ar: 'لقاح', he: 'חיסון', ja: 'ワクチン', ko: '백신', zh: '疫苗' }, - 'weapons law': { sv: 'vapenlag', da: 'våbenlov', no: 'våpenlov', fi: 'asevoimalaki', de: 'waffengesetz', fr: 'loi sur les armes', es: 'ley de armas', nl: 'wapenwet', ar: 'قانون الأسلحة', he: 'חוק הנשק', ja: '武器法', ko: '무기법', zh: '武器法' }, - 'weekend analysis': { sv: 'helganalys', da: 'weekendanalyse', no: 'helganalyse', fi: 'viikonloppuanalyysi', de: 'wochenendanalyse', fr: "analyse du week-end", es: 'análisis del fin de semana', nl: 'weekendanalyse', ar: 'تحليل نهاية الأسبوع', he: 'ניתוח סוף שבוע', ja: '週末分析', ko: '주말 분석', zh: '周末分析' }, - 'wind power': { sv: 'vindkraft', da: 'vindkraft', no: 'vindkraft', fi: 'tuulivoima', de: 'windenergie', fr: 'énergie éolienne', es: 'energía eólica', nl: 'windenergie', ar: 'طاقة الرياح', he: 'אנרגיית רוח', ja: '風力発電', ko: '풍력 발전', zh: '风力发电' }, -}; - -// Build a case-insensitive lookup map keyed by lowercase English term -const LOWER_MAP = new Map }>(); -for (const [key, translations] of Object.entries(SEO_KEYWORD_TRANSLATIONS)) { - LOWER_MAP.set(key.toLowerCase(), { original: key, translations }); -} - -/** Return the localized form of a single keyword for the given language. Falls back to English. */ -function localizeKeyword(keyword: string, lang: string): string { - if (lang === 'en') return keyword; - const trimmed = keyword.trim(); - // Try exact match first - const exact = SEO_KEYWORD_TRANSLATIONS[trimmed]; - if (exact?.[lang]) return exact[lang]; - // Try case-insensitive match - const lower = trimmed.toLowerCase(); - const entry = LOWER_MAP.get(lower); - if (entry?.translations[lang]) return entry.translations[lang]; - // No translation available — keep as English (acceptable for proper nouns / specific terms) - return trimmed; -} - -/** - * Translate a comma-separated keyword string to the target language. - * Keywords that have no translation entry are left as-is. - */ -function localizeKeywords(keywordsStr: string, lang: string): string { - return keywordsStr - .split(', ') - .map(kw => localizeKeyword(kw, lang)) - .join(', '); -} - -/** Replace the meta keywords tag and JSON-LD keywords string in HTML content. */ -function replaceKeywords(html: string, lang: string): string { - let result = html; - - // 1. Replace - result = result.replace( - /( { - const localized = localizeKeywords(keywords, lang); - return `${prefix}${localized}${suffix}`; - } - ); - - // 2. Replace "keywords": "..." in JSON-LD structured data (string format) - result = result.replace( - /("keywords": ")([^"]+)(")/g, - (_match, prefix, keywords, suffix) => { - const localized = localizeKeywords(keywords, lang); - return `${prefix}${localized}${suffix}`; - } - ); - - // 3. Replace "keywords": [...] in JSON-LD structured data (array format) - result = result.replace( - /("keywords": \[)([^\]]+)(\])/g, - (_match, prefix, keywordsJson, suffix) => { - // Parse the JSON array of strings - const localized = keywordsJson.replace(/"([^"]+)"/g, (_m: string, kw: string) => { - return `"${localizeKeyword(kw, lang)}"`; - }); - return `${prefix}${localized}${suffix}`; - } - ); - - return result; -} - -function getLanguageFromFilename(filename: string): string | null { - const match = filename.match(/-([a-z]{2})\.html$/); - return match ? (match[1] ?? null) : null; -} - -function processFile(filepath: string, dryRun: boolean): boolean { - const filename = path.basename(filepath); - const lang = getLanguageFromFilename(filename); - - // Skip English articles — they are already in the correct language - if (!lang || lang === 'en') return false; - - const original = fs.readFileSync(filepath, 'utf-8'); - - // Quick check: does this file have any keywords meta tag with translatable English terms? - const keywordsMatch = original.match(/ f.endsWith('.html')) - .sort(); - -let modifiedCount = 0; -let skippedCount = 0; -const langStats: Record = {}; - -for (const file of files) { - const filepath = path.join(NEWS_DIR, file); - const lang = getLanguageFromFilename(file); - - if (processFile(filepath, dryRun)) { - modifiedCount++; - if (lang) langStats[lang] = (langStats[lang] ?? 0) + 1; - if (dryRun) console.log(` 📝 Would modify: ${file}`); - } else { - skippedCount++; - } -} - -console.log(`\n✅ Done!`); -console.log(` Modified: ${modifiedCount} files`); -console.log(` Skipped: ${skippedCount} files (no changes needed)`); - -if (Object.keys(langStats).length > 0) { - console.log(`\n📊 Changes by language:`); - for (const [lang, count] of Object.entries(langStats).sort()) { - console.log(` ${lang}: ${count} files`); - } -} - -// ── Verification pass ───────────────────────────────────────────────────────── -console.log(`\n🔍 Verifying: checking for remaining English-only standard keywords in non-EN articles...`); - -// Patterns that indicate untranslated standard template keywords -const ENGLISH_ONLY_PATTERNS = [ - /name="keywords"[^>]*content="government, propositions/, - /name="keywords"[^>]*content="committee, reports, bet/, - /name="keywords"[^>]*content="motions, opposition, parliament, proposals/, - /name="keywords"[^>]*content="parliament, week ahead/, - /name="keywords"[^>]*content="parliament, month ahead/, - /name="keywords"[^>]*content="parliament, weekly review/, - /name="keywords"[^>]*content="parliament, monthly review/, -]; - -let remaining = 0; -for (const file of files) { - const lang = getLanguageFromFilename(file); - if (!lang || lang === 'en') continue; - - const content = fs.readFileSync(path.join(NEWS_DIR, file), 'utf-8'); - for (const pattern of ENGLISH_ONLY_PATTERNS) { - if (pattern.test(content)) { - remaining++; - if (remaining <= 10) { - console.log(` ⚠️ ${file}: still has English-only template keywords`); - } - break; - } - } -} - -if (remaining === 0) { - console.log(` ✅ No remaining English-only standard keywords found.`); -} else { - console.log(` ⚠️ ${remaining} file(s) still have English-only standard keywords.`); - process.exitCode = 1; -} diff --git a/scripts/fix-old-articles-branding.ts b/scripts/fix-old-articles-branding.ts deleted file mode 100644 index 8e5713952c..0000000000 --- a/scripts/fix-old-articles-branding.ts +++ /dev/null @@ -1,291 +0,0 @@ -/** - * Script to replace "The Economist" branding references in old news articles - * with OSINT/INTOP political intelligence branding across all 14 languages. - * - * Usage: npx tsx scripts/fix-old-articles-branding.ts [--dry-run] - * - * Replaces: - * - site-tagline div content (language-specific from SITE_TAGLINE constants) - * - "style: The Economist" in HTML comment frontmatter - * - "The Economist" references in article body/disclaimers - * - "Journalism Standards" footer text - * - * Preserves: - * - External links to economist.com (legitimate references) - */ - -import * as fs from 'fs'; -import * as path from 'path'; - -// New taglines from scripts/article-template/constants.ts -const SITE_TAGLINE: Record = { - en: "Latest news and analysis from Sweden's Riksdag. AI-generated political intelligence based on OSINT/INTOP data covering parliament, government, and agencies with systematic transparency.", - sv: 'Senaste nyheter och analyser från Sveriges riksdag. AI-genererad politisk underrättelsejournalistik baserad på OSINT/INTOP-data som bevakar riksdagen, regeringen och myndigheter med systematisk transparens.', - da: 'Seneste nyheder og analyser fra Sveriges Riksdag. AI-genereret politisk efterretningsjournalistik baseret på OSINT/INTOP-data, der dækker parlament, regering og myndigheder med systematisk gennemsigtighed.', - no: 'Siste nyheter og analyser fra Sveriges riksdag. AI-generert politisk etterretningsjournalistikk basert på OSINT/INTOP-data som dekker parlament, regjering og myndigheter med systematisk åpenhet.', - fi: 'Uusimmat uutiset ja analyysit Ruotsin valtiopäiviltä. Tekoälyn tuottama poliittinen tiedustelujournalismi OSINT/INTOP-dataan perustuen, joka kattaa eduskunnan, hallituksen ja viranomaiset järjestelmällisellä läpinäkyvyydellä.', - de: 'Aktuelle Nachrichten und Analysen aus dem schwedischen Riksdag. KI-generierter politischer Nachrichtendienst-Journalismus basierend auf OSINT/INTOP-Daten über Parlament, Regierung und Behörden mit systematischer Transparenz.', - fr: 'Dernières nouvelles et analyses du Riksdag suédois. Journalisme de renseignement politique généré par IA basé sur des données OSINT/INTOP couvrant le parlement, le gouvernement et les agences avec une transparence systématique.', - es: 'Últimas noticias y análisis del Riksdag sueco. Periodismo de inteligencia política generado por IA basado en datos OSINT/INTOP que cubre el parlamento, el gobierno y las agencias con transparencia sistemática.', - nl: 'Laatste nieuws en analyses van de Zweedse Riksdag. AI-gegenereerde politieke inlichtingenjournalistiek gebaseerd op OSINT/INTOP-data over parlement, regering en instanties met systematische transparantie.', - ar: 'أحدث الأخبار والتحليلات من البرلمان السويدي. صحافة استخبارات سياسية مولّدة بالذكاء الاصطناعي مبنية على بيانات OSINT/INTOP تغطي البرلمان والحكومة والوكالات بشفافية منهجية.', - he: 'חדשות ניתוחים אחרונים מהריקסדאג השוודי. עיתונות מודיעין פוליטי מבוססת AI ונתוני OSINT/INTOP המכסה פרלמנט, ממשלה וסוכנויות עם שקיפות שיטתית.', - ja: 'スウェーデン議会リクスダーグの最新ニュースと分析。OSINT/INTOPデータに基づくAI生成の政治インテリジェンスジャーナリズムで、議会、政府、機関を体系的な透明性で報道。', - ko: '스웨덴 의회 릭스다그의 최신 뉴스와 분석. OSINT/INTOP 데이터 기반 AI 생성 정치 인텔리전스 저널리즘으로 의회, 정부, 기관을 체계적인 투명성으로 보도.', - zh: '来自瑞典议会的最新新闻和分析。基于OSINT/INTOP数据的AI生成政治情报新闻,以系统性透明度报道议会、政府和机构。', -}; - -const NEWS_DIR = path.join(process.cwd(), 'news'); -const dryRun = process.argv.includes('--dry-run'); - -function getLanguageFromFilename(filename: string): string | null { - const match = filename.match(/-([a-z]{2})\.html$/); - return match ? match[1] : null; -} - -function replaceTagline(html: string, lang: string): string { - const tagline = SITE_TAGLINE[lang]; - if (!tagline) return html; - - // Replace site-tagline div content - return html.replace( - /(
)([^<]*?)(<\/div>)/g, - `$1${tagline}$3` - ); -} - -function replaceEconomistReferences(html: string): string { - let result = html; - - // 1. Replace "style: The Economist" in HTML comment frontmatter - result = result.replace(/style: The Economist\b/g, 'style: OSINT/INTOP'); - - // 2. Replace "Journalism Standards" footer patterns (en + sv) - result = result.replace( - /Journalism Standards<\/strong>: The Economist style/g, - 'Journalism Standards: OSINT/INTOP data-driven AI-generated political intelligence' - ); - result = result.replace( - /Journalistiska standarder<\/strong>: The Economist-stil/g, - 'Journalistiska standarder: OSINT/INTOP-datadriven AI-genererad politisk underrättelsejournalistik' - ); - - // 3. Skip lines containing economist.com URLs (legitimate external references) - // Process line by line to preserve external links - const lines = result.split('\n'); - const processedLines = lines.map(line => { - // Skip lines with economist.com URLs (legitimate external reference links) - if (/href=["'][^"']*economist\.com/.test(line)) return line; - - // Replace "The Economist" patterns not already handled - // Various editorial standards patterns - line = line.replace(/following The Economist editorial standards/g, 'following OSINT/INTOP editorial standards'); - line = line.replace(/enligt The Economist-standarder/g, 'enligt OSINT/INTOP-standarder'); - line = line.replace(/enligt The Economists redaktionella standard/g, 'enligt OSINT/INTOP redaktionella standard'); - line = line.replace(/i enlighet med The Economists redaktionella standard/g, 'i enlighet med OSINT/INTOP redaktionella standard'); - line = line.replace(/in The Economist style/g, 'in OSINT/INTOP style'); - line = line.replace(/i The Economist-stil/g, 'i OSINT/INTOP-stil'); - - // English patterns - line = line.replace(/The Economist-style analysis/g, 'OSINT/INTOP data-driven analysis'); - line = line.replace(/The Economist-style political journalism emphasizing/g, 'OSINT/INTOP data-driven AI-generated political intelligence emphasizing'); - line = line.replace(/The Economist-style political journalism/g, 'OSINT/INTOP data-driven political journalism'); - line = line.replace(/The Economist-style/g, 'OSINT/INTOP data-driven'); - - // Generic "The Economist" that references the brand, not external links - // Replace remaining "The Economist" in article contexts (not in tags) - // Use negative lookahead to avoid replacing inside link text to economist.com - line = line.replace(/The Economist noted in its/g, 'As noted in a'); - - // Swedish patterns - line = line.replace(/The Economist-inspirerad/g, 'OSINT/INTOP-baserad'); - - // Danish patterns - line = line.replace(/The Economist-inspireret/g, 'OSINT/INTOP-baseret'); - - // Norwegian patterns - line = line.replace(/The Economist-inspirert/g, 'OSINT/INTOP-basert'); - - // Generic suffix patterns across Scandinavian languages - line = line.replace(/The Economist-stil\b/g, 'OSINT/INTOP-stil'); - - // Dutch - line = line.replace(/The Economist-stijl\b/g, 'OSINT/INTOP-stijl'); - - // German (with and without "The ") - line = line.replace(/The Economist-Stil\b/g, 'OSINT/INTOP-Stil'); - line = line.replace(/im Economist-Stil\b/g, 'im OSINT/INTOP-Stil'); - line = line.replace(/Economist-Stil\b/g, 'OSINT/INTOP-Stil'); - - // Dutch (without "The " prefix) - line = line.replace(/in Economist-stijl\b/g, 'in OSINT/INTOP-stijl'); - line = line.replace(/Economist-stijl\b/g, 'OSINT/INTOP-stijl'); - - // Finnish (note the space before the hyphen) - line = line.replace(/The Economist -tyylinen/g, 'OSINT/INTOP -tyylinen'); - line = line.replace(/The Economist -tyyliin/g, 'OSINT/INTOP -tyyliin'); - line = line.replace(/The Economist-tyylinen/g, 'OSINT/INTOP-tyylinen'); - - // French - line = line.replace(/style The Economist\b/g, 'style OSINT/INTOP'); - line = line.replace(/The Economist couvrant/g, 'OSINT/INTOP couvrant'); - line = line.replace(/The Economist avec/g, 'OSINT/INTOP avec'); - - // Spanish - line = line.replace(/estilo The Economist\b/g, 'estilo OSINT/INTOP'); - line = line.replace(/The Economist que cubre/g, 'OSINT/INTOP que cubre'); - line = line.replace(/The Economist cubriendo/g, 'OSINT/INTOP cubriendo'); - line = line.replace(/The Economist sobre/g, 'OSINT/INTOP sobre'); - line = line.replace(/The Economist con/g, 'OSINT/INTOP con'); - - // Dutch additional - line = line.replace(/The Economist over/g, 'OSINT/INTOP over'); - line = line.replace(/The Economist met/g, 'OSINT/INTOP met'); - line = line.replace(/The Economist politieke/g, 'OSINT/INTOP politieke'); - - // German additional - line = line.replace(/The Economist mit/g, 'OSINT/INTOP mit'); - line = line.replace(/The Economist zu /g, 'OSINT/INTOP zu '); - line = line.replace(/The Economist über/g, 'OSINT/INTOP über'); - - // Hebrew - line = line.replace(/The Economist המכסה/g, 'OSINT/INTOP המכסה'); - line = line.replace(/The Economist עם/g, 'OSINT/INTOP עם'); - - // Arabic - line = line.replace(/The Economist تغطي/g, 'OSINT/INTOP تغطي'); - - // Japanese - line = line.replace(/The Economist スタイル/g, 'OSINT/INTOP スタイル'); - - // Chinese - line = line.replace(/The Economist 风格/g, 'OSINT/INTOP 风格'); - line = line.replace(/The Economist风格/g, 'OSINT/INTOP風格'); - - // Korean - line = line.replace(/The Economist 스타일/g, 'OSINT/INTOP 스타일'); - - // Catch remaining "The Economist." and "The Economist," sentence endings - // But not "The Economist:" (which is a reference title) - line = line.replace(/The Economist\./g, 'OSINT/INTOP.'); - line = line.replace(/The Economist,/g, 'OSINT/INTOP,'); - - // HTML entity versions - line = line.replace(/The Economist-stil som täcker/g, 'OSINT/INTOP-stil som täcker'); - line = line.replace(/The Economist-stil som dekker riksdag, regjering og myndigheter med systematisk åpenhet/g, - 'OSINT/INTOP-stil som dekker riksdag, regjering og myndigheter med systematisk åpenhet'); - line = line.replace(/The Economist-stil der dækker/g, 'OSINT/INTOP-stil der dækker'); - line = line.replace(/The Economist couvrant le parlement, le gouvernement et les agences avec une transparence systématique/g, - 'OSINT/INTOP couvrant le parlement, le gouvernement et les agences avec une transparence systématique'); - line = line.replace(/The Economist cubriendo parlamento, gobierno y agencias con transparencia sistemática/g, - 'OSINT/INTOP cubriendo parlamento, gobierno y agencias con transparencia sistemática'); - line = line.replace(/The Economist -tyylin poliittista journalismia, joka kattaa parlamentin, hallituksen ja virastot systemaattisella läpinäkyvyydellä/g, - 'OSINT/INTOP -tyylin poliittista journalismia, joka kattaa parlamentin, hallituksen ja virastot systemaattisella läpinäkyvyydellä'); - - return line; - }); - - return processedLines.join('\n'); -} - -function processFile(filepath: string): boolean { - const filename = path.basename(filepath); - const lang = getLanguageFromFilename(filename); - - if (!lang) { - console.warn(` ⚠️ Could not determine language for: ${filename}`); - return false; - } - - const original = fs.readFileSync(filepath, 'utf-8'); - - // Check if file contains any Economist references - if (!original.includes('Economist')) { - return false; - } - - let modified = original; - - // Replace site-tagline - modified = replaceTagline(modified, lang); - - // Replace other Economist references - modified = replaceEconomistReferences(modified); - - if (modified === original) { - return false; - } - - if (!dryRun) { - fs.writeFileSync(filepath, modified, 'utf-8'); - } - - return true; -} - -// Main execution -console.log(`🔄 Fixing old articles branding (${dryRun ? 'DRY RUN' : 'LIVE'})...\n`); - -const files = fs.readdirSync(NEWS_DIR) - .filter(f => f.endsWith('.html')) - .sort(); - -let modified = 0; -let skipped = 0; -const langStats: Record = {}; - -for (const file of files) { - const filepath = path.join(NEWS_DIR, file); - const lang = getLanguageFromFilename(file); - - if (processFile(filepath)) { - modified++; - if (lang) { - langStats[lang] = (langStats[lang] || 0) + 1; - } - if (dryRun) { - console.log(` 📝 Would modify: ${file}`); - } - } else { - skipped++; - } -} - -console.log(`\n✅ Done!`); -console.log(` Modified: ${modified} files`); -console.log(` Skipped: ${skipped} files (no changes needed)`); -console.log(`\n📊 Changes by language:`); -for (const [lang, count] of Object.entries(langStats).sort()) { - console.log(` ${lang}: ${count} files`); -} - -// Verify no remaining Economist references (except legitimate external links) -console.log(`\n🔍 Checking for remaining "Economist" references...`); -let remaining = 0; -for (const file of files) { - const filepath = path.join(NEWS_DIR, file); - const content = dryRun - ? fs.readFileSync(filepath, 'utf-8') - : fs.readFileSync(filepath, 'utf-8'); - - // Split into lines and check each - const lines = content.split('\n'); - for (let i = 0; i < lines.length; i++) { - if (lines[i].includes('Economist') && !/href=["'][^"']*economist\.com/.test(lines[i])) { - remaining++; - if (remaining <= 20) { - console.log(` ⚠️ ${file}:${i + 1}: ${lines[i].trim().substring(0, 120)}`); - } - } - } -} - -if (remaining > 20) { - console.log(` ... and ${remaining - 20} more`); -} - -if (remaining === 0) { - console.log(' ✅ No remaining references (except legitimate external links)'); -} else { - console.log(`\n ⚠️ ${remaining} remaining references found`); -} diff --git a/scripts/generate-news-backport.ts b/scripts/generate-news-backport.ts deleted file mode 100644 index 97c7dbd60d..0000000000 --- a/scripts/generate-news-backport.ts +++ /dev/null @@ -1,441 +0,0 @@ -/** - * @module Intelligence/DataMigration - * @category Intelligence Operations / Supporting Infrastructure - * @name News Backport Generation - Historical Article Migration System - * - * @description - * Automated legacy article generation system creating historical news coverage for - * past dates by querying the riksdag-regering-mcp (MCP) client for parliamentary - * activities, government actions, and committee developments. - * - * @author Hack23 AB (Intelligence Archive Team) - * @license Apache-2.0 - * @version 1.5.0 - */ - -import fs from 'fs'; -import path from 'path'; -import { fileURLToPath } from 'url'; -import { MCPClient } from './mcp-client.js'; -import { - generateArticleContent, - extractWatchPoints, - generateMetadata, - calculateReadTime, - generateSources, -} from './data-transformers.js'; -import { generateArticleHTML } from './article-template.js'; -import type { AnalysisEnrichment } from './generate-news-enhanced/helpers.js'; -import { readDailyAnalysis, deriveArticleClassificationMeta } from './analysis-reader.js'; -import type { Language } from './types/language.js'; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = path.dirname(__filename); - -// --------------------------------------------------------------------------- -// Types -// --------------------------------------------------------------------------- - -interface TitleInfo { - readonly title: string; - readonly subtitle: string; -} - -type TitleMapWeekAhead = Readonly TitleInfo>>; -type TitleMapCount = Readonly TitleInfo>>; - -// --------------------------------------------------------------------------- -// CLI Arguments -// --------------------------------------------------------------------------- - -const args = process.argv.slice(2); - -function getArg(name: string, defaultValue: string = ''): string { - const arg = args.find((a) => a.startsWith(`--${name}=`)); - return arg ? arg.split('=').slice(1).join('=') : defaultValue; -} - -const fromDate: string = getArg('from', '2026-01-05'); -const toDate: string = getArg('to', new Date().toISOString().split('T')[0]); -const mode: string = getArg('mode', 'weekly'); -const typesInput: string = getArg('types', 'week-ahead,committee-reports,propositions,motions'); -const dryRun: boolean = args.includes('--dry-run'); -const delayMs: number = parseInt(getArg('delay', '2000'), 10); - -// Language support -const ALL_LANGUAGES: readonly Language[] = ['en', 'sv', 'da', 'no', 'fi', 'de', 'fr', 'es', 'nl', 'ar', 'he', 'ja', 'ko', 'zh']; -const LANGUAGE_PRESETS: Readonly> = { - all: ALL_LANGUAGES, - nordic: ['en', 'sv', 'da', 'no', 'fi'], - 'eu-core': ['en', 'sv', 'de', 'fr', 'es', 'nl'], -}; - -let langInput: string = getArg('languages', 'en,sv').trim().toLowerCase(); -const preset = LANGUAGE_PRESETS[langInput]; -if (preset) { - langInput = preset.join(','); -} -const languages: Language[] = langInput - .split(',') - .map((l) => l.trim()) - .filter((l): l is Language => (ALL_LANGUAGES as readonly string[]).includes(l)); - -const articleTypes: string[] = typesInput.split(',').filter(Boolean); -const NEWS_DIR: string = path.join(__dirname, '..', 'news'); - -// Ensure directory exists -if (!fs.existsSync(NEWS_DIR)) { - fs.mkdirSync(NEWS_DIR, { recursive: true }); -} - -// --------------------------------------------------------------------------- -// Date Helpers -// --------------------------------------------------------------------------- - -function addDays(date: Date | string, days: number): Date { - const d = new Date(date); - d.setDate(d.getDate() + days); - return d; -} - -function formatDate(date: Date): string { - return date.toISOString().split('T')[0] ?? ''; -} - -function getMondaysInRange(from: string, to: string): Date[] { - const mondays: Date[] = []; - let current = new Date(from); - const dayOfWeek = current.getDay(); - if (dayOfWeek !== 1) { - current = addDays(current, (8 - dayOfWeek) % 7); - } - const end = new Date(to); - while (current <= end) { - mondays.push(new Date(current)); - current = addDays(current, 7); - } - return mondays; -} - -function getDaysInRange(from: string, to: string): Date[] { - const days: Date[] = []; - let current = new Date(from); - const end = new Date(to); - while (current <= end) { - const day = current.getDay(); - if (day !== 0 && day !== 6) { - days.push(new Date(current)); - } - current = addDays(current, 1); - } - return days; -} - -function articleExists(slug: string, lang: Language): boolean { - const filename = `${slug}-${lang}.html`; - return fs.existsSync(path.join(NEWS_DIR, filename)); -} - -// --------------------------------------------------------------------------- -// Rate Limiter -// --------------------------------------------------------------------------- - -function delay(ms: number): Promise { - return new Promise((resolve) => setTimeout(resolve, ms)); -} - -// --------------------------------------------------------------------------- -// Title Maps -// --------------------------------------------------------------------------- - -const WEEK_AHEAD_TITLES: TitleMapWeekAhead = { - en: (from, to) => ({ title: `Week Ahead: ${from} to ${to}`, subtitle: 'Prospective coverage of upcoming parliamentary activity' }), - sv: (from, to) => ({ title: `Veckan framåt: ${from} till ${to}`, subtitle: 'Översikt av kommande parlamentarisk verksamhet' }), - da: (from, to) => ({ title: `Ugen forude: ${from} til ${to}`, subtitle: 'Oversigt over kommende parlamentarisk aktivitet' }), - no: (from, to) => ({ title: `Uken fremover: ${from} til ${to}`, subtitle: 'Oversikt over kommende parlamentarisk aktivitet' }), - fi: (from, to) => ({ title: `Tuleva viikko: ${from}–${to}`, subtitle: 'Katsaus tulevaan parlamentaariseen toimintaan' }), - de: (from, to) => ({ title: `Woche voraus: ${from} bis ${to}`, subtitle: 'Vorschau auf die parlamentarische Aktivität' }), - fr: (from, to) => ({ title: `Semaine à venir : ${from} au ${to}`, subtitle: "Aperçu de l'activité parlementaire à venir" }), - es: (from, to) => ({ title: `Semana por delante: ${from} al ${to}`, subtitle: 'Perspectiva de la actividad parlamentaria próxima' }), - nl: (from, to) => ({ title: `Week vooruit: ${from} tot ${to}`, subtitle: 'Vooruitblik op parlementaire activiteit' }), - ar: (from, to) => ({ title: `الأسبوع القادم: ${from} إلى ${to}`, subtitle: 'نظرة مسبقة على النشاط البرلماني القادم' }), - he: (from, to) => ({ title: `השבוע הקרוב: ${from} עד ${to}`, subtitle: 'סקירה מקדימה של הפעילות הפרלמנטרית' }), - ja: (from, to) => ({ title: `今週の展望:${from}〜${to}`, subtitle: '今後の議会活動の予測報道' }), - ko: (from, to) => ({ title: `주간 전망: ${from}~${to}`, subtitle: '향후 의회 활동 전망' }), - zh: (from, to) => ({ title: `本周展望:${from}至${to}`, subtitle: '即将到来的议会活动前瞻' }), -}; - -const COMMITTEE_TITLES: TitleMapCount = { - en: (n) => ({ title: `Committee Reports Analysis`, subtitle: `Analysis of ${n} committee reports` }), - sv: (n) => ({ title: `Utskottsbetänkanden`, subtitle: `Analys av ${n} utskottsbetänkanden` }), - da: (n) => ({ title: `Udvalgsrapporter`, subtitle: `Analyse af ${n} udvalgsrapporter` }), - no: (n) => ({ title: `Komitérapporter`, subtitle: `Analyse av ${n} komitérapporter` }), - fi: (n) => ({ title: `Valiokunnan mietinnöt`, subtitle: `${n} valiokunnan mietinnön analyysi` }), - de: (n) => ({ title: `Ausschussberichte`, subtitle: `Analyse von ${n} Ausschussberichten` }), - fr: (n) => ({ title: `Rapports de commission`, subtitle: `Analyse de ${n} rapports de commission` }), - es: (n) => ({ title: `Informes de comisión`, subtitle: `Análisis de ${n} informes de comisión` }), - nl: (n) => ({ title: `Commissierapporten`, subtitle: `Analyse van ${n} commissierapporten` }), - ar: (n) => ({ title: `تقارير اللجان`, subtitle: `تحليل ${n} تقارير لجان` }), - he: (n) => ({ title: `דוחות ועדות`, subtitle: `ניתוח ${n} דוחות ועדות` }), - ja: (n) => ({ title: `委員会報告`, subtitle: `${n}件の委員会報告の分析` }), - ko: (n) => ({ title: `위원회 보고서`, subtitle: `${n}개 위원회 보고서 분석` }), - zh: (n) => ({ title: `委员会报告`, subtitle: `${n}份委员会报告分析` }), -}; - -const PROPOSITION_TITLES: TitleMapCount = { - en: (n) => ({ title: `Government Propositions`, subtitle: `Analysis of ${n} government propositions` }), - sv: (n) => ({ title: `Regeringens propositioner`, subtitle: `Analys av ${n} propositioner` }), - da: (n) => ({ title: `Regeringsforslag`, subtitle: `Analyse af ${n} regeringsforslag` }), - no: (n) => ({ title: `Regjeringens proposisjoner`, subtitle: `Analyse av ${n} proposisjoner` }), - fi: (n) => ({ title: `Hallituksen esitykset`, subtitle: `${n} hallituksen esityksen analyysi` }), - de: (n) => ({ title: `Regierungsvorlagen`, subtitle: `Analyse von ${n} Regierungsvorlagen` }), - fr: (n) => ({ title: `Propositions gouvernementales`, subtitle: `Analyse de ${n} propositions` }), - es: (n) => ({ title: `Proposiciones gubernamentales`, subtitle: `Análisis de ${n} proposiciones` }), - nl: (n) => ({ title: `Regeringsvoorstellen`, subtitle: `Analyse van ${n} voorstellen` }), - ar: (n) => ({ title: `مقترحات حكومية`, subtitle: `تحليل ${n} مقترحات حكومية` }), - he: (n) => ({ title: `הצעות ממשלתיות`, subtitle: `ניתוח ${n} הצעות ממשלתיות` }), - ja: (n) => ({ title: `政府提案`, subtitle: `${n}件の政府提案の分析` }), - ko: (n) => ({ title: `정부 제안`, subtitle: `${n}개 정부 제안 분석` }), - zh: (n) => ({ title: `政府提案`, subtitle: `${n}份政府提案分析` }), -}; - -const MOTION_TITLES: TitleMapCount = { - en: (n) => ({ title: `Opposition Motions`, subtitle: `Analysis of ${n} opposition motions` }), - sv: (n) => ({ title: `Oppositionsmotioner`, subtitle: `Analys av ${n} motioner` }), - da: (n) => ({ title: `Oppositionsforslag`, subtitle: `Analyse af ${n} forslag` }), - no: (n) => ({ title: `Opposisjonsforslag`, subtitle: `Analyse av ${n} forslag` }), - fi: (n) => ({ title: `Opposition aloitteet`, subtitle: `${n} aloitteen analyysi` }), - de: (n) => ({ title: `Oppositionsanträge`, subtitle: `Analyse von ${n} Anträgen` }), - fr: (n) => ({ title: `Motions d'opposition`, subtitle: `Analyse de ${n} motions` }), - es: (n) => ({ title: `Mociones de oposición`, subtitle: `Análisis de ${n} mociones` }), - nl: (n) => ({ title: `Oppositiemoties`, subtitle: `Analyse van ${n} moties` }), - ar: (n) => ({ title: `اقتراحات المعارضة`, subtitle: `تحليل ${n} اقتراحات` }), - he: (n) => ({ title: `הצעות אופוזיציה`, subtitle: `ניתוח ${n} הצעות` }), - ja: (n) => ({ title: `野党動議`, subtitle: `${n}件の動議分析` }), - ko: (n) => ({ title: `야당 동의`, subtitle: `${n}개 동의 분석` }), - zh: (n) => ({ title: `反对党动议`, subtitle: `${n}份动议分析` }), -}; - -// --------------------------------------------------------------------------- -// Article Generator -// --------------------------------------------------------------------------- - -async function generateForDate(targetDate: Date, type: string, client: MCPClient): Promise { - const dateStr = formatDate(targetDate); - const weekEnd = formatDate(addDays(targetDate, 6)); - let slug: string; - let titleMap: TitleMapWeekAhead | TitleMapCount; - let data: Record; - let dataKey: string; - let toolName: string; - - switch (type) { - case 'week-ahead': { - slug = `${dateStr}-week-ahead`; - titleMap = WEEK_AHEAD_TITLES; - console.log(` 📆 Fetching calendar ${dateStr} → ${weekEnd}...`); - const events = await client.fetchCalendarEvents(dateStr, weekEnd); - data = { events }; - dataKey = 'week-ahead'; - toolName = 'get_calendar_events'; - break; - } - case 'committee-reports': { - slug = `${dateStr}-committee-reports`; - titleMap = COMMITTEE_TITLES; - console.log(` 📋 Fetching committee reports near ${dateStr}...`); - const reports = await client.fetchCommitteeReports(15); - data = { reports }; - dataKey = 'committee-reports'; - toolName = 'get_betankanden'; - break; - } - case 'propositions': { - slug = `${dateStr}-government-propositions`; - titleMap = PROPOSITION_TITLES; - console.log(` 📜 Fetching propositions near ${dateStr}...`); - const propositions = await client.fetchPropositions(10); - data = { propositions }; - dataKey = 'propositions'; - toolName = 'get_propositioner'; - break; - } - case 'motions': { - slug = `${dateStr}-opposition-motions`; - titleMap = MOTION_TITLES; - console.log(` 📝 Fetching motions near ${dateStr}...`); - const motions = await client.fetchMotions(10); - data = { motions }; - dataKey = 'motions'; - toolName = 'get_motioner'; - break; - } - default: - console.log(` ⚠️ Unknown type: ${type}, skipping`); - return 0; - } - - // Check if articles already exist for this slug - const existingLangs = languages.filter((l) => articleExists(slug, l)); - if (existingLangs.length === languages.length) { - console.log(` ⏭️ All ${languages.length} language versions already exist for ${slug}`); - return 0; - } - - const missingLangs = languages.filter((l) => !articleExists(slug, l)); - console.log(` 🌐 Generating ${missingLangs.length} missing language versions...`); - - let generated = 0; - - // Prefer date-specific analysis for the backport target date. - // Fall back to null rather than latest (which would be historically inaccurate). - let enrichment: AnalysisEnrichment | null = null; - try { - const dateAnalysis = await readDailyAnalysis(dateStr); - if (dateAnalysis.hasAnalysis) { - const meta = deriveArticleClassificationMeta(dateAnalysis); - enrichment = { - classificationLevel: meta.classificationLevel, - riskLevel: meta.riskLevel, - confidenceLabel: meta.confidenceLabel, - significance: meta.significanceScore, - urgency: meta.urgency, - }; - } - } catch (error: unknown) { - // No date-specific analysis available — proceed without enrichment - if (process.env.DEBUG || process.env.LOG_LEVEL === 'debug') { - console.error(`⚠️ Failed to load date-specific analysis for ${dateStr}:`, error); - } - } - - for (const lang of missingLangs) { - try { - const content = generateArticleContent(data, dataKey, lang); - const watchPoints = extractWatchPoints(data, lang); - const metadata = generateMetadata(data, dataKey, lang); - const readTime = calculateReadTime(content); - const sources = generateSources([toolName]); - - // Get titles - const dataItems = Object.values(data)[0]; - const count = Array.isArray(dataItems) ? dataItems.length : 0; - - let titles: TitleInfo; - if (type === 'week-ahead') { - const fn = (titleMap as TitleMapWeekAhead)[lang] || (titleMap as TitleMapWeekAhead).en; - titles = fn(dateStr, weekEnd); - } else { - const fn = (titleMap as TitleMapCount)[lang] || (titleMap as TitleMapCount).en; - titles = fn(count); - } - - const html = generateArticleHTML({ - slug: `${slug}-${lang}.html`, - title: titles.title, - subtitle: titles.subtitle, - date: dateStr, - type: type === 'week-ahead' ? 'prospective' : 'analysis', - readTime, - lang, - content, - watchPoints, - sources, - keywords: metadata.keywords, - topics: metadata.topics, - tags: metadata.tags, - // Analysis references are injected by fix-analysis-references.ts post-processor - ...(enrichment ?? {}), - }); - - if (dryRun) { - console.log(` [DRY RUN] Would write: ${slug}-${lang}.html`); - } else { - const filepath = path.join(NEWS_DIR, `${slug}-${lang}.html`); - fs.writeFileSync(filepath, html, 'utf-8'); - console.log(` ✅ ${slug}-${lang}.html`); - } - generated++; - } catch (err: unknown) { - console.error(` ❌ ${lang}: ${(err as Error).message}`); - } - } - - return generated; -} - -// --------------------------------------------------------------------------- -// Main -// --------------------------------------------------------------------------- - -async function main(): Promise { - console.log(''); - console.log('═══════════════════════════════════════════════════════'); - console.log(' 📰 Riksdagsmonitor News Backport Generator'); - console.log('═══════════════════════════════════════════════════════'); - console.log(` From: ${fromDate}`); - console.log(` To: ${toDate}`); - console.log(` Mode: ${mode}`); - console.log(` Types: ${articleTypes.join(', ')}`); - console.log(` Languages: ${languages.join(', ')}`); - console.log(` Dry run: ${dryRun ? 'Yes' : 'No'}`); - console.log(` Delay: ${delayMs}ms between MCP calls`); - console.log('═══════════════════════════════════════════════════════'); - console.log(''); - - const dates: Date[] = - mode === 'weekly' - ? getMondaysInRange(fromDate, toDate) - : getDaysInRange(fromDate, toDate); - - console.log(`📅 ${dates.length} ${mode === 'weekly' ? 'weeks' : 'days'} to process\n`); - - const client = new MCPClient(); - let totalGenerated = 0; - let totalSkipped = 0; - let totalErrors = 0; - - for (const date of dates) { - const dateStr = formatDate(date); - console.log(`\n📆 Processing ${dateStr}...`); - - for (const type of articleTypes) { - console.log(` 📰 Type: ${type}`); - try { - const count = await generateForDate(date, type, client); - totalGenerated += count; - if (count === 0) totalSkipped++; - await delay(delayMs); - } catch (err: unknown) { - console.error(` ❌ Error for ${type} on ${dateStr}: ${(err as Error).message}`); - totalErrors++; - } - } - } - - console.log(''); - console.log('═══════════════════════════════════════════════════════'); - console.log(' 📊 Backport Summary'); - console.log('═══════════════════════════════════════════════════════'); - console.log(` Dates processed: ${dates.length}`); - console.log(` Articles created: ${totalGenerated}`); - console.log(` Skipped (exist): ${totalSkipped}`); - console.log(` Errors: ${totalErrors}`); - console.log('═══════════════════════════════════════════════════════'); - console.log(''); - - if (!dryRun && totalGenerated > 0) { - console.log('💡 Run the following to update indexes:'); - console.log(' node scripts/generate-news-indexes.js'); - console.log(' node scripts/generate-sitemap.js'); - } - - if (totalErrors > 0 && totalGenerated === 0) { - process.exit(1); - } -} - -main().catch((err: unknown) => { - console.error('💥 Fatal error:', (err as Error).message); - process.exit(1); -}); diff --git a/scripts/government-role-validator.ts b/scripts/government-role-validator.ts deleted file mode 100644 index 105c08a55b..0000000000 --- a/scripts/government-role-validator.ts +++ /dev/null @@ -1,264 +0,0 @@ -/** - * @module government-role-validator - * @description Validates government role attributions against the CIA platform's - * authoritative government role member data (view_riksdagen_goverment_role_member_sample.csv). - * - * ROOT CAUSE PREVENTION: Agentic workflows previously hallucinated government titles - * (e.g. calling Lotta Edholm "Deputy Prime Minister" when she is gymnasie-, högskole- - * och forskningsminister, and the actual Vice statsminister is Ebba Busch (KD)). - * This module provides a validation layer that cross-references names against - * known government roles from the CIA data export. - * - * The CSV is downloaded from: - * https://raw.githubusercontent.com/Hack23/cia/refs/heads/master/service.data.impl/sample-data/view_riksdagen_goverment_role_member_sample.csv - * - * @author Hack23 AB - * @license Apache-2.0 - */ - -import { readFileSync } from 'node:fs'; -import { resolve } from 'node:path'; - -/** A single government role record from the CIA data export. */ -export interface GovernmentRoleMember { - readonly roleId: string; - readonly department: string; - readonly roleCode: string; - readonly firstName: string; - readonly lastName: string; - readonly fromDate: string; - readonly toDate: string; - readonly personId: string; - readonly party: string; - readonly active: boolean; -} - -/** Result of validating a name + claimed role against known data. */ -export interface RoleValidationResult { - readonly valid: boolean; - readonly name: string; - readonly claimedRole: string; - readonly actualRoles: readonly GovernmentRoleMember[]; - readonly suggestion: string; -} - -const CSV_RELATIVE_PATH = 'cia-data/view_riksdagen_goverment_role_member_sample.csv'; - -/** - * Parse a single CSV line respecting quoted fields (RFC 4180). - * Handles commas inside double-quoted values (e.g. "Gymnasie-, högskole-…"). - */ -function parseCSVLine(line: string): string[] { - const fields: string[] = []; - let current = ''; - let inQuotes = false; - for (let i = 0; i < line.length; i++) { - const ch = line[i]; - if (inQuotes) { - if (ch === '"' && line[i + 1] === '"') { - current += '"'; - i++; // skip escaped quote - } else if (ch === '"') { - inQuotes = false; - } else { - current += ch; - } - } else if (ch === '"') { - inQuotes = true; - } else if (ch === ',') { - fields.push(current); - current = ''; - } else { - current += ch; - } - } - fields.push(current); - return fields; -} - -/** Parse the CSV into GovernmentRoleMember records. */ -function parseCSV(csvText: string): GovernmentRoleMember[] { - const lines = csvText.split('\n').filter(l => l.trim().length > 0); - if (lines.length < 2) return []; - // Skip header - return lines.slice(1).map(line => { - const cols = parseCSVLine(line); - return { - roleId: cols[0] ?? '', - department: cols[1] ?? '', - roleCode: cols[2] ?? '', - firstName: cols[3] ?? '', - lastName: cols[4] ?? '', - fromDate: cols[5] ?? '', - toDate: cols[6] ?? '', - personId: cols[7] ?? '', - party: cols[8] ?? '', - active: cols[10] === 't', - }; - }); -} - -/** Normalise a name for fuzzy comparison (lowercase, trim, collapse whitespace). */ -function normaliseName(name: string): string { - return name.toLowerCase().replace(/\s+/g, ' ').trim(); -} - -/** - * Load and cache the government role member data. - * Uses the local CSV in cia-data/ as the authoritative source. - */ -let cachedMembers: GovernmentRoleMember[] | null = null; - -export function loadGovernmentRoleMembers(repoRoot?: string): GovernmentRoleMember[] { - if (cachedMembers) return cachedMembers; - const root = repoRoot ?? resolve(import.meta.dirname ?? '.', '..'); - const csvPath = resolve(root, CSV_RELATIVE_PATH); - try { - const csvText = readFileSync(csvPath, 'utf-8'); - cachedMembers = parseCSV(csvText); - return cachedMembers; - } catch (err: unknown) { - const code = (err as NodeJS.ErrnoException)?.code; - if (code === 'ENOENT') { - console.warn(`[government-role-validator] CSV not found at ${csvPath}; role validation disabled.`); - } else { - console.warn(`[government-role-validator] Error loading ${csvPath}: ${err}; role validation disabled.`); - } - cachedMembers = []; - return cachedMembers; - } -} - -/** Clear the cache (useful for testing). */ -export function clearCache(): void { - cachedMembers = null; -} - -/** - * Find all government roles for a person by last name (and optionally first name). - * Returns roles sorted by most recent first. - */ -export function findRolesForPerson( - lastName: string, - firstName?: string, - repoRoot?: string, -): GovernmentRoleMember[] { - const members = loadGovernmentRoleMembers(repoRoot); - const normLast = normaliseName(lastName); - const normFirst = firstName ? normaliseName(firstName) : undefined; - - return members - .filter(m => { - if (normaliseName(m.lastName) !== normLast) return false; - if (normFirst && normaliseName(m.firstName) !== normFirst) return false; - return true; - }) - .sort((a, b) => b.fromDate.localeCompare(a.fromDate)); -} - -/** - * Get the current (most recent active, or most recent) role for a person. - */ -export function getCurrentRole( - lastName: string, - firstName?: string, - repoRoot?: string, -): GovernmentRoleMember | undefined { - const roles = findRolesForPerson(lastName, firstName, repoRoot); - // Prefer active roles - const active = roles.find(r => r.active); - return active ?? roles[0]; -} - -/** - * Validate whether a claimed government role title is correct for a person. - * Returns a validation result with the actual role and a correction suggestion. - * - * @param fullName - Full name of the person (e.g. "Lotta Edholm") - * @param claimedRole - The role attributed in the article (e.g. "Deputy Prime Minister") - * @param repoRoot - Optional repository root path - */ -export function validateGovernmentRole( - fullName: string, - claimedRole: string, - repoRoot?: string, -): RoleValidationResult { - const parts = fullName.trim().split(/\s+/); - const lastName = parts.pop() ?? ''; - const firstName = parts.join(' ') || undefined; - const roles = findRolesForPerson(lastName, firstName, repoRoot); - - if (roles.length === 0) { - return { - valid: false, - name: fullName, - claimedRole, - actualRoles: [], - suggestion: `No government role records found for "${fullName}" in CIA data. Verify the name and role manually.`, - }; - } - - const currentRole = roles.find(r => r.active) ?? roles[0]; - const claimedLower = claimedRole.toLowerCase(); - - // Check if claimed role matches the actual role code or department - const roleCodeLower = currentRole.roleCode.toLowerCase(); - const departmentLower = currentRole.department.toLowerCase(); - - // Known title mappings for Deputy PM across supported languages - const deputyPMTerms = ['deputy prime minister', 'vice statsminister', 'vice premier', - 'vicepremier', 'viceministerpräsident', 'vice-première ministre', 'viceprimera ministra', - 'varapääministeri', 'visestatsminister', 'vicestatsminister', - 'نائبة رئيس الوزراء', 'סגנית ראש הממשלה', '副首相', '부총리']; - - const isClaimingDeputyPM = deputyPMTerms.some(term => claimedLower.includes(term)); - - // Deputy PM (Vice statsminister) is a constitutional designation given to one minister. - // The CIA CSV does not have an explicit "Vice statsminister" role_code — the Deputy PM - // has their regular ministerial role_code. To validate, we check that the person's - // department is Statsrådsberedningen (PM's office) or role_code is Statsminister. - // For any other person, claiming Deputy PM is invalid. - if (isClaimingDeputyPM) { - const isPMRole = roleCodeLower === 'statsminister' || - departmentLower === 'statsrådsberedningen'; - if (!isPMRole) { - return { - valid: false, - name: fullName, - claimedRole, - actualRoles: roles, - suggestion: `"${fullName}" is ${currentRole.roleCode} at ${currentRole.department} (${currentRole.party}), NOT Deputy Prime Minister. ` + - `Check if the actual Vice statsminister should be cited instead.`, - }; - } - } - - // Check if claimed role roughly matches known role - const matchesRole = claimedLower.includes(roleCodeLower) || - roleCodeLower.includes(claimedLower) || - claimedLower.includes(departmentLower); - - return { - valid: matchesRole || !isClaimingDeputyPM, - name: fullName, - claimedRole, - actualRoles: roles, - suggestion: matchesRole - ? `Role verified: "${fullName}" is ${currentRole.roleCode} at ${currentRole.department} (${currentRole.party}).` - : `"${fullName}" is recorded as ${currentRole.roleCode} at ${currentRole.department} (${currentRole.party}). Claimed role "${claimedRole}" may need verification.`, - }; -} - -/** - * Get a formatted role description for a person suitable for article text. - * Returns the most current role in "RoleCode FirstName LastName (Party)" format. - */ -export function getFormattedRole( - lastName: string, - firstName?: string, - repoRoot?: string, -): string | undefined { - const role = getCurrentRole(lastName, firstName, repoRoot); - if (!role) return undefined; - return `${role.roleCode} ${role.firstName} ${role.lastName} (${role.party})`; -} diff --git a/scripts/party-variants.ts b/scripts/party-variants.ts deleted file mode 100644 index b0f67bcbac..0000000000 --- a/scripts/party-variants.ts +++ /dev/null @@ -1,62 +0,0 @@ -/** - * @module Intelligence/PartyAnalysis - * @description Swedish political party name normalization and mention extraction. - * Bounded context: Political Entities - * - * @author Hack23 AB - * @license Apache-2.0 - */ - -import type { PartyCode, PartyVariantMap } from './types/party.js'; - -/** - * Canonical mapping from party codes to their known name variants. - * Used for normalizing different textual references to the same party. - */ -export const PARTY_VARIANTS: PartyVariantMap = { - S: ['Socialdemokraterna', 'S'], - M: ['Moderaterna', 'M'], - SD: ['Sverigedemokraterna', 'SD'], - V: ['Vänsterpartiet', 'V'], - MP: ['Miljöpartiet', 'MP'], - C: ['Centerpartiet', 'C'], - L: ['Liberalerna', 'L'], - KD: ['Kristdemokraterna', 'KD'], -} as const; - -/** - * Extract unique party mentions from HTML content. - * Uses Unicode-aware regex boundaries for proper word detection across scripts. - * - * @param html - HTML content to search for party references - * @returns Set of canonical party codes found in the content - */ -export function extractPartyMentions(html: string | null | undefined): Set { - const parties = new Set(); - - if (!html) { - return parties; - } - - for (const [canonicalCode, variants] of Object.entries(PARTY_VARIANTS) as Array< - [PartyCode, readonly string[]] - >) { - for (const variant of variants) { - // Escape special regex characters in variant - const escapedVariant = variant.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); - - // Unicode-aware non-letter/non-number boundary for ALL variants. - // \b doesn't work well with non-ASCII (ä, ö, å) so we use [^\p{L}\p{N}]. - const pattern = new RegExp( - `(?:^|[^\\p{L}\\p{N}])${escapedVariant}(?=$|[^\\p{L}\\p{N}])`, - 'ui', - ); - if (pattern.test(html)) { - parties.add(canonicalCode); - break; - } - } - } - - return parties; -} diff --git a/scripts/pipeline/index.ts b/scripts/pipeline/index.ts deleted file mode 100644 index 4f9b345bfd..0000000000 --- a/scripts/pipeline/index.ts +++ /dev/null @@ -1,32 +0,0 @@ -/** - * @module pipeline - * @description Barrel re-export for the content pipeline module. - * - * Public API: - * - Types: `ContentPipeline`, `PipelineOptions`, `PipelineResult`, - * `OrchestratorConfig`, `OrchestratorResult`, `PipelineStage` - * - Class: `PipelineOrchestrator` - * - Validation: `validateArticleHTML`, `validateArticleBatch`, - * `ArticleValidationResult`, `ValidationOptions` - * - * @author Hack23 AB - * @license Apache-2.0 - */ - -export type { - ContentPipeline, - PipelineOptions, - PipelineResult, - PipelineStage, - OrchestratorConfig, - OrchestratorResult, -} from './types.js'; - -export { PipelineOrchestrator } from './orchestrator.js'; - -export type { - ArticleValidationResult, - ValidationOptions, -} from './validation.js'; - -export { validateArticleHTML, validateArticleBatch } from './validation.js'; diff --git a/scripts/pipeline/orchestrator.ts b/scripts/pipeline/orchestrator.ts deleted file mode 100644 index 792e728ad4..0000000000 --- a/scripts/pipeline/orchestrator.ts +++ /dev/null @@ -1,179 +0,0 @@ -/** - * @module pipeline/orchestrator - * @description Unified pipeline orchestrator. - * - * The `PipelineOrchestrator` runs a collection of `ContentPipeline` instances - * either sequentially (default) or in parallel, collects results, logs - * progress, and surfaces a structured `OrchestratorResult`. - * - * Usage (sequential): - * ```ts - * import { PipelineOrchestrator } from './pipeline/orchestrator.js'; - * import { MotionsPipeline } from './pipeline/plugins/motions-pipeline.js'; - * - * const orchestrator = new PipelineOrchestrator({ - * pipelines: [new MotionsPipeline()], - * }); - * const result = await orchestrator.run(); - * ``` - * - * @author Hack23 AB - * @license Apache-2.0 - */ - -import type { - ContentPipeline, - OrchestratorConfig, - OrchestratorResult, - PipelineOptions, - PipelineResult, -} from './types.js'; - -// --------------------------------------------------------------------------- -// PipelineOrchestrator -// --------------------------------------------------------------------------- - -/** - * Orchestrates the execution of one or more `ContentPipeline` instances. - * - * Key behaviours: - * - **Error isolation**: a failure in one pipeline does not abort others. - * - **Parallel mode**: when `config.parallel = true` all pipelines run via - * `Promise.allSettled` for throughput and robust error handling; otherwise - * they execute sequentially for predictable log output. - * - **Result aggregation**: all per-pipeline `PipelineResult` objects are - * merged into a single `OrchestratorResult`. - */ -export class PipelineOrchestrator { - private readonly pipelines: ContentPipeline[]; - private readonly parallel: boolean; - private readonly defaultOptions: PipelineOptions; - - constructor(config: OrchestratorConfig) { - this.pipelines = config.pipelines; - this.parallel = config.parallel ?? false; - this.defaultOptions = config.defaultOptions ?? {}; - } - - // ------------------------------------------------------------------------- - // Public API - // ------------------------------------------------------------------------- - - /** - * Run all pipelines and return an aggregated `OrchestratorResult`. - * - * @param overrideOptions - Options forwarded to every pipeline, merged on - * top of `defaultOptions`. - */ - async run(overrideOptions?: PipelineOptions): Promise { - const startTime = Date.now(); - const effectiveOptions: PipelineOptions = { - ...this.defaultOptions, - ...(overrideOptions ?? {}), - }; - - const results: Record = {}; - - if (this.parallel) { - const settled = await Promise.allSettled( - this.pipelines.map(p => this._runSingle(p, effectiveOptions)), - ); - for (let i = 0; i < this.pipelines.length; i++) { - const pipeline = this.pipelines[i]!; - const outcome = settled[i]!; - if (outcome.status === 'fulfilled') { - results[pipeline.name] = outcome.value; - } else { - // Promise should never reject because _runSingle catches all errors, - // but handle it defensively. - results[pipeline.name] = { - success: false, - error: String((outcome as PromiseRejectedResult).reason), - warnings: [], - degraded: false, - files: 0, - }; - } - } - } else { - for (const pipeline of this.pipelines) { - results[pipeline.name] = await this._runSingle(pipeline, effectiveOptions); - } - } - - return this._aggregate(results, Date.now() - startTime); - } - - // ------------------------------------------------------------------------- - // Private helpers - // ------------------------------------------------------------------------- - - /** - * Run a single pipeline, wrapping any unexpected throws in a failed result. - */ - private async _runSingle( - pipeline: ContentPipeline, - options: PipelineOptions, - ): Promise { - console.log(`[Orchestrator] ▶ Starting pipeline: ${pipeline.name}`); - const t0 = Date.now(); - try { - const result = await pipeline.run(options); - const durationMs = Date.now() - t0; - console.log( - `[Orchestrator] ${result.success ? '✅' : '❌'} Pipeline "${pipeline.name}" completed in ${durationMs}ms`, - ); - return { ...result, durationMs }; - } catch (err: unknown) { - const durationMs = Date.now() - t0; - const message = err instanceof Error ? err.message : String(err); - console.error( - `[Orchestrator] 💥 Pipeline "${pipeline.name}" threw unexpectedly after ${durationMs}ms: ${message}`, - ); - return { - success: false, - error: message, - durationMs, - warnings: [`Unexpected throw from pipeline "${pipeline.name}": ${message}`], - degraded: false, - files: 0, - }; - } - } - - /** - * Aggregate individual pipeline results into a single orchestrator result. - */ - private _aggregate( - results: Record, - totalDurationMs: number, - ): OrchestratorResult { - let totalFiles = 0; - let allSucceeded = true; - const warnings: string[] = []; - - for (const [name, result] of Object.entries(results)) { - if (!result.success) { - allSucceeded = false; - console.warn(`[Orchestrator] ⚠ Pipeline "${name}" did not succeed: ${result.error ?? 'unknown error'}`); - } - totalFiles += result.files ?? 0; - if (result.warnings) { - warnings.push(...result.warnings); - } - } - - console.log( - `[Orchestrator] 🏁 All pipelines done. ` + - `success=${allSucceeded}, files=${totalFiles}, duration=${totalDurationMs}ms`, - ); - - return { - allSucceeded, - totalFiles, - results, - warnings, - durationMs: totalDurationMs, - }; - } -} diff --git a/scripts/pipeline/types.ts b/scripts/pipeline/types.ts deleted file mode 100644 index 71e61b914b..0000000000 --- a/scripts/pipeline/types.ts +++ /dev/null @@ -1,144 +0,0 @@ -/** - * @module pipeline/types - * @description ContentPipeline interface and related types for the standardised - * article-generation lifecycle: fetch → transform → generate → validate → write. - * - * Each article type implements `ContentPipeline` and is executed by the - * `PipelineOrchestrator` in `orchestrator.ts`. - * - * @author Hack23 AB - * @license Apache-2.0 - */ - -import type { Language } from '../types/language.js'; -import type { GenerationResult } from '../types/article.js'; - -// --------------------------------------------------------------------------- -// Pipeline stage lifecycle -// --------------------------------------------------------------------------- - -/** - * Named stages of the article-generation lifecycle. - * Stages execute sequentially for each language variant. - */ -export type PipelineStage = 'fetch' | 'transform' | 'generate' | 'validate' | 'write'; - -// --------------------------------------------------------------------------- -// Pipeline result -// --------------------------------------------------------------------------- - -/** - * Rich result returned by a completed pipeline run. - * Extends `GenerationResult` with per-stage timing and degradation metadata. - */ -export interface PipelineResult extends GenerationResult { - /** Wall-clock time in milliseconds for the complete pipeline run. */ - durationMs?: number; - /** - * Per-stage duration breakdown (ms). - * Keys match `PipelineStage` values. - */ - stageDurations?: Partial>; - /** - * Warnings collected during the run (e.g. MCP queries that returned no data - * but were handled via graceful degradation). - */ - warnings?: string[]; - /** - * Whether the pipeline used cached / fallback data for any stage. - * `true` indicates at least one graceful-degradation path was taken. - */ - degraded?: boolean; -} - -// --------------------------------------------------------------------------- -// Pipeline options -// --------------------------------------------------------------------------- - -/** - * Common options accepted by every `ContentPipeline.run()` implementation. - */ -export interface PipelineOptions { - /** Language variants to generate. Defaults to `['en', 'sv']`. */ - languages?: Language[]; - /** - * Callback invoked after each article HTML is generated. - * Used by the orchestrator to write files. When `null` the article is - * generated in-memory only (useful for tests / dry-run mode). - */ - writeArticle?: ((html: string, filename: string) => Promise) | null; - /** - * When `true` articles are generated even when source data is sparse. - * Defaults to `false`. - */ - allowDegradedContent?: boolean; -} - -// --------------------------------------------------------------------------- -// ContentPipeline interface -// --------------------------------------------------------------------------- - -/** - * Contract that every article-type plugin must implement. - * - * Lifecycle: - * 1. **fetch** – Retrieve raw data from MCP tools (may partially fail). - * 2. **transform** – Convert raw data into article payloads per language. - * 3. **generate** – Render HTML via `generateArticleHTML`. - * 4. **validate** – Verify HTML structure before writing. - * 5. **write** – Persist files to `news/` directory. - * - * Implementations are responsible for graceful degradation: if the `fetch` - * stage partially fails they should log a warning and continue with whatever - * data is available rather than throwing. - */ -export interface ContentPipeline { - /** - * Human-readable name of this pipeline (e.g. `'motions'`). - * Used for logging and metrics. - */ - readonly name: string; - - /** - * Execute the full pipeline lifecycle and return a `PipelineResult`. - * - * Implementations **must not throw** — all errors should be caught and - * returned via `PipelineResult.success = false` and `PipelineResult.error`. - */ - run(options?: PipelineOptions): Promise; -} - -// --------------------------------------------------------------------------- -// Orchestrator configuration -// --------------------------------------------------------------------------- - -/** - * Configuration passed to `PipelineOrchestrator`. - */ -export interface OrchestratorConfig { - /** Pipelines to run. Order matters when `parallel = false`. */ - pipelines: ContentPipeline[]; - /** - * When `true` all pipelines run concurrently via `Promise.allSettled`. - * Defaults to `false` (sequential) for predictable logging. - */ - parallel?: boolean; - /** Default options forwarded to each pipeline unless overridden. */ - defaultOptions?: PipelineOptions; -} - -/** - * Aggregate result produced by the orchestrator after running all pipelines. - */ -export interface OrchestratorResult { - /** `true` when every pipeline in the run reported success. */ - allSucceeded: boolean; - /** Total number of files written across all pipelines. */ - totalFiles: number; - /** Individual results keyed by pipeline name. */ - results: Record; - /** Warnings collected across all pipelines. */ - warnings: string[]; - /** Total wall-clock time for the orchestrator run (ms). */ - durationMs: number; -} diff --git a/scripts/pipeline/validation.ts b/scripts/pipeline/validation.ts deleted file mode 100644 index b494abf07b..0000000000 --- a/scripts/pipeline/validation.ts +++ /dev/null @@ -1,180 +0,0 @@ -/** - * @module pipeline/validation - * @description Post-generation HTML structure validation. - * - * Validates that generated article HTML meets minimum structural requirements - * before the file is written to disk. Failures are non-fatal by default — - * the orchestrator collects validation warnings and continues. - * - * @author Hack23 AB - * @license Apache-2.0 - */ - -// --------------------------------------------------------------------------- -// Validation result types -// --------------------------------------------------------------------------- - -/** - * Result of validating a single HTML article string. - */ -export interface ArticleValidationResult { - /** `true` when the HTML passes all required checks. */ - passed: boolean; - /** Informational messages about checks that passed. */ - passedChecks: string[]; - /** Error messages for checks that failed (non-empty means `passed = false`). */ - errors: string[]; - /** Warning messages for checks that are advisory only. */ - warnings: string[]; -} - -/** - * Options controlling which checks are enforced. - */ -export interface ValidationOptions { - /** Require a `

` element (default: `true`). */ - requireH1?: boolean; - /** Require at least one `

` section (default: `true`). */ - requireSections?: boolean; - /** Require the sources footer block and fail when absent (default: `true`). */ - requireSources?: boolean; - /** Minimum word count threshold (default: `50`). */ - minWordCount?: number; - /** Require valid `` attribute (default: `true`). */ - requireLangAttr?: boolean; - /** Require `` declaration (default: `true`). */ - requireDoctype?: boolean; -} - -// --------------------------------------------------------------------------- -// Default options -// --------------------------------------------------------------------------- - -const DEFAULT_OPTIONS: Required = { - requireH1: true, - requireSections: true, - requireSources: true, - minWordCount: 50, - requireLangAttr: true, - requireDoctype: true, -}; - -// --------------------------------------------------------------------------- -// HTML structure validation -// --------------------------------------------------------------------------- - -/** - * Validate the structure of a generated article HTML string. - * - * This is a lightweight regex / string-based check, not a full DOM parse. - * It is intentionally fast and dependency-free. - * - * @param html - The complete HTML string to validate. - * @param opts - Optional configuration overrides. - * @returns Structured validation result. - */ -export function validateArticleHTML( - html: string, - opts: ValidationOptions = {}, -): ArticleValidationResult { - const options: Required = { ...DEFAULT_OPTIONS, ...opts }; - - const errors: string[] = []; - const warnings: string[] = []; - const passedChecks: string[] = []; - - if (!html || typeof html !== 'string') { - return { - passed: false, - passedChecks, - errors: ['HTML is empty or not a string'], - warnings, - }; - } - - // --- DOCTYPE --- - if (options.requireDoctype) { - if (//i.test(html)) { - passedChecks.push('DOCTYPE present'); - } else { - errors.push('Missing declaration'); - } - } - - // --- lang attribute --- - if (options.requireLangAttr) { - if (/]+lang=["'][a-z]{2,5}["']/i.test(html)) { - passedChecks.push('lang attribute present'); - } else { - errors.push('Missing valid lang attribute on element'); - } - } - - // --- H1 --- - if (options.requireH1) { - if (/]*>[\s\S]+?<\/h1>/i.test(html)) { - passedChecks.push('H1 heading present'); - } else { - errors.push('Missing

heading'); - } - } - - // --- Sections (H2) --- - if (options.requireSections) { - const h2Matches = html.match(/]*>/gi); - const h2Count = h2Matches ? h2Matches.length : 0; - if (h2Count >= 1) { - passedChecks.push(`${h2Count}

section(s) present`); - } else { - errors.push('No

sections found — article content may be missing'); - } - } - - // --- Sources footer --- - if (options.requireSources) { - if (/article-sources|data-sources|riksdag-regering-mcp/i.test(html)) { - passedChecks.push('Sources block present'); - } else { - errors.push('Sources footer block not detected — article may lack attribution'); - } - } - - // --- Word count (article body only: strip , ')).toBe('alert(1)'); - }); - - it('strips HTML tags', () => { - expect(sanitizePlainText('bold text')).toBe('bold text'); - }); - - it('preserves special characters without escaping (escaping deferred to render sites)', () => { - expect(sanitizePlainText('Tom & Jerry < Friends')).toBe('Tom & Jerry < Friends'); - }); - - it('strips complete tags including event handler attributes', () => { - const result = sanitizePlainText(''); - expect(result).toBe(''); - }); - - it('handles incomplete/malformed tags by preserving remaining text', () => { - const result = sanitizePlainText('text < with unclosed bracket'); - expect(result).toBe('text < with unclosed bracket'); - }); - - it('handles nested quotes in tags', () => { - const result = sanitizePlainText('click'); - expect(result).toBe('click'); - }); - - it('returns empty string for empty input', () => { - expect(sanitizePlainText('')).toBe(''); - }); - - it('passes through plain text unchanged', () => { - expect(sanitizePlainText('Budget Analysis 2026')).toBe('Budget Analysis 2026'); - }); - - it('handles Swedish characters without escaping ampersand', () => { - expect(sanitizePlainText('Försvarsbudget & Säkerhetspolitik')).toBe('Försvarsbudget & Säkerhetspolitik'); - }); - - it('handles nested tag reconstruction attempts', () => { - const result = sanitizePlainText('ipt>alert(1)'); - expect(result).not.toContain('alert(1)'); - }); -}); - -describe('hashPathSuffix', () => { - it('returns a deterministic base-36 string for a given path', () => { - const result = hashPathSuffix('/pressmeddelanden/2026/03/example-doc'); - expect(typeof result).toBe('string'); - // Same input always yields same output - expect(hashPathSuffix('/pressmeddelanden/2026/03/example-doc')).toBe(result); - }); - - it('returns different hashes for different paths', () => { - const a = hashPathSuffix('/path/a'); - const b = hashPathSuffix('/path/b'); - expect(a).not.toBe(b); - }); - - it('replaces leading minus with "n"', () => { - // The function should never return a string starting with '-' - const result = hashPathSuffix('/some/path'); - expect(result).not.toMatch(/^-/); - }); - - it('handles empty string', () => { - const result = hashPathSuffix(''); - expect(typeof result).toBe('string'); - expect(result.length).toBeGreaterThan(0); - }); -}); diff --git a/tests/deep-inspection-pipeline.test.ts b/tests/deep-inspection-pipeline.test.ts deleted file mode 100644 index a57fc219c7..0000000000 --- a/tests/deep-inspection-pipeline.test.ts +++ /dev/null @@ -1,268 +0,0 @@ -/** - * Tests for deep-inspection multi-iteration intelligence enhancement: - * - analysisDepth config parameter parsing - * - DeepInspectionPipeline class structure and interface - * - New section labels in DEEP_SECTION_LABELS (executiveSummary, predictiveAssessment, - * historicalContext, methodology, likelyOutcome, coalitionStability, riskScenarios) - * - Strategic implications now available for all 14 languages - * - generateDeepInspectionContent depth-gated sections via exported utilities - */ - -import { describe, it, expect, vi } from 'vitest'; -import type { RawDocument } from '../scripts/data-transformers.js'; - -// --------------------------------------------------------------------------- -// 1. Config — analysisDepth parsing -// --------------------------------------------------------------------------- - -describe.sequential('analysisDepth config', () => { - it('exports analysisDepth as a valid depth value (1–4)', async () => { - const { analysisDepth } = await import('../scripts/generate-news-enhanced/config.js'); - expect([1, 2, 3, 4]).toContain(analysisDepth); - }); - - it('analysisDepth defaults to 1 when no --depth CLI arg is present', async () => { - // Save original process.argv and remove any --depth flags - const originalArgv = process.argv; - process.argv = originalArgv.filter(a => !a.startsWith('--depth')); - vi.resetModules(); - try { - const { analysisDepth } = await import('../scripts/generate-news-enhanced/config.js'); - expect(analysisDepth).toBe(1); - } finally { - process.argv = originalArgv; - vi.resetModules(); // Clear cached module so other tests get a clean slate - } - }); -}); - -// --------------------------------------------------------------------------- -// 2. DeepInspectionPipeline — class structure -// --------------------------------------------------------------------------- - -describe('DeepInspectionPipeline', () => { - it('exports DeepInspectionPipeline class', async () => { - const mod = await import('../scripts/deep-inspection/index.js'); - expect(typeof mod.DeepInspectionPipeline).toBe('function'); - }); - - it('exports default as DeepInspectionPipeline', async () => { - const mod = await import('../scripts/deep-inspection/index.js'); - expect(typeof mod.default).toBe('function'); - }); - - it('creates pipeline instance with no params', async () => { - const { DeepInspectionPipeline } = await import('../scripts/deep-inspection/index.js'); - const pipeline = new DeepInspectionPipeline(); - expect(pipeline).toBeDefined(); - expect(typeof pipeline.run).toBe('function'); - }); - - it('pipeline run() returns a Promise via mocked generator', async () => { - // Use vi.doMock so only this test block sees the mock — other describe - // blocks import the real module. - vi.doMock('../scripts/generate-news-enhanced/generators.js', () => ({ - generateDeepInspection: vi.fn().mockResolvedValue({ - success: true, - files: 0, - slug: 'test-slug', - }), - extractDocIdFromUrl: vi.fn(), - isGovernmentUrl: vi.fn(), - sanitizePlainText: vi.fn(), - hashPathSuffix: vi.fn(), - })); - vi.resetModules(); - - try { - const { DeepInspectionPipeline } = await import('../scripts/deep-inspection/index.js'); - const pipeline = new DeepInspectionPipeline(); - const resultPromise = pipeline.run(); - expect(resultPromise).toBeInstanceOf(Promise); - - const result = await resultPromise; - expect(result).toBeDefined(); - expect(result.depth).toBeDefined(); - expect([1, 2, 3, 4]).toContain(result.depth); - } finally { - vi.doUnmock('../scripts/generate-news-enhanced/generators.js'); - vi.resetModules(); - } - }); -}); - -// --------------------------------------------------------------------------- -// 3–4. DEEP_SECTION_LABELS and depth-gated content — behavioral assertions -// --------------------------------------------------------------------------- - -describe('generateDeepInspectionContent depth-gated sections', () => { - const docs: RawDocument[] = [ - { - dok_id: 'H901FiU1', - doktyp: 'bet', - dokumentnamn: 'Betänkande FiU1', - titel: 'Finansutskottets betänkande', - summary: 'Sammanfattning av betänkande.', - contentFetched: true, - datum: '2026-03-10', - organ: 'FiU', - }, - { - dok_id: 'H901Prop1', - doktyp: 'prop', - dokumentnamn: 'Proposition 2025/26:1', - titel: 'Regeringens proposition', - summary: 'Sammanfattning av proposition.', - contentFetched: true, - datum: '2026-03-08', - organ: 'Fi', - }, - { - dok_id: 'H901SFS1', - doktyp: 'sfs', - dokumentnamn: 'SFS 2026:123', - titel: 'Svensk författningssamling', - summary: 'Antagen författning.', - contentFetched: true, - datum: '2026-03-01', - organ: 'KU', - }, - ]; - - const render = async (depth: 1 | 2 | 3 | 4, lang: 'en' | 'sv' = 'en') => { - const { __deepInspectionTestHooks } = await import('../scripts/generate-news-enhanced/generators.js'); - return __deepInspectionTestHooks.generateDeepInspectionContent( - docs, - 'Fiscal policy', - lang, - depth, - ); - }; - - it('depth 1 renders baseline sections and excludes advanced classes', async () => { - const html = await render(1, 'en'); - expect(html).toContain('class="deep-topic-context"'); - expect(html).toContain('class="document-intelligence-analysis"'); - expect(html).toContain('class="strategic-implications"'); - expect(html).toContain('class="key-takeaways"'); - expect(html).not.toContain('class="historical-context"'); - expect(html).not.toContain('class="predictive-assessment"'); - expect(html).not.toContain('class="executive-intelligence-summary"'); - expect(html).not.toContain('class="methodology-confidence"'); - }); - - it('emits AI_MUST_REPLACE markers when aiResult is absent', async () => { - const html = await render(1, 'en'); - expect(html).toContain(''); - expect(html).toContain(''); - }); - - it('emits AI_MUST_REPLACE markers when aiResult has empty content', async () => { - const { __deepInspectionTestHooks } = await import('../scripts/generate-news-enhanced/generators.js'); - const emptyAiResult: import('../scripts/generate-news-enhanced/ai-analysis-pipeline.js').AIAnalysisResult = { - iterations: 1, - documentAnalyses: [], - synthesis: { - policyConvergence: '', - coalitionStressIndicators: '', - emergingTrends: '', - stakeholderPowerDynamics: '', - }, - dynamicSwotEntries: { - government: { strengths: [], weaknesses: [], opportunities: [], threats: [] }, - opposition: { strengths: [], weaknesses: [], opportunities: [], threats: [] }, - privateSector: { strengths: [], weaknesses: [], opportunities: [], threats: [] }, - }, - strategicImplications: '', - keyTakeaways: [], - analysisScore: 0, - }; - const html = __deepInspectionTestHooks.generateDeepInspectionContent( - docs, 'Fiscal policy', 'en', 1, emptyAiResult, - ); - expect(html).toContain(''); - expect(html).toContain(''); - }); - - it('depth 2 adds historical and predictive sections only', async () => { - const html = await render(2, 'en'); - expect(html).toContain('class="historical-context"'); - expect(html).toContain('class="predictive-assessment"'); - expect(html).not.toContain('class="executive-intelligence-summary"'); - expect(html).not.toContain('class="methodology-confidence"'); - }); - - it('depth 3 adds executive summary, predictive headings, and methodology', async () => { - const html = await render(3, 'en'); - expect(html).toContain('class="executive-intelligence-summary"'); - expect(html).toContain('class="predictive-assessment"'); - expect(html).toContain('class="methodology-confidence"'); - - const predictiveSection = html.match(/
/)?.[0] ?? ''; - expect(predictiveSection).not.toBe(''); - expect((predictiveSection.match(/

/g) ?? []).length).toBeGreaterThanOrEqual(3); - expect(predictiveSection).toContain('class="risk-scenarios"'); - - const methodologySection = html.match(/
/)?.[0] ?? ''; - expect(methodologySection).not.toBe(''); - expect((methodologySection.match(/
    [\s\S]*?<\/ol>/g) ?? []).length).toBe(1); - expect((methodologySection.match(/
  1. /g) ?? []).length).toBe(3); - }); - - it('depth 4 includes all methodology iterations', async () => { - const html = await render(4, 'en'); - const methodologySection = html.match(/
    /)?.[0] ?? ''; - expect(methodologySection).not.toBe(''); - expect((methodologySection.match(/
  2. /g) ?? []).length).toBe(4); - }); - - it('renders localized Swedish labels for advanced sections', async () => { - const html = await render(3, 'sv'); - expect(html).toContain('Sammanfattning för beslutsfattare'); - expect(html).toContain('Historisk kontext och prejudikat'); - expect(html).toContain('Prediktiv bedömning'); - expect(html).toContain('Metodik och konfidensgrad'); - expect(html).toContain('Troligt utfall'); - expect(html).toContain('Koalitionsstabilitetsprognos'); - expect(html).toContain('Riskscenarier'); - }); -}); - -// --------------------------------------------------------------------------- -// 5. Strategic implications — 14-language coverage -// --------------------------------------------------------------------------- - -describe('buildStrategicImplications 14-language coverage', () => { - const ALL_LANGUAGES = ['en', 'sv', 'da', 'no', 'fi', 'de', 'fr', 'es', 'nl', 'ar', 'he', 'ja', 'ko', 'zh']; - - it('all 14 language codes are defined in the project language list', () => { - expect(ALL_LANGUAGES).toHaveLength(14); - }); - - it('generators module loads without error — validates all templates compile (real module)', async () => { - vi.resetModules(); - const mod = await import('../scripts/generate-news-enhanced/generators.js'); - expect(mod).toBeDefined(); - expect(typeof mod.generateDeepInspection).toBe('function'); - }); -}); - -// --------------------------------------------------------------------------- -// 6. Deep-inspection module — pipeline exports -// --------------------------------------------------------------------------- - -describe('scripts/deep-inspection/index.js exports', () => { - it('exports DeepInspectionPipeline class and default', async () => { - const mod = await import('../scripts/deep-inspection/index.js'); - expect(mod.DeepInspectionPipeline).toBeDefined(); - expect(mod.default).toBeDefined(); - // Same reference - expect(mod.default).toBe(mod.DeepInspectionPipeline); - }); - - it('pipeline uses config analysisDepth for effective depth', async () => { - const { analysisDepth } = await import('../scripts/generate-news-enhanced/config.js'); - // The pipeline reads analysisDepth from config, not from constructor params - expect([1, 2, 3, 4]).toContain(analysisDepth); - }); -}); diff --git a/tests/extract-vocabulary.test.ts b/tests/extract-vocabulary.test.ts deleted file mode 100644 index 38d7ecfc8b..0000000000 --- a/tests/extract-vocabulary.test.ts +++ /dev/null @@ -1,262 +0,0 @@ -/** - * Unit Tests for extract-vocabulary.ts - * Tests vocabulary extraction from news articles - */ - -import { describe, it, expect, beforeEach, afterAll } from 'vitest'; -import { writeFileSync, mkdirSync, rmSync } from 'fs'; -import { execSync } from 'child_process'; - -describe('extract-vocabulary.ts', () => { - const testDir = 'tests/fixtures/vocabulary-test'; - - beforeEach(() => { - // Create test fixtures directory - try { - rmSync(testDir, { recursive: true, force: true }); - } catch (_e: unknown) { - // Directory doesn't exist, that's fine - } - mkdirSync(testDir, { recursive: true }); - }); - - afterAll(() => { - // Clean up test fixtures after all tests complete - try { - rmSync(testDir, { recursive: true, force: true }); - } catch (_e: unknown) { - // Directory cleanup failed, ignore - } - }); - - describe('Language code extraction', () => { - it('should extract language code from filename', () => { - // Create test file - const content = ` - - - Test - -

    Test Article

    -

    What to Watch This Week

    - Committee: Finance Committee - Document: Report 2024:123 -

    Title 1

    -

    Title 2

    - - - `; - - writeFileSync(`${testDir}/2026-02-14-test-en.html`, content); - - const output = execSync(`node scripts/extract-vocabulary.ts --directory ${testDir} --date-prefix 2026-02-14`, { - encoding: 'utf-8' - }); - - expect(output).toContain('English (EN)'); - expect(output).toContain('Samples analyzed: 1'); - }); - - it('should handle multiple language files', () => { - const languages = ['en', 'sv', 'de', 'fr']; - - for (const lang of languages) { - const content = ` - - -

    Test

    - - `; - writeFileSync(`${testDir}/2026-02-14-test-${lang}.html`, content); - } - - const output = execSync(`node scripts/extract-vocabulary.ts --directory ${testDir}`, { - encoding: 'utf-8' - }); - - expect(output).toContain('English (EN)'); - expect(output).toContain('Swedish (SV)'); - expect(output).toContain('German (DE)'); - expect(output).toContain('French (FR)'); - }); - }); - - describe('Structure-based extraction', () => { - it('should extract committee label from strong tag', () => { - const content = ` - - - Committee: Finance Committee - Document: Report 123 - - `; - - writeFileSync(`${testDir}/2026-02-14-test-en.html`, content); - - const output = execSync(`node scripts/extract-vocabulary.ts --directory ${testDir}`, { - encoding: 'utf-8' - }); - - expect(output).toContain('"Committee": Committee'); - expect(output).toContain('"Document": Document'); - }); - - it('should extract labels in multiple languages', () => { - // German - writeFileSync(`${testDir}/test-de.html`, ` - - Ausschuss: Finanzausschuss - Dokument: Bericht 123 - - `); - - // French - writeFileSync(`${testDir}/test-fr.html`, ` - - Commission: Commission des finances - Document: Rapport 123 - - `); - - const output = execSync(`node scripts/extract-vocabulary.ts --directory ${testDir}`, { - encoding: 'utf-8' - }); - - expect(output).toContain('German (DE)'); - expect(output).toContain('"Committee": Ausschuss'); - expect(output).toContain('French (FR)'); - expect(output).toContain('"Committee": Commission'); - }); - - it('should extract What to Watch heading', () => { - const content = ` - -

    What to Watch This Week

    - Committee: Test - - `; - - writeFileSync(`${testDir}/test-en.html`, content); - - const output = execSync(`node scripts/extract-vocabulary.ts --directory ${testDir}`, { - encoding: 'utf-8' - }); - - expect(output).toContain('"What to Watch": What to Watch This Week'); - }); - - it('should extract h3 titles with nested span tags', () => { - const content = ` - -

    Bättre förutsättningar att sända ut statlig personal

    -

    Plain text title

    -

    Highlighted title

    - Committee: Test - - `; - - writeFileSync(`${testDir}/test-en.html`, content); - - const output = execSync(`node scripts/extract-vocabulary.ts --directory ${testDir}`, { - encoding: 'utf-8' - }); - - // Should extract clean text from all h3 tags - expect(output).toContain('Bättre förutsättningar att sända ut statlig personal'); - expect(output).toContain('Plain text title'); - expect(output).toContain('Highlighted title'); - }); - }); - - describe('CLI arguments', () => { - it('should filter by date prefix', () => { - writeFileSync(`${testDir}/2026-02-14-test-en.html`, '

    Feb 14

    '); - writeFileSync(`${testDir}/2026-03-01-test-en.html`, '

    March 1

    '); - - const output = execSync(`node scripts/extract-vocabulary.ts --directory ${testDir} --date-prefix 2026-02-`, { - encoding: 'utf-8' - }); - - expect(output).toContain('Filtering by date prefix: "2026-02-"'); - expect(output).toContain('Scanning 1 HTML files'); - }); - - it('should show help message', () => { - const output = execSync('node scripts/extract-vocabulary.ts --help', { - encoding: 'utf-8' - }); - - expect(output).toContain('Usage:'); - expect(output).toContain('--date-prefix'); - expect(output).toContain('--directory'); - expect(output).toContain('Examples:'); - }); - }); - - describe('Error handling', () => { - it('should report skipped files with reasons', () => { - // File without language code - writeFileSync(`${testDir}/invalid.html`, 'Test'); - - // File with invalid language code - writeFileSync(`${testDir}/test-xx.html`, 'Test'); - - const output = execSync(`node scripts/extract-vocabulary.ts --directory ${testDir}`, { - encoding: 'utf-8' - }); - - expect(output).toContain('⚠️ WARNING: Skipped Files Summary'); - expect(output).toContain('No language code in filename'); - expect(output).toContain('Unknown language code: xx'); - }); - - it('should handle read errors gracefully', () => { - // Create a file then make it unreadable (chmod doesn't work well in CI, so skip this test in CI) - if (process.env.CI) { - return; // Skip in CI - } - - writeFileSync(`${testDir}/test-en.html`, 'test'); - execSync(`chmod 000 ${testDir}/test-en.html`); - - const output = execSync(`node scripts/extract-vocabulary.ts --directory ${testDir}`, { - encoding: 'utf-8' - }); - - expect(output).toContain('⚠️ WARNING'); - - // Restore permissions - execSync(`chmod 644 ${testDir}/test-en.html`); - }); - }); - - describe('Article type detection', () => { - it('should detect committee reports', () => { - writeFileSync(`${testDir}/2026-02-14-committee-reports-en.html`, ` - -

    Committee Reports

    -

    Report Title 1

    - - `); - - const output = execSync(`node scripts/extract-vocabulary.ts --directory ${testDir}`, { - encoding: 'utf-8' - }); - - expect(output).toContain('Sample titles: Report Title 1'); - }); - - it('should detect multiple article types', () => { - writeFileSync(`${testDir}/committee-en.html`, '

    Committee Report

    '); - writeFileSync(`${testDir}/motion-en.html`, '

    Motion

    '); - writeFileSync(`${testDir}/proposition-en.html`, '

    Proposition

    '); - - const output = execSync(`node scripts/extract-vocabulary.ts --directory ${testDir}`, { - encoding: 'utf-8' - }); - - expect(output).toContain('English (EN)'); - expect(output).toContain('Samples analyzed: 3'); - }); - }); -}); diff --git a/tests/government-role-validator.test.ts b/tests/government-role-validator.test.ts deleted file mode 100644 index d37e965897..0000000000 --- a/tests/government-role-validator.test.ts +++ /dev/null @@ -1,151 +0,0 @@ -/** - * Unit Tests for Government Role Validator - * Tests validation of government role attributions against CIA data. - * - * ROOT CAUSE PREVENTION: Ensures that agentic workflows cannot hallucinate - * government titles (e.g. calling Lotta Edholm "Deputy Prime Minister" - * when she is Statsråd at Utbildningsdepartementet). - */ - -import { describe, it, expect, afterEach } from 'vitest'; -import { - loadGovernmentRoleMembers, - findRolesForPerson, - getCurrentRole, - validateGovernmentRole, - getFormattedRole, - clearCache, -} from '../scripts/government-role-validator.js'; -import { resolve } from 'node:path'; - -const REPO_ROOT = resolve(import.meta.dirname, '..'); - -afterEach(() => { - clearCache(); -}); - -describe('Government Role Validator', () => { - describe('loadGovernmentRoleMembers', () => { - it('should load government role members from CIA CSV', () => { - const members = loadGovernmentRoleMembers(REPO_ROOT); - expect(members.length).toBeGreaterThan(0); - // Check that records have expected fields - const first = members[0]; - expect(first).toHaveProperty('roleId'); - expect(first).toHaveProperty('department'); - expect(first).toHaveProperty('roleCode'); - expect(first).toHaveProperty('firstName'); - expect(first).toHaveProperty('lastName'); - expect(first).toHaveProperty('party'); - }); - - it('should gracefully return empty array if CSV not found', () => { - const members = loadGovernmentRoleMembers('/nonexistent/path'); - expect(members).toEqual([]); - }); - }); - - describe('findRolesForPerson', () => { - it('should find roles for Lotta Edholm', () => { - const roles = findRolesForPerson('Edholm', 'Lotta', REPO_ROOT); - expect(roles.length).toBeGreaterThan(0); - expect(roles[0].firstName).toBe('Lotta'); - expect(roles[0].lastName).toBe('Edholm'); - }); - - it('should find roles by last name only', () => { - const roles = findRolesForPerson('Strömmer', undefined, REPO_ROOT); - expect(roles.length).toBeGreaterThan(0); - }); - - it('should return empty for unknown person', () => { - const roles = findRolesForPerson('UnknownPerson123', undefined, REPO_ROOT); - expect(roles).toEqual([]); - }); - - it('should find multiple roles for Lotta Edholm and sort by most recent', () => { - const roles = findRolesForPerson('Edholm', 'Lotta', REPO_ROOT); - expect(roles.length).toBeGreaterThan(1); - // Verify sorted by most recent first - expect(roles[0].fromDate >= roles[1].fromDate).toBe(true); - }); - }); - - describe('getCurrentRole', () => { - it('should return a role for known politicians', () => { - const role = getCurrentRole('Edholm', 'Lotta', REPO_ROOT); - expect(role).toBeDefined(); - expect(role!.lastName).toBe('Edholm'); - }); - - it('should correctly parse quoted CSV fields with commas in role code', () => { - const role = getCurrentRole('Edholm', 'Lotta', REPO_ROOT); - expect(role).toBeDefined(); - // The active role has a comma in the quoted role_code field - expect(role!.roleCode).toContain('Gymnasie-'); - expect(role!.roleCode).toContain('forskningsminister'); - expect(role!.active).toBe(true); - expect(role!.party).toBe('L'); - }); - - it('should return undefined for unknown politicians', () => { - const role = getCurrentRole('Nobody123', 'Test', REPO_ROOT); - expect(role).toBeUndefined(); - }); - }); - - describe('validateGovernmentRole', () => { - it('should reject Lotta Edholm as Deputy Prime Minister', () => { - const result = validateGovernmentRole('Lotta Edholm', 'Deputy Prime Minister', REPO_ROOT); - expect(result.valid).toBe(false); - expect(result.suggestion).toContain('NOT Deputy Prime Minister'); - }); - - it('should reject Lotta Edholm as vice statsminister', () => { - const result = validateGovernmentRole('Lotta Edholm', 'vice statsminister', REPO_ROOT); - expect(result.valid).toBe(false); - }); - - it('should flag unknown persons', () => { - const result = validateGovernmentRole('Unknown Person', 'Minister', REPO_ROOT); - expect(result.valid).toBe(false); - expect(result.suggestion).toContain('No government role records found'); - }); - - it('should handle multi-language Deputy PM terms', () => { - // Japanese - const ja = validateGovernmentRole('Lotta Edholm', '副首相', REPO_ROOT); - expect(ja.valid).toBe(false); - - // Korean - const ko = validateGovernmentRole('Lotta Edholm', '부총리', REPO_ROOT); - expect(ko.valid).toBe(false); - - // Arabic - const ar = validateGovernmentRole('Lotta Edholm', 'نائبة رئيس الوزراء', REPO_ROOT); - expect(ar.valid).toBe(false); - }); - - it('should include correct actual role in rejection suggestion', () => { - const result = validateGovernmentRole('Lotta Edholm', 'Deputy Prime Minister', REPO_ROOT); - expect(result.valid).toBe(false); - expect(result.suggestion).toContain('Gymnasie-'); - expect(result.suggestion).toContain('Utbildningsdepartementet'); - expect(result.suggestion).toContain('(L)'); - }); - }); - - describe('getFormattedRole', () => { - it('should return formatted role for known politician', () => { - const formatted = getFormattedRole('Edholm', 'Lotta', REPO_ROOT); - expect(formatted).toBeDefined(); - expect(formatted).toContain('Edholm'); - expect(formatted).toContain('L'); - }); - - it('should return undefined for unknown politician', () => { - const formatted = getFormattedRole('Nobody123', 'Test', REPO_ROOT); - expect(formatted).toBeUndefined(); - }); - }); -}); diff --git a/tests/news-evening-analysis.test.ts b/tests/news-evening-analysis.test.ts index 84e21850b1..03e7a1723e 100644 --- a/tests/news-evening-analysis.test.ts +++ b/tests/news-evening-analysis.test.ts @@ -18,8 +18,29 @@ import { describe, it, expect, vi, afterEach } from 'vitest'; import fs from 'fs'; import path from 'path'; import { fileURLToPath } from 'url'; -import { extractPartyMentions } from '../scripts/party-variants.js'; import { detectArticleLanguage, getLocalizedHeading } from '../scripts/editorial-pillars.js'; + +/** Simple party mention extraction for test purposes. */ +const PARTY_NAMES: Record = { + S: ['Socialdemokraterna', 'S'], M: ['Moderaterna', 'M'], + SD: ['Sverigedemokraterna', 'SD'], V: ['Vänsterpartiet', 'V'], + MP: ['Miljöpartiet', 'MP'], C: ['Centerpartiet', 'C'], + L: ['Liberalerna', 'L'], KD: ['Kristdemokraterna', 'KD'], +}; +function extractPartyMentions(html: string | null | undefined): Set { + const parties = new Set(); + if (!html) return parties; + for (const [code, variants] of Object.entries(PARTY_NAMES)) { + for (const v of variants) { + const escaped = v.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + if (new RegExp(`(?:^|[^\\p{L}\\p{N}])${escaped}(?=$|[^\\p{L}\\p{N}])`, 'ui').test(html)) { + parties.add(code); + break; + } + } + } + return parties; +} import type { Language } from '../scripts/types/language.js'; const __filename = fileURLToPath(import.meta.url); diff --git a/tests/party-variants.test.ts b/tests/party-variants.test.ts deleted file mode 100644 index 4b7b5a8f3f..0000000000 --- a/tests/party-variants.test.ts +++ /dev/null @@ -1,424 +0,0 @@ -import { describe, it, expect, afterEach, vi } from 'vitest'; -import { PARTY_VARIANTS, extractPartyMentions } from '../scripts/party-variants.js'; -import type { PartyCode } from '../scripts/types/party.js'; - -describe('party-variants', () => { - afterEach(() => { - vi.clearAllMocks(); - }); - - describe('PARTY_VARIANTS', () => { - it('should have all 8 Swedish parliamentary parties', () => { - const expectedParties: string[] = ['S', 'M', 'SD', 'V', 'MP', 'C', 'L', 'KD']; - const actualParties: string[] = Object.keys(PARTY_VARIANTS); - - expect(actualParties.sort()).toEqual(expectedParties.sort()); - }); - - it('should have canonical code and variants for each party', () => { - (Object.entries(PARTY_VARIANTS) as Array<[PartyCode, readonly string[]]>).forEach(([code, variants]) => { - expect(Array.isArray(variants)).toBe(true); - expect(variants.length).toBeGreaterThan(0); - // Code should be included in variants - expect(variants).toContain(code); - }); - }); - - it('should have correct Socialdemokraterna variants', () => { - expect(PARTY_VARIANTS.S).toEqual(['Socialdemokraterna', 'S']); - }); - - it('should have correct Moderaterna variants', () => { - expect(PARTY_VARIANTS.M).toEqual(['Moderaterna', 'M']); - }); - - it('should have correct Sverigedemokraterna variants', () => { - expect(PARTY_VARIANTS.SD).toEqual(['Sverigedemokraterna', 'SD']); - }); - - it('should have correct Vänsterpartiet variants', () => { - expect(PARTY_VARIANTS.V).toEqual(['Vänsterpartiet', 'V']); - }); - - it('should have correct Miljöpartiet variants', () => { - expect(PARTY_VARIANTS.MP).toEqual(['Miljöpartiet', 'MP']); - }); - - it('should have correct Centerpartiet variants', () => { - expect(PARTY_VARIANTS.C).toEqual(['Centerpartiet', 'C']); - }); - - it('should have correct Liberalerna variants', () => { - expect(PARTY_VARIANTS.L).toEqual(['Liberalerna', 'L']); - }); - - it('should have correct Kristdemokraterna variants', () => { - expect(PARTY_VARIANTS.KD).toEqual(['Kristdemokraterna', 'KD']); - }); - - it('should have unique variants within each party', () => { - (Object.entries(PARTY_VARIANTS) as Array<[PartyCode, readonly string[]]>).forEach(([_code, variants]) => { - const uniqueVariants: readonly string[] = [...new Set(variants)]; - expect(variants.length).toBe(uniqueVariants.length); - }); - }); - }); - - describe('extractPartyMentions', () => { - describe('Single party detection', () => { - it('should detect Socialdemokraterna by full name', () => { - const html = '

    Socialdemokraterna presenterade sitt förslag.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('S')).toBe(true); - }); - - it('should detect Socialdemokraterna by abbreviation', () => { - const html = '

    S presenterade sitt förslag.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('S')).toBe(true); - }); - - it('should detect Moderaterna by full name', () => { - const html = '

    Moderaterna röstade mot förslaget.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('M')).toBe(true); - }); - - it('should detect Sverigedemokraterna', () => { - const html = '

    Sverigedemokraterna lade fram ett ändringsförslag.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('SD')).toBe(true); - }); - - it('should detect Vänsterpartiet', () => { - const html = '

    Vänsterpartiet kritiserade regeringen.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('V')).toBe(true); - }); - - it('should detect Miljöpartiet', () => { - const html = '

    Miljöpartiet betonade klimatfrågan.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('MP')).toBe(true); - }); - - it('should detect MP abbreviation after HTML tag', () => { - const html = '

    MP röstade för förslaget.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('MP')).toBe(true); - }); - - it('should detect SD abbreviation after HTML tag', () => { - const html = '

    SD lade fram ett ändringsförslag.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('SD')).toBe(true); - }); - - it('should detect KD abbreviation after HTML tag', () => { - const html = '

    KD lade fram ett alternativ.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('KD')).toBe(true); - }); - - it('should detect Centerpartiet', () => { - const html = '

    Centerpartiet föreslog en kompromiss.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('C')).toBe(true); - }); - - it('should detect Liberalerna', () => { - const html = '

    Liberalerna stödde förslaget.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('L')).toBe(true); - }); - - it('should detect Kristdemokraterna', () => { - const html = '

    Kristdemokraterna lade fram ett alternativ.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('KD')).toBe(true); - }); - }); - - describe('Multiple parties detection', () => { - it('should detect two parties', () => { - const html = '

    Socialdemokraterna och Moderaterna enades om budgeten.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(2); - expect(parties.has('S')).toBe(true); - expect(parties.has('M')).toBe(true); - }); - - it('should detect three parties', () => { - const html = '

    S, M och SD röstade för förslaget.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(3); - expect(parties.has('S')).toBe(true); - expect(parties.has('M')).toBe(true); - expect(parties.has('SD')).toBe(true); - }); - - it('should detect all eight parties', () => { - const html = ` -

    Socialdemokraterna, Moderaterna, Sverigedemokraterna, Vänsterpartiet, - Miljöpartiet, Centerpartiet, Liberalerna och Kristdemokraterna deltog i debatten.

    - `; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(8); - expect(parties.has('S')).toBe(true); - expect(parties.has('M')).toBe(true); - expect(parties.has('SD')).toBe(true); - expect(parties.has('V')).toBe(true); - expect(parties.has('MP')).toBe(true); - expect(parties.has('C')).toBe(true); - expect(parties.has('L')).toBe(true); - expect(parties.has('KD')).toBe(true); - }); - - it('should detect mix of full names and abbreviations', () => { - const html = '

    Socialdemokraterna och M enades. SD och Vänsterpartiet var emot.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(4); - expect(parties.has('S')).toBe(true); - expect(parties.has('M')).toBe(true); - expect(parties.has('SD')).toBe(true); - expect(parties.has('V')).toBe(true); - }); - }); - - describe('No double-counting', () => { - it('should count party once when both full name and abbreviation appear', () => { - const html = '

    Socialdemokraterna (S) röstade för förslaget.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('S')).toBe(true); - }); - - it('should count party once when full name appears multiple times', () => { - const html = '

    Socialdemokraterna kritiserade förslaget. Socialdemokraterna föreslog ett alternativ.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('S')).toBe(true); - }); - - it('should count party once when abbreviation appears multiple times', () => { - const html = '

    M röstade för. M stödde också ändringsförslaget.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('M')).toBe(true); - }); - - it('should prevent double-counting in complex scenario', () => { - const html = ` -

    Socialdemokraterna (S) och Moderaterna enades. - S bekräftade sitt stöd. Moderaterna (M) röstade också för.

    - `; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(2); - expect(parties.has('S')).toBe(true); - expect(parties.has('M')).toBe(true); - }); - }); - - describe('Case-insensitive matching', () => { - it('should detect lowercase party names', () => { - const html = '

    socialdemokraterna röstade för förslaget.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('S')).toBe(true); - }); - - it('should detect uppercase party abbreviations', () => { - const html = '

    S RÖSTADE FÖR FÖRSLAGET.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('S')).toBe(true); - }); - - it('should detect mixed case party names', () => { - const html = '

    SoCiAlDeMoKrAtErNa och MoDeRaTErNa röstade för.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(2); - expect(parties.has('S')).toBe(true); - expect(parties.has('M')).toBe(true); - }); - }); - - describe('Word boundary matching', () => { - it('should match full word "S" not partial "SD"', () => { - const html = '

    S röstade för men SD röstade emot.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(2); - expect(parties.has('S')).toBe(true); - expect(parties.has('SD')).toBe(true); - }); - - it('should match full word "M" not partial "MP"', () => { - const html = '

    M och MP hade olika åsikter.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(2); - expect(parties.has('M')).toBe(true); - expect(parties.has('MP')).toBe(true); - }); - - it('should not match party name as substring of another word', () => { - const html = '

    Demokratisering av samhället diskuterades.

    '; - const parties: Set = extractPartyMentions(html); - - // Should not match "demokrat" from Kristdemokraterna or Socialdemokraterna - expect(parties.size).toBe(0); - }); - - it('should match party name with punctuation', () => { - const html = '

    Socialdemokraterna, Moderaterna och Centerpartiet.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(3); - expect(parties.has('S')).toBe(true); - expect(parties.has('M')).toBe(true); - expect(parties.has('C')).toBe(true); - }); - - it('should match party name at start of sentence', () => { - const html = '

    Socialdemokraterna röstade för.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('S')).toBe(true); - }); - - it('should match party name at end of sentence', () => { - const html = '

    Förslaget stöddes av Socialdemokraterna.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(1); - expect(parties.has('S')).toBe(true); - }); - }); - - describe('Edge cases', () => { - it('should return empty Set for empty string', () => { - const parties: Set = extractPartyMentions(''); - - expect(parties.size).toBe(0); - expect(parties instanceof Set).toBe(true); - }); - - it('should return empty Set for null input', () => { - const parties: Set = extractPartyMentions(null); - - expect(parties.size).toBe(0); - expect(parties instanceof Set).toBe(true); - }); - - it('should return empty Set for undefined input', () => { - const parties: Set = extractPartyMentions(undefined); - - expect(parties.size).toBe(0); - expect(parties instanceof Set).toBe(true); - }); - - it('should return empty Set for text without party mentions', () => { - const html = '

    Detta är en artikel om något helt annat.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(0); - }); - - it('should handle HTML with tags and attributes', () => { - const html = '

    Socialdemokraterna och Moderaterna enades.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(2); - expect(parties.has('S')).toBe(true); - expect(parties.has('M')).toBe(true); - }); - - it('should handle HTML entities', () => { - const html = '

    Socialdemokraterna & Moderaterna

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(2); - expect(parties.has('S')).toBe(true); - expect(parties.has('M')).toBe(true); - }); - - it('should handle newlines and multiple spaces', () => { - const html = ` -

    Socialdemokraterna - - och Moderaterna

    - `; - const parties: Set = extractPartyMentions(html); - - expect(parties.size).toBe(2); - expect(parties.has('S')).toBe(true); - expect(parties.has('M')).toBe(true); - }); - }); - - describe('Returns Set', () => { - it('should return a Set instance', () => { - const html = '

    Socialdemokraterna röstade för.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties instanceof Set).toBe(true); - }); - - it('should return canonical codes not variants', () => { - const html = '

    Socialdemokraterna (S) röstade för.

    '; - const parties: Set = extractPartyMentions(html); - - expect(parties.has('S')).toBe(true); - expect(parties.has('Socialdemokraterna' as PartyCode)).toBe(false); - }); - - it('should be iterable', () => { - const html = '

    S, M och SD röstade för.

    '; - const parties: Set = extractPartyMentions(html); - const partyArray: PartyCode[] = Array.from(parties); - - expect(partyArray.length).toBe(3); - expect(partyArray).toContain('S'); - expect(partyArray).toContain('M'); - expect(partyArray).toContain('SD'); - }); - }); - }); -}); diff --git a/tests/pipeline/news-generation-e2e.test.ts b/tests/pipeline/news-generation-e2e.test.ts deleted file mode 100644 index a689d79f9c..0000000000 --- a/tests/pipeline/news-generation-e2e.test.ts +++ /dev/null @@ -1,781 +0,0 @@ -/** - * End-to-End Integration Tests for News Article Generation Pipeline - * - * Validates the full flow from MCP data fetching mock → content generation → - * template rendering → HTML validation → quality checks for all 8 article types. - * - * Coverage: - * - All 8 article type generators (week-ahead, month-ahead, weekly-review, - * monthly-review, committee-reports, propositions, motions, breaking-news) - * - All 14 language variants via generateArticleHTML - * - Schema.org JSON-LD structural validation - * - Hreflang tag consistency - * - Edge cases: empty data, RTL languages, long content - * - * @author Hack23 AB - * @license Apache-2.0 - */ - -import { describe, it, expect, vi, beforeAll, beforeEach, afterEach } from 'vitest'; -import { hreflangCode } from '../../scripts/article-template/helpers.js'; -import type { Language } from '../../scripts/types/language.js'; -import type { - ArticleData, - ArticleCategory, - GeneratedArticle, - GenerationResult, - BreakingEventData, -} from '../../scripts/types/article.js'; -import type { MCPClientConfig } from '../../scripts/types/mcp.js'; - -// --------------------------------------------------------------------------- -// Types for dynamically-imported modules -// --------------------------------------------------------------------------- - -interface ArticleTemplateModule { - readonly generateArticleHTML: (data: ArticleData) => string; -} - -interface PipelineValidationModule { - readonly validateArticleHTML: ( - html: string, - opts?: Record, - ) => { passed: boolean; errors: string[]; passedChecks: string[]; warnings: string[] }; - readonly validateArticleBatch: ( - articles: ReadonlyArray<{ filename: string; html: string }>, - opts?: Record, - ) => Array<{ filename: string; passed: boolean; errors: string[]; passedChecks: string[]; warnings: string[] }>; -} - -interface WeekAheadModule { - generateWeekAhead: (opts?: { languages?: Language[] }) => Promise; -} -interface MonthAheadModule { - generateMonthAhead: (opts?: { languages?: Language[] }) => Promise; -} -interface WeeklyReviewModule { - generateWeeklyReview: (opts?: { languages?: Language[]; lookbackDays?: number }) => Promise; -} -interface MonthlyReviewModule { - generateMonthlyReview: (opts?: { languages?: Language[] }) => Promise; -} -interface CommitteeReportsModule { - generateCommitteeReports: (opts?: { languages?: Language[] }) => Promise; -} -interface PropositionsModule { - generatePropositions: (opts?: { languages?: Language[] }) => Promise; -} -interface MotionsModule { - generateMotions: (opts?: { languages?: Language[] }) => Promise; -} -interface BreakingNewsModule { - generateBreakingNews: (opts?: { - languages?: Language[]; - eventContext?: string; - eventData?: BreakingEventData | null; - }) => Promise; -} - -// --------------------------------------------------------------------------- -// Comprehensive MCP client mock (vi.hoisted — available before imports) -// --------------------------------------------------------------------------- - -const { mockClientInstance, MockMCPClient } = vi.hoisted(() => { - const mockCalendarEvents = [ - { id: 'ev1', rubrik: 'Budget committee meeting', datum: '2026-03-02', tid: '10:00', type: 'committee', organ: 'FiU' }, - { id: 'ev2', rubrik: 'Chamber debate on defence', datum: '2026-03-03', tid: '14:00', type: 'chamber', organ: 'Kammaren' }, - { id: 'ev3', rubrik: 'EU affairs committee', datum: '2026-03-04', tid: '09:00', type: 'committee', organ: 'EUN' }, - ]; - - const mockCommitteeReports = [ - { id: 'bet1', title: 'Defence appropriations 2026', organ: 'FöU', rm: '2025/26', dok_id: 'FöU3' }, - { id: 'bet2', title: 'Social insurance reform', organ: 'SfU', rm: '2025/26', dok_id: 'SfU5' }, - ]; - - const mockPropositions = [ - { id: 'prop1', title: 'Prop. 2025/26:45 — Climate action plan', rm: '2025/26' }, - { id: 'prop2', title: 'Prop. 2025/26:67 — Defence funding increase', rm: '2025/26' }, - ]; - - const mockMotions = [ - { id: 'mot1', title: 'Mot. 2025/26:123 — Lower income tax', rm: '2025/26', parti: 'M' }, - { id: 'mot2', title: 'Mot. 2025/26:456 — Climate target revision', rm: '2025/26', parti: 'MP' }, - ]; - - const mockDocuments = [ - { id: 'doc1', title: 'Annual budget review', date: '2026-02-20', type: 'betankande' }, - { id: 'doc2', title: 'Defence white paper', date: '2026-02-18', type: 'prop' }, - ]; - - interface MockMCPClientInstance { - fetchCalendarEvents: ReturnType; - fetchCommitteeReports: ReturnType; - fetchPropositions: ReturnType; - fetchMotions: ReturnType; - searchDocuments: ReturnType; - searchSpeeches: ReturnType; - fetchWrittenQuestions: ReturnType; - fetchInterpellations: ReturnType; - fetchVotingRecords: ReturnType; - fetchVotingGroup: ReturnType; - fetchMPs: ReturnType; - fetchDocumentDetails: ReturnType; - enrichDocumentsWithContent: ReturnType; - request: ReturnType; - timeout: number; - baseURL: string; - } - - const mockClientInstance: MockMCPClientInstance = { - fetchCalendarEvents: vi.fn().mockResolvedValue(mockCalendarEvents), - fetchCommitteeReports: vi.fn().mockResolvedValue(mockCommitteeReports), - fetchPropositions: vi.fn().mockResolvedValue(mockPropositions), - fetchMotions: vi.fn().mockResolvedValue(mockMotions), - searchDocuments: vi.fn().mockResolvedValue(mockDocuments), - searchSpeeches: vi.fn().mockResolvedValue([]), - fetchWrittenQuestions: vi.fn().mockResolvedValue([]), - fetchInterpellations: vi.fn().mockResolvedValue([]), - fetchVotingRecords: vi.fn().mockResolvedValue([]), - fetchVotingGroup: vi.fn().mockResolvedValue([]), - fetchMPs: vi.fn().mockResolvedValue([]), - fetchDocumentDetails: vi.fn().mockResolvedValue({ - summary: 'Document summary text.', - fullText: 'Full detailed text of the document.', - }), - enrichDocumentsWithContent: vi.fn().mockImplementation(async (docs: unknown[]) => docs), - request: vi.fn().mockResolvedValue({ last_sync: '2026-03-01T00:00:00Z' }), - timeout: 30000, - baseURL: 'https://riksdag-regering-ai.onrender.com/mcp', - }; - - function MockMCPClient(config?: MCPClientConfig): MockMCPClientInstance { - if (config?.timeout) mockClientInstance.timeout = config.timeout; - return mockClientInstance; - } - - return { mockClientInstance, MockMCPClient }; -}); - -vi.mock('../../scripts/mcp-client.js', () => ({ - MCPClient: MockMCPClient, - getDefaultClient: () => mockClientInstance, -})); - -// --------------------------------------------------------------------------- -// Module holders — populated in beforeAll -// --------------------------------------------------------------------------- - -let articleTemplate: ArticleTemplateModule; -let pipelineValidation: PipelineValidationModule; -let weekAheadMod: WeekAheadModule; -let monthAheadMod: MonthAheadModule; -let weeklyReviewMod: WeeklyReviewModule; -let monthlyReviewMod: MonthlyReviewModule; -let committeeReportsMod: CommitteeReportsModule; -let propositionsMod: PropositionsModule; -let motionsMod: MotionsModule; -let breakingNewsMod: BreakingNewsModule; - -beforeAll(async () => { - [ - articleTemplate, - pipelineValidation, - weekAheadMod, - monthAheadMod, - weeklyReviewMod, - monthlyReviewMod, - committeeReportsMod, - propositionsMod, - motionsMod, - breakingNewsMod, - ] = await Promise.all([ - import('../../scripts/article-template.js') as Promise, - import('../../scripts/pipeline/validation.js') as Promise, - import('../../scripts/news-types/week-ahead.js') as Promise, - import('../../scripts/news-types/month-ahead.js') as Promise, - import('../../scripts/news-types/weekly-review.js') as Promise, - import('../../scripts/news-types/monthly-review.js') as Promise, - import('../../scripts/news-types/committee-reports.js') as Promise, - import('../../scripts/news-types/propositions.js') as Promise, - import('../../scripts/news-types/motions.js') as Promise, - import('../../scripts/news-types/breaking-news.js') as Promise, - ]); -}); - -beforeEach(() => { - // Re-initialize mock return values since mockReset:true clears implementations - mockClientInstance.fetchCalendarEvents.mockResolvedValue([ - { id: 'ev1', rubrik: 'Budget committee meeting', datum: '2026-03-02', tid: '10:00', type: 'committee', organ: 'FiU' }, - { id: 'ev2', rubrik: 'Chamber debate on defence', datum: '2026-03-03', tid: '14:00', type: 'chamber', organ: 'Kammaren' }, - ]); - mockClientInstance.fetchCommitteeReports.mockResolvedValue([ - { id: 'bet1', title: 'Defence appropriations 2026', organ: 'FöU', rm: '2025/26', dok_id: 'FöU3' }, - ]); - mockClientInstance.fetchPropositions.mockResolvedValue([ - { id: 'prop1', title: 'Prop. 2025/26:45 — Climate action plan', rm: '2025/26' }, - ]); - mockClientInstance.fetchMotions.mockResolvedValue([ - { id: 'mot1', title: 'Mot. 2025/26:123 — Lower income tax', rm: '2025/26', parti: 'M' }, - ]); - mockClientInstance.searchDocuments.mockResolvedValue([ - { id: 'doc1', title: 'Annual budget review', date: '2026-02-20', type: 'betankande' }, - ]); - mockClientInstance.searchSpeeches.mockResolvedValue([]); - mockClientInstance.fetchWrittenQuestions.mockResolvedValue([]); - mockClientInstance.fetchInterpellations.mockResolvedValue([]); - mockClientInstance.fetchVotingRecords.mockResolvedValue([]); - mockClientInstance.fetchVotingGroup.mockResolvedValue([]); - mockClientInstance.fetchMPs.mockResolvedValue([]); - mockClientInstance.fetchDocumentDetails.mockResolvedValue({ - summary: 'Document summary text.', - fullText: 'Full detailed text of the document.', - }); - mockClientInstance.enrichDocumentsWithContent.mockImplementation(async (docs: unknown[]) => docs); - mockClientInstance.request.mockResolvedValue({ last_sync: '2026-03-01T00:00:00Z' }); -}); - -afterEach(() => { - vi.clearAllMocks(); -}); - -// --------------------------------------------------------------------------- -// Helper: build a minimal ArticleData for direct template tests -// --------------------------------------------------------------------------- - -function makeArticleData( - lang: Language, - type: ArticleCategory = 'prospective', - overrides: Partial = {}, -): ArticleData { - const date = '2026-03-01'; - const typeSlug = type === 'prospective' ? 'week-ahead' - : type === 'retrospective' ? 'weekly-review' - : type === 'analysis' ? 'committee-reports' - : 'breaking-news'; - - return { - slug: `${date}-${typeSlug}-${lang}.html`, - title: `Test Article — ${lang.toUpperCase()}`, - subtitle: 'A comprehensive analysis of Swedish parliamentary affairs this week.', - date, - type, - lang, - readTime: '5 min read', - content: [ - '

    Overview

    ', - '

    The Swedish parliament discussed several important matters this week including budget allocation, defence policy, and climate legislation.

    ', - '

    Key Developments

    ', - '

    The Finance Committee reviewed proposals affecting over 200 000 Swedish citizens. Several parties expressed concern about the timeline.

    ', - '

    Analysis

    ', - '

    Political analysts predict a tight vote on the budget proposals. Coalition dynamics remain complex with three parties holding the balance of power.

    ', - ].join('\n'), - sources: ['riksdag-regering-mcp', 'Riksdagen calendar', 'SCB statistics'], - keywords: ['parliament', 'riksdag', 'budget', 'sweden', 'politics'], - tags: ['Budget', 'Parliament', 'Sweden'], - events: [], - watchPoints: [], - ...overrides, - }; -} - -// --------------------------------------------------------------------------- -// Helper: extract JSON-LD blocks from HTML -// --------------------------------------------------------------------------- - -function extractJsonLdBlocks(html: string): unknown[] { - const blocks: unknown[] = []; - const regex = / - - -
    -

    Short Article

    -

    Intro Section

    -

    Only a few visible words here.

    -
    -

    Data Sources: riksdag-regering-mcp

    -
    -
    - -`; - const result = validateArticleHTML(htmlWithHeadAndScriptWords, { minWordCount: 50 }); - expect(result.passed).toBe(false); - expect(result.errors.some(e => /word count/i.test(e))).toBe(true); - }); -}); - -describe('validateArticleHTML — sources attribution', () => { - it('fails when sources attribution block is missing', () => { - const html = makeValidHTML().replace(/
    .*?<\/div>/s, ''); - const result = validateArticleHTML(html, { requireSources: true }); - expect(result.passed).toBe(false); - expect(result.errors.some(e => /sources/i.test(e))).toBe(true); - }); - - it('passes when sources attribution block is present', () => { - const result = validateArticleHTML(makeValidHTML(), { requireSources: true }); - expect(result.passed).toBe(true); - expect(result.errors.some(e => /sources/i.test(e))).toBe(false); - }); -}); - -describe('validateArticleHTML — empty input', () => { - it('fails gracefully for empty string', () => { - const result = validateArticleHTML(''); - expect(result.passed).toBe(false); - expect(result.errors.length).toBeGreaterThan(0); - }); -}); - -describe('validateArticleHTML — custom options', () => { - it('skips H1 check when requireH1=false', () => { - const html = makeValidHTML().replace(/

    .*?<\/h1>/s, ''); - const result = validateArticleHTML(html, { requireH1: false }); - expect(result.errors.some(e => /h1/i.test(e))).toBe(false); - }); - - it('allows lower minWordCount threshold', () => { - const thinHTML = ` - -T -

    Title here

    Section

    Short but sufficient content for low threshold.

    riksdag-regering-mcp
    -`; - const result = validateArticleHTML(thinHTML, { minWordCount: 5 }); - expect(result.passed).toBe(true); - }); -}); - -// --------------------------------------------------------------------------- -// validateArticleBatch -// --------------------------------------------------------------------------- - -describe('validateArticleBatch', () => { - it('returns one result per article', () => { - const articles = [ - { filename: 'article-en.html', html: makeValidHTML('en') }, - { filename: 'article-sv.html', html: makeValidHTML('sv') }, - ]; - const results = validateArticleBatch(articles); - expect(results).toHaveLength(2); - expect(results[0]?.filename).toBe('article-en.html'); - expect(results[1]?.filename).toBe('article-sv.html'); - }); - - it('reports each article pass/fail independently', () => { - const badHTML = '

    x

    '; // missing DOCTYPE, lang, h2, word count - const articles = [ - { filename: 'good.html', html: makeValidHTML('en') }, - { filename: 'bad.html', html: badHTML }, - ]; - const results = validateArticleBatch(articles); - expect(results[0]?.passed).toBe(true); - expect(results[1]?.passed).toBe(false); - }); - - it('handles empty batch', () => { - const results = validateArticleBatch([]); - expect(results).toHaveLength(0); - }); -}); diff --git a/tests/pipeline/template-sections.test.ts b/tests/template-sections.test.ts similarity index 98% rename from tests/pipeline/template-sections.test.ts rename to tests/template-sections.test.ts index cdf909501f..c4ad376d5b 100644 --- a/tests/pipeline/template-sections.test.ts +++ b/tests/template-sections.test.ts @@ -7,8 +7,8 @@ */ import { describe, it, expect } from 'vitest'; -import { generateArticleHTML } from '../../scripts/article-template.js'; -import type { ArticleData, ArticleCategory, TemplateSection } from '../../scripts/types/article.js'; +import { generateArticleHTML } from '../scripts/article-template.js'; +import type { ArticleData, ArticleCategory, TemplateSection } from '../scripts/types/article.js'; // --------------------------------------------------------------------------- // Shared minimal article data diff --git a/tests/workflow-state-coordinator.test.ts b/tests/workflow-state-coordinator.test.ts deleted file mode 100644 index 2d0c169c3c..0000000000 --- a/tests/workflow-state-coordinator.test.ts +++ /dev/null @@ -1,1136 +0,0 @@ -/** - * Unit Tests for Workflow State Coordination - * Tests MCP caching, deduplication, workflow coordination, - * file locks, Jaccard similarity, atomic writes, and adaptive TTL - */ - -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import fs from 'fs'; -import path from 'path'; -import { fileURLToPath } from 'url'; -import { - WorkflowStateCoordinator, - WorkflowLockManager, - SIMILARITY_THRESHOLD, - TOPIC_JACCARD_THRESHOLD, - LOCK_TIMEOUT_MS, - MCP_CACHE_TTL_SECONDS, - MCP_CACHE_TTL_NON_PLENARY_SECONDS, - jaccardTopicSimilarity, - getAdaptiveCacheTTL, -} from '../scripts/workflow-state-coordinator.js'; -import type { RecentArticleEntry, DuplicateCheckResult } from '../scripts/types/workflow.js'; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = path.dirname(__filename); -const TEST_STATE_FILE = path.join(__dirname, 'fixtures', 'test-workflow-state.json'); -const TEST_LOCK_DIR = path.join(__dirname, 'fixtures', 'test-locks'); - -/** Input shape for addRecentArticle */ -interface RecentArticleInput { - slug: string; - workflow: string; - title: string; - topics: string[]; - mcpQueries: string[]; - timestamp?: string; -} - -describe('Workflow State Coordinator', () => { - let coordinator: InstanceType; - - beforeEach(() => { - // Use test-specific state file - coordinator = new WorkflowStateCoordinator(TEST_STATE_FILE); - - // Clean up test file if exists - if (fs.existsSync(TEST_STATE_FILE)) { - fs.unlinkSync(TEST_STATE_FILE); - } - }); - - afterEach(() => { - // Clean up test file - if (fs.existsSync(TEST_STATE_FILE)) { - fs.unlinkSync(TEST_STATE_FILE); - } - // Clean up any leftover temp files - const dir = path.dirname(TEST_STATE_FILE); - if (fs.existsSync(dir)) { - for (const f of fs.readdirSync(dir)) { - if (f.startsWith('test-workflow-state.json.tmp.')) { - fs.unlinkSync(path.join(dir, f)); - } - } - } - vi.clearAllMocks(); - }); - - describe('State Management', () => { - it('should initialize with empty state', async () => { - await coordinator.load(); - - expect((coordinator as any).state.recentArticles).toEqual([]); - expect((coordinator as any).state.mcpQueryCache).toEqual({}); - expect((coordinator as any).state.workflows).toEqual({}); - }); - - it('should save and load state', async () => { - (coordinator as any).state.recentArticles = [ - { slug: 'test-article-en.html', timestamp: new Date().toISOString(), workflow: 'test' } as RecentArticleEntry - ]; - - await coordinator.save(); - expect(fs.existsSync(TEST_STATE_FILE)).toBe(true); - - // Create new coordinator and load - const coordinator2 = new WorkflowStateCoordinator(TEST_STATE_FILE); - await coordinator2.load(); - - expect((coordinator2 as any).state.recentArticles).toHaveLength(1); - expect((coordinator2 as any).state.recentArticles[0].slug).toBe('test-article-en.html'); - }); - - it('should normalize legacy/partial state files on load', async () => { - // Write a minimal legacy state file missing workflows, mcpQueryCache, recentArticles - fs.writeFileSync( - TEST_STATE_FILE, - JSON.stringify({ lastUpdate: '2025-01-01T00:00:00.000Z' }), - ); - - const legacyCoordinator = new WorkflowStateCoordinator(TEST_STATE_FILE); - await legacyCoordinator.load(); - - const state = (legacyCoordinator as any).state; - expect(state.lastUpdate).toBe('2025-01-01T00:00:00.000Z'); - expect(Array.isArray(state.recentArticles)).toBe(true); - expect(state.recentArticles).toHaveLength(0); - expect(typeof state.mcpQueryCache).toBe('object'); - expect(Array.isArray(state.mcpQueryCache)).toBe(false); - expect(Object.keys(state.mcpQueryCache)).toHaveLength(0); - expect(typeof state.workflows).toBe('object'); - expect(Array.isArray(state.workflows)).toBe(false); - expect(Object.keys(state.workflows)).toHaveLength(0); - expect(Array.isArray(state.activeGenerations)).toBe(true); - }); - - it('should create metadata directory if missing', async () => { - const dir = path.dirname(TEST_STATE_FILE); - - // Ensure any existing test file is removed - if (fs.existsSync(TEST_STATE_FILE)) { - fs.unlinkSync(TEST_STATE_FILE); - } - - // Remove the directory to simulate a missing metadata directory - if (fs.existsSync(dir)) { - fs.rmSync(dir, { recursive: true }); - } - - expect(fs.existsSync(dir)).toBe(false); - - await coordinator.save(); - - // save() should recreate the directory and state file - expect(fs.existsSync(dir)).toBe(true); - expect(fs.existsSync(TEST_STATE_FILE)).toBe(true); - }); - - it('should set lastUpdate timestamp on save', async () => { - const before = new Date().toISOString(); - await coordinator.save(); - const after = new Date().toISOString(); - - expect((coordinator as any).state.lastUpdate).toBeDefined(); - expect((coordinator as any).state.lastUpdate! >= before).toBe(true); - expect((coordinator as any).state.lastUpdate! <= after).toBe(true); - }); - - it('should use atomic write (write-to-tmp + rename)', async () => { - const writeFileSyncSpy = vi.spyOn(fs, 'writeFileSync'); - const renameSyncSpy = vi.spyOn(fs, 'renameSync'); - - await coordinator.save(); - - // Verify writeFileSync was called with a tmp path - const tmpWriteCall = writeFileSyncSpy.mock.calls.find( - (call) => typeof call[0] === 'string' && (call[0] as string).includes('.tmp.'), - ); - expect(tmpWriteCall).toBeDefined(); - const tmpPath = tmpWriteCall![0] as string; - expect(tmpPath).toMatch(/\.tmp\.\d+$/); - - // Verify renameSync was called to move tmp → final state path - const renameCall = renameSyncSpy.mock.calls.find( - (call) => call[0] === tmpPath && call[1] === TEST_STATE_FILE, - ); - expect(renameCall).toBeDefined(); - - // Verify final state file is valid JSON - const content = fs.readFileSync(TEST_STATE_FILE, 'utf-8'); - expect(() => JSON.parse(content)).not.toThrow(); - - // Verify no leftover tmp files - const dir = path.dirname(TEST_STATE_FILE); - const tmpFiles = fs.readdirSync(dir).filter(f => f.startsWith('test-workflow-state.json.tmp.')); - expect(tmpFiles).toHaveLength(0); - - writeFileSyncSpy.mockRestore(); - renameSyncSpy.mockRestore(); - }); - - it('should initialize activeGenerations array on load', async () => { - // Write state without activeGenerations (backward compat) - const dir = path.dirname(TEST_STATE_FILE); - if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); - fs.writeFileSync(TEST_STATE_FILE, JSON.stringify({ - lastUpdate: new Date().toISOString(), - recentArticles: [], - mcpQueryCache: {}, - workflows: {}, - }), 'utf-8'); - - await coordinator.load(); - expect((coordinator as any).state.activeGenerations).toEqual([]); - }); - }); - - describe('MCP Query Caching', () => { - it('should cache MCP query result', async () => { - const queryKey = 'search_voteringar_2025-26'; - const result = { data: 'test voting data' }; - - await coordinator.cacheMCPQuery(queryKey, result); - - expect((coordinator as any).state.mcpQueryCache[queryKey]).toBeDefined(); - expect((coordinator as any).state.mcpQueryCache[queryKey].result).toEqual(result); - }); - - it('should retrieve cached MCP query', async () => { - const queryKey = 'search_voteringar_2025-26'; - const result = { data: 'test voting data' }; - - await coordinator.cacheMCPQuery(queryKey, result); - const cached = coordinator.getCachedMCPQuery(queryKey) as { data: string } | null; - - expect(cached).toEqual(result); - }); - - it('should return null for non-existent cache key', () => { - const cached = coordinator.getCachedMCPQuery('nonexistent') as unknown; - expect(cached).toBeNull(); - }); - - it('should expire cache after TTL', async () => { - vi.useFakeTimers(); - try { - const queryKey = 'test_query'; - const result = { data: 'test' }; - const shortTTL = 1; // 1 second - - await coordinator.cacheMCPQuery(queryKey, result, shortTTL); - - // Should be cached immediately - let cached = coordinator.getCachedMCPQuery(queryKey) as unknown; - expect(cached).toEqual(result); - - // Advance time past TTL to trigger expiration - await vi.advanceTimersByTimeAsync(1100); - - // Should be expired - cached = coordinator.getCachedMCPQuery(queryKey) as unknown; - expect(cached).toBeNull(); - } finally { - vi.useRealTimers(); - } - }); - - it('should include result hash in cache entry', async () => { - const queryKey = 'test_query'; - const result = { data: 'test' }; - - await coordinator.cacheMCPQuery(queryKey, result); - - expect((coordinator as any).state.mcpQueryCache[queryKey].resultHash).toBeDefined(); - expect(typeof (coordinator as any).state.mcpQueryCache[queryKey].resultHash).toBe('string'); - }); - - it('should use adaptive TTL when no explicit TTL provided', async () => { - const queryKey = 'adaptive_ttl_test'; - const result = { data: 'test' }; - - await coordinator.cacheMCPQuery(queryKey, result); - - const entry = (coordinator as any).state.mcpQueryCache[queryKey]; - // TTL should be one of the two adaptive values - expect([MCP_CACHE_TTL_SECONDS, MCP_CACHE_TTL_NON_PLENARY_SECONDS]).toContain(entry.ttl); - }); - }); - - describe('Recent Article Tracking', () => { - it('should add recent article', async () => { - const article: RecentArticleInput = { - slug: '2026-02-14-test-en.html', - workflow: 'realtime-monitor', - title: 'Test Article', - topics: ['parliament'], - mcpQueries: ['search_voteringar'] - }; - - await coordinator.addRecentArticle(article); - - expect((coordinator as any).state.recentArticles).toHaveLength(1); - expect((coordinator as any).state.recentArticles[0].slug).toBe(article.slug); - expect((coordinator as any).state.recentArticles[0].workflow).toBe(article.workflow); - }); - - it('should set timestamp on article addition', async () => { - const article: RecentArticleInput = { - slug: '2026-02-14-test-en.html', - workflow: 'test', - title: 'Test', - topics: [], - mcpQueries: [] - }; - - await coordinator.addRecentArticle(article); - - expect((coordinator as any).state.recentArticles[0].timestamp).toBeDefined(); - }); - - it('should get recent articles within time window', async () => { - // Add articles at different times (simulated) - const now = new Date(); - const article1: RecentArticleEntry = { - slug: 'recent-en.html', - timestamp: new Date(now.getTime() - 1 * 60 * 60 * 1000).toISOString(), // 1 hour ago - workflow: 'test', - title: '', - topics: [], - mcpQueries: [] - }; - const article2: RecentArticleEntry = { - slug: 'old-en.html', - timestamp: new Date(now.getTime() - 10 * 60 * 60 * 1000).toISOString(), // 10 hours ago - workflow: 'test', - title: '', - topics: [], - mcpQueries: [] - }; - - (coordinator as any).state.recentArticles = [article1, article2]; - - const recent = coordinator.getRecentArticles(6) as RecentArticleEntry[]; // Last 6 hours - - expect(recent).toHaveLength(1); - expect(recent[0]!.slug).toBe('recent-en.html'); - }); - - it('should cleanup expired articles', () => { - const now = new Date(); - const recentArticle: RecentArticleEntry = { - slug: 'recent-en.html', - timestamp: new Date(now.getTime() - 1 * 60 * 60 * 1000).toISOString(), // 1 hour ago - workflow: 'test', - title: '', - topics: [], - mcpQueries: [] - }; - const expiredArticle: RecentArticleEntry = { - slug: 'expired-en.html', - timestamp: new Date(now.getTime() - 10 * 60 * 60 * 1000).toISOString(), // 10 hours ago - workflow: 'test', - title: '', - topics: [], - mcpQueries: [] - }; - - (coordinator as any).state.recentArticles = [recentArticle, expiredArticle]; - coordinator.cleanupExpiredEntries(); - - expect((coordinator as any).state.recentArticles).toHaveLength(1); - expect((coordinator as any).state.recentArticles[0].slug).toBe('recent-en.html'); - }); - }); - - describe('Duplicate Detection', () => { - beforeEach(async () => { - // Setup existing articles - await coordinator.addRecentArticle({ - slug: '2026-02-14-budget-vote-en.html', - workflow: 'realtime-monitor', - title: 'Budget Vote Passes with Narrow Margin', - topics: ['budget', 'finance', 'parliament'], - mcpQueries: ['search_voteringar', 'get_voting_group'] - }); - }); - - it('should detect duplicate with high title similarity', async () => { - const result = await coordinator.checkDuplicateArticle( - 'Budget Vote Passes with Narrow Margin', // Very similar title - ['budget', 'finance', 'parliament'], // Same topics as original - ['search_voteringar'] // Same MCP query - ) as DuplicateCheckResult; - - expect(result.isDuplicate).toBe(true); - expect(result.similarityScore).toBeGreaterThan(SIMILARITY_THRESHOLD as number); - }); - - it('should not flag as duplicate with low similarity', async () => { - const result = await coordinator.checkDuplicateArticle( - 'PM Announces New Environmental Policy', // Completely different - ['environment', 'policy'], - ['search_regering'] - ) as DuplicateCheckResult; - - expect(result.isDuplicate).toBe(false); - expect(result.similarityScore).toBeLessThan(SIMILARITY_THRESHOLD as number); - }); - - it('should consider topic overlap in similarity', async () => { - const result = await coordinator.checkDuplicateArticle( - 'Budget Discussion in Parliament', // Different title, same topics - ['budget', 'finance', 'parliament'], - [] - ) as DuplicateCheckResult; - - // Should have some similarity due to topic overlap - expect(result.similarityScore).toBeGreaterThan(0.2); - }); - - it('should return matched article details when duplicate found', async () => { - const result = await coordinator.checkDuplicateArticle( - 'Budget Vote Passes with Narrow Margin', - ['budget', 'finance'], - [] - ) as DuplicateCheckResult; - - if (result.isDuplicate) { - expect(result.matchedArticle).toBeDefined(); - expect(result.matchedArticle!.slug).toBe('2026-02-14-budget-vote-en.html'); - } - }); - - it('should detect duplicate when combined similarity < 0.70 but topic Jaccard >= 0.5', async () => { - const result = await coordinator.checkDuplicateArticle( - 'Committee Report: Housing', - ['budget', 'finance', 'housing'], - [] - ) as DuplicateCheckResult; - - // Jaccard topics against ['budget','finance','parliament'] = 2/4 = 0.5 - // and combined similarity remains below 0.70 due to very different title/MCP queries. - expect(result.similarityScore).toBeGreaterThanOrEqual(TOPIC_JACCARD_THRESHOLD as number); - expect(result.similarityScore).toBeLessThan(SIMILARITY_THRESHOLD as number); - expect(result.isDuplicate).toBe(true); - }); - - it('should not trigger Jaccard duplicate when topic overlap is low', async () => { - const result = await coordinator.checkDuplicateArticle( - 'Completely Unrelated Story', - ['sports', 'weather', 'culture', 'entertainment'], - [] - ) as DuplicateCheckResult; - - expect(result.isDuplicate).toBe(false); - }); - }); - - describe('Similarity Calculations', () => { - it('should calculate string similarity correctly', () => { - const sim1 = coordinator.stringSimilarity( - 'Budget Vote Passes with Narrow Margin', - 'Budget Vote Passes with Small Margin' - ) as number; - expect(sim1).toBeGreaterThan(0.7); // High similarity - - const sim2 = coordinator.stringSimilarity( - 'Budget Vote Passes', - 'Environmental Policy Announced' - ) as number; - expect(sim2).toBeLessThan(0.3); // Low similarity - }); - - it('should calculate set overlap correctly', () => { - const overlap1 = coordinator.setOverlap( - ['budget', 'finance', 'parliament'], - ['budget', 'finance', 'vote'] - ) as number; - expect(overlap1).toBeCloseTo(0.5, 1); // 2/4 = 0.5 - - const overlap2 = coordinator.setOverlap( - ['budget', 'finance'], - ['environment', 'policy'] - ) as number; - expect(overlap2).toBe(0); // No overlap - }); - - it('should handle empty sets in overlap calculation', () => { - const overlap = coordinator.setOverlap([], ['test']) as number; - expect(overlap).toBe(0); - }); - - it('should combine factors in similarity calculation', () => { - const similarity = coordinator.calculateSimilarity( - 'Budget Vote', - ['budget', 'finance'], - ['source1'], - 'Budget Vote', - ['budget', 'finance'], - ['source1'] - ) as number; - expect(similarity).toBeCloseTo(1.0, 1); // Perfect match - }); - }); - - describe('Jaccard Topic Similarity', () => { - it('should return 1.0 for identical topic sets', () => { - expect(jaccardTopicSimilarity( - ['budget', 'finance', 'parliament'], - ['budget', 'finance', 'parliament'], - )).toBeCloseTo(1.0); - }); - - it('should return 0 for completely disjoint topics', () => { - expect(jaccardTopicSimilarity( - ['budget', 'finance'], - ['environment', 'policy'], - )).toBe(0); - }); - - it('should return 0 for empty arrays', () => { - expect(jaccardTopicSimilarity([], [])).toBe(0); - expect(jaccardTopicSimilarity(['a'], [])).toBe(0); - expect(jaccardTopicSimilarity([], ['a'])).toBe(0); - }); - - it('should be case-insensitive', () => { - expect(jaccardTopicSimilarity( - ['Budget', 'Finance'], - ['budget', 'finance'], - )).toBeCloseTo(1.0); - }); - - it('should compute partial overlap correctly', () => { - // intersection = {budget, finance} = 2, union = {budget, finance, parliament, vote} = 4 - expect(jaccardTopicSimilarity( - ['budget', 'finance', 'parliament'], - ['budget', 'finance', 'vote'], - )).toBeCloseTo(0.5, 1); - }); - - it('should meet threshold for same-topic articles with different titles', () => { - // Simulates: "Committee Report: Housing" vs "Riksdag Housing Committee Analysis" - const similarity = jaccardTopicSimilarity( - ['housing', 'committee', 'riksdag'], - ['housing', 'committee', 'analysis'], - ); - // intersection=2, union=4 → 0.5 — exactly at threshold - expect(similarity).toBeGreaterThanOrEqual(TOPIC_JACCARD_THRESHOLD as number); - }); - }); - - describe('Adaptive Cache TTL', () => { - it('should return 2-hour TTL during Stockholm plenary hours (08-16 local)', () => { - const plenaryDate = new Date('2026-03-23T10:00:00Z'); - expect(getAdaptiveCacheTTL(plenaryDate)).toBe(MCP_CACHE_TTL_SECONDS); - }); - - it('should return 4-hour TTL outside Stockholm plenary hours', () => { - const eveningDate = new Date('2026-03-23T20:00:00Z'); - expect(getAdaptiveCacheTTL(eveningDate)).toBe(MCP_CACHE_TTL_NON_PLENARY_SECONDS); - }); - - it('should return 4-hour TTL for early morning UTC', () => { - // 03:00 UTC = 04:00 CET — early morning - const earlyDate = new Date('2026-03-23T03:00:00Z'); - expect(getAdaptiveCacheTTL(earlyDate)).toBe(MCP_CACHE_TTL_NON_PLENARY_SECONDS); - }); - - it('should return 2-hour TTL at Stockholm opening boundary (08:00 local)', () => { - const boundaryDate = new Date('2026-03-23T07:00:00Z'); - expect(getAdaptiveCacheTTL(boundaryDate)).toBe(MCP_CACHE_TTL_SECONDS); - }); - - it('should return 2-hour TTL at Stockholm closing boundary (16:00 local)', () => { - const boundaryDate = new Date('2026-03-23T15:00:00Z'); - expect(getAdaptiveCacheTTL(boundaryDate)).toBe(MCP_CACHE_TTL_SECONDS); - }); - - it('should honor DST by using Stockholm local hour (summer time)', () => { - // 06:30 UTC on summer date => 08:30 CEST in Stockholm (plenary window) - const summerDate = new Date('2026-06-15T06:30:00Z'); - expect(getAdaptiveCacheTTL(summerDate)).toBe(MCP_CACHE_TTL_SECONDS); - }); - - it('should honor DST by using Stockholm local hour (winter time)', () => { - // 07:30 UTC on winter date => 08:30 CET in Stockholm (plenary window) - const winterDate = new Date('2026-01-15T07:30:00Z'); - expect(getAdaptiveCacheTTL(winterDate)).toBe(MCP_CACHE_TTL_SECONDS); - }); - }); - - describe('Workflow Recording', () => { - it('should record workflow execution', async () => { - await coordinator.recordWorkflowExecution('realtime-monitor', { - articlesGenerated: 3 - }); - - expect((coordinator as any).state.workflows['realtime-monitor']).toBeDefined(); - expect((coordinator as any).state.workflows['realtime-monitor'].runCount).toBe(1); - expect((coordinator as any).state.workflows['realtime-monitor'].articlesGenerated).toBe(3); - }); - - it('should increment run count on multiple executions', async () => { - await coordinator.recordWorkflowExecution('realtime-monitor'); - await coordinator.recordWorkflowExecution('realtime-monitor'); - await coordinator.recordWorkflowExecution('realtime-monitor'); - - expect((coordinator as any).state.workflows['realtime-monitor'].runCount).toBe(3); - }); - - it('should track articles generated across runs', async () => { - await coordinator.recordWorkflowExecution('realtime-monitor', { articlesGenerated: 2 }); - await coordinator.recordWorkflowExecution('realtime-monitor', { articlesGenerated: 3 }); - - expect((coordinator as any).state.workflows['realtime-monitor'].articlesGenerated).toBe(5); - }); - - it('should get workflow statistics', async () => { - await coordinator.addRecentArticle({ - slug: 'test-en.html', - workflow: 'test', - title: 'Test', - topics: [], - mcpQueries: [] - }); - - await coordinator.cacheMCPQuery('test_query', { data: 'test' }); - - const stats = coordinator.getWorkflowStatistics() as { cacheSize: number; recentArticlesCount: number }; - - expect(stats.cacheSize).toBe(1); - expect(stats.recentArticlesCount).toBe(1); - }); - }); - - describe('Active Generations (Cross-Workflow Visibility)', () => { - it('should register an active generation', async () => { - await coordinator.registerActiveGeneration('wf-123', 'propositions', '2026-03-23'); - - const active = coordinator.getActiveGenerations(); - expect(active).toHaveLength(1); - expect(active[0].workflowId).toBe('wf-123'); - expect(active[0].type).toBe('propositions'); - expect(active[0].date).toBe('2026-03-23'); - }); - - it('should unregister an active generation', async () => { - await coordinator.registerActiveGeneration('wf-123', 'propositions', '2026-03-23'); - await coordinator.unregisterActiveGeneration('wf-123', 'propositions', '2026-03-23'); - - expect(coordinator.getActiveGenerations()).toHaveLength(0); - }); - - it('should handle multiple concurrent active generations', async () => { - await coordinator.registerActiveGeneration('wf-1', 'propositions', '2026-03-23'); - await coordinator.registerActiveGeneration('wf-2', 'motions', '2026-03-23'); - - const active = coordinator.getActiveGenerations(); - expect(active).toHaveLength(2); - }); - - it('should only unregister the matching generation', async () => { - await coordinator.registerActiveGeneration('wf-1', 'propositions', '2026-03-23'); - await coordinator.registerActiveGeneration('wf-2', 'motions', '2026-03-23'); - await coordinator.unregisterActiveGeneration('wf-1', 'propositions', '2026-03-23'); - - const active = coordinator.getActiveGenerations(); - expect(active).toHaveLength(1); - expect(active[0].workflowId).toBe('wf-2'); - }); - - it('should not duplicate identical active generation registration', async () => { - await coordinator.registerActiveGeneration('wf-1', 'propositions', '2026-03-23'); - await coordinator.registerActiveGeneration('wf-1', 'propositions', '2026-03-23'); - - const active = coordinator.getActiveGenerations(); - expect(active).toHaveLength(1); - }); - - it('should cleanup stale active generations on registration', async () => { - (coordinator as any).state.activeGenerations = [ - { - workflowId: 'old', - type: 'propositions', - date: '2026-03-23', - startedAt: new Date(Date.now() - (46 * 60 * 1000)).toISOString(), - }, - ]; - await coordinator.registerActiveGeneration('wf-2', 'motions', '2026-03-23'); - - const active = coordinator.getActiveGenerations(); - expect(active).toHaveLength(1); - expect(active[0].workflowId).toBe('wf-2'); - }); - }); -}); - -describe('Workflow Lock Manager', () => { - let lockManager: InstanceType; - - beforeEach(() => { - lockManager = new WorkflowLockManager(TEST_LOCK_DIR); - // Clean up test lock directory - if (fs.existsSync(TEST_LOCK_DIR)) { - fs.rmSync(TEST_LOCK_DIR, { recursive: true, force: true }); - } - }); - - afterEach(() => { - if (fs.existsSync(TEST_LOCK_DIR)) { - fs.rmSync(TEST_LOCK_DIR, { recursive: true, force: true }); - } - }); - - describe('Lock Acquisition', () => { - it('should acquire a lock successfully', () => { - const result = lockManager.acquireLock('propositions', '2026-03-23', 'wf-123'); - expect(result).toBe(true); - }); - - it('should fail to acquire an already-held lock', () => { - lockManager.acquireLock('propositions', '2026-03-23', 'wf-123'); - const result = lockManager.acquireLock('propositions', '2026-03-23', 'wf-456'); - expect(result).toBe(false); - }); - - it('should allow acquiring locks for different types', () => { - expect(lockManager.acquireLock('propositions', '2026-03-23', 'wf-1')).toBe(true); - expect(lockManager.acquireLock('motions', '2026-03-23', 'wf-2')).toBe(true); - }); - - it('should allow acquiring locks for different dates', () => { - expect(lockManager.acquireLock('propositions', '2026-03-23', 'wf-1')).toBe(true); - expect(lockManager.acquireLock('propositions', '2026-03-24', 'wf-2')).toBe(true); - }); - - it('should write lock info.json', () => { - lockManager.acquireLock('propositions', '2026-03-23', 'wf-123'); - - const info = lockManager.getLockInfo('propositions', '2026-03-23'); - expect(info).not.toBeNull(); - expect(info!.workflowId).toBe('wf-123'); - expect(info!.acquiredAt).toBeDefined(); - expect(info!.expiresAfterMs).toBe(LOCK_TIMEOUT_MS); - }); - - it('should reclaim a stale lock', () => { - // Manually create a stale lock - const lockPath = path.join(TEST_LOCK_DIR, 'propositions-2026-03-23.lock'); - fs.mkdirSync(lockPath, { recursive: true }); - fs.writeFileSync(path.join(lockPath, 'info.json'), JSON.stringify({ - workflowId: 'old-wf', - acquiredAt: new Date(Date.now() - 60 * 60 * 1000).toISOString(), // 1 hour ago - expiresAfterMs: LOCK_TIMEOUT_MS, - })); - - // New workflow should reclaim the stale lock - const result = lockManager.acquireLock('propositions', '2026-03-23', 'new-wf'); - expect(result).toBe(true); - - const info = lockManager.getLockInfo('propositions', '2026-03-23'); - expect(info!.workflowId).toBe('new-wf'); - }); - - it('should reclaim orphaned lock directory without info.json', () => { - // Create lock directory without info.json (orphaned lock) - const lockPath = path.join(TEST_LOCK_DIR, 'propositions-2026-03-23.lock'); - fs.mkdirSync(lockPath, { recursive: true }); - // No info.json written — simulates crash after mkdir but before writeFile - - // acquireLock should reclaim the orphaned directory and acquire successfully - const result = lockManager.acquireLock('propositions', '2026-03-23', 'wf-new'); - expect(result).toBe(true); - - const info = lockManager.getLockInfo('propositions', '2026-03-23'); - expect(info).not.toBeNull(); - expect(info!.workflowId).toBe('wf-new'); - }); - - it('should reclaim lock with corrupt info.json', () => { - // Create lock directory with corrupt (unparseable) info.json - const lockPath = path.join(TEST_LOCK_DIR, 'propositions-2026-03-23.lock'); - fs.mkdirSync(lockPath, { recursive: true }); - fs.writeFileSync(path.join(lockPath, 'info.json'), 'NOT VALID JSON{{{', 'utf-8'); - - // acquireLock should treat corrupt info.json as reclaimable - const result = lockManager.acquireLock('propositions', '2026-03-23', 'wf-new'); - expect(result).toBe(true); - - const info = lockManager.getLockInfo('propositions', '2026-03-23'); - expect(info).not.toBeNull(); - expect(info!.workflowId).toBe('wf-new'); - }); - - it('should reclaim lock with invalid acquiredAt timestamp', () => { - // Create lock directory with info.json that has an invalid acquiredAt - const lockPath = path.join(TEST_LOCK_DIR, 'propositions-2026-03-23.lock'); - fs.mkdirSync(lockPath, { recursive: true }); - fs.writeFileSync(path.join(lockPath, 'info.json'), JSON.stringify({ - workflowId: 'old-wf', - acquiredAt: 'not-a-date', - expiresAfterMs: LOCK_TIMEOUT_MS, - }), 'utf-8'); - - // acquireLock should treat NaN acquiredAt as corrupt and reclaim - const result = lockManager.acquireLock('propositions', '2026-03-23', 'wf-new'); - expect(result).toBe(true); - - const info = lockManager.getLockInfo('propositions', '2026-03-23'); - expect(info).not.toBeNull(); - expect(info!.workflowId).toBe('wf-new'); - }); - - it('should reclaim lock with explicitly invalid expiresAfterMs', () => { - // Create lock with valid acquiredAt but explicitly invalid expiresAfterMs - const lockPath = path.join(TEST_LOCK_DIR, 'propositions-2026-03-23.lock'); - fs.mkdirSync(lockPath, { recursive: true }); - fs.writeFileSync(path.join(lockPath, 'info.json'), JSON.stringify({ - workflowId: 'old-wf', - acquiredAt: new Date().toISOString(), - expiresAfterMs: 0, - }), 'utf-8'); - - // acquireLock should treat zero expiresAfterMs as corrupt and reclaim immediately - const result = lockManager.acquireLock('propositions', '2026-03-23', 'wf-new'); - expect(result).toBe(true); - - const info = lockManager.getLockInfo('propositions', '2026-03-23'); - expect(info).not.toBeNull(); - expect(info!.workflowId).toBe('wf-new'); - }); - - it('should throw for non-EEXIST fs errors during acquire', () => { - const mkdirSpy = vi.spyOn(fs, 'mkdirSync').mockImplementationOnce(() => { - const error = new Error('permission denied') as NodeJS.ErrnoException; - error.code = 'EACCES'; - throw error; - }); - - expect(() => lockManager.acquireLock('propositions', '2026-03-23', 'wf-1')).toThrow(); - mkdirSpy.mockRestore(); - }); - - it('should reject invalid lock type and prevent path traversal', () => { - expect(() => lockManager.acquireLock('../evil', '2026-03-23', 'wf-1')).toThrow('Invalid lock type'); - }); - - it('should reject invalid lock date and prevent path traversal', () => { - expect(() => lockManager.acquireLock('propositions', '../2026-03-23', 'wf-1')).toThrow('Invalid lock date'); - expect(() => lockManager.acquireLock('propositions', '2026/03/23', 'wf-1')).toThrow('Invalid lock date'); - }); - }); - - describe('Lock Release', () => { - it('should release a held lock', () => { - lockManager.acquireLock('propositions', '2026-03-23', 'wf-123'); - lockManager.releaseLock('propositions', '2026-03-23'); - - expect(lockManager.isLocked('propositions', '2026-03-23')).toBe(false); - }); - - it('should not throw when releasing a non-existent lock', () => { - expect(() => lockManager.releaseLock('nonexistent', '2026-03-23')).not.toThrow(); - }); - - it('should allow re-acquiring after release', () => { - lockManager.acquireLock('propositions', '2026-03-23', 'wf-1'); - lockManager.releaseLock('propositions', '2026-03-23'); - const result = lockManager.acquireLock('propositions', '2026-03-23', 'wf-2'); - expect(result).toBe(true); - }); - }); - - describe('Lock Status', () => { - it('should report lock as held', () => { - lockManager.acquireLock('propositions', '2026-03-23', 'wf-123'); - expect(lockManager.isLocked('propositions', '2026-03-23')).toBe(true); - }); - - it('should report lock as not held', () => { - expect(lockManager.isLocked('propositions', '2026-03-23')).toBe(false); - }); - - it('should return null for non-existent lock info', () => { - expect(lockManager.getLockInfo('nonexistent', '2026-03-23')).toBeNull(); - }); - }); - - describe('Stale Lock Cleanup', () => { - it('should clean up stale locks older than timeout', () => { - // Create a stale lock manually - const lockPath = path.join(TEST_LOCK_DIR, 'stale-2026-03-23.lock'); - fs.mkdirSync(lockPath, { recursive: true }); - fs.writeFileSync(path.join(lockPath, 'info.json'), JSON.stringify({ - workflowId: 'old-wf', - acquiredAt: new Date(Date.now() - 60 * 60 * 1000).toISOString(), // 1 hour ago (> 45 min) - expiresAfterMs: LOCK_TIMEOUT_MS, - })); - - const cleaned = lockManager.cleanupStaleLocks(); - expect(cleaned).toBe(1); - expect(lockManager.isLocked('stale', '2026-03-23')).toBe(false); - }); - - it('should not clean up fresh locks', () => { - lockManager.acquireLock('fresh', '2026-03-23', 'wf-fresh'); - - const cleaned = lockManager.cleanupStaleLocks(); - expect(cleaned).toBe(0); - expect(lockManager.isLocked('fresh', '2026-03-23')).toBe(true); - }); - - it('should clean up orphaned lock directories without info.json', () => { - const lockPath = path.join(TEST_LOCK_DIR, 'orphan-2026-03-23.lock'); - fs.mkdirSync(lockPath, { recursive: true }); - // No info.json — orphaned - - const cleaned = lockManager.cleanupStaleLocks(); - expect(cleaned).toBe(1); - }); - - it('should clean up lock directories with corrupt info.json', () => { - const lockPath = path.join(TEST_LOCK_DIR, 'corrupt-2026-03-23.lock'); - fs.mkdirSync(lockPath, { recursive: true }); - fs.writeFileSync(path.join(lockPath, 'info.json'), 'CORRUPT JSON{{{', 'utf-8'); - - const cleaned = lockManager.cleanupStaleLocks(); - expect(cleaned).toBe(1); - expect(fs.existsSync(lockPath)).toBe(false); - }); - - it('should clean up lock directories with invalid acquiredAt timestamp', () => { - const lockPath = path.join(TEST_LOCK_DIR, 'invalid-time-2026-03-23.lock'); - fs.mkdirSync(lockPath, { recursive: true }); - fs.writeFileSync(path.join(lockPath, 'info.json'), JSON.stringify({ - workflowId: 'bad-time-wf', - acquiredAt: 'not-a-valid-date', - expiresAfterMs: LOCK_TIMEOUT_MS, - }), 'utf-8'); - - const cleaned = lockManager.cleanupStaleLocks(); - expect(cleaned).toBe(1); - expect(fs.existsSync(lockPath)).toBe(false); - }); - - it('should clean up lock directories with non-positive or non-finite expiry', () => { - const zeroExpiryLock = path.join(TEST_LOCK_DIR, 'zero-expiry-2026-03-23.lock'); - fs.mkdirSync(zeroExpiryLock, { recursive: true }); - fs.writeFileSync(path.join(zeroExpiryLock, 'info.json'), JSON.stringify({ - workflowId: 'zero-expiry-wf', - acquiredAt: new Date().toISOString(), - expiresAfterMs: 0, - }), 'utf-8'); - - const nanExpiryLock = path.join(TEST_LOCK_DIR, 'nan-expiry-2026-03-23.lock'); - fs.mkdirSync(nanExpiryLock, { recursive: true }); - fs.writeFileSync(path.join(nanExpiryLock, 'info.json'), JSON.stringify({ - workflowId: 'nan-expiry-wf', - acquiredAt: new Date().toISOString(), - // Persisted JSON cannot represent NaN (it serializes to null), so use - // a non-numeric explicit value to verify invalid-expiry cleanup. - expiresAfterMs: 'NaN', - }), 'utf-8'); - - const cleaned = lockManager.cleanupStaleLocks(); - expect(cleaned).toBe(2); - expect(fs.existsSync(zeroExpiryLock)).toBe(false); - expect(fs.existsSync(nanExpiryLock)).toBe(false); - }); - - it('should return 0 when no locks exist', () => { - expect(lockManager.cleanupStaleLocks()).toBe(0); - }); - - it('should return 0 when lock directory does not exist', () => { - const freshManager = new WorkflowLockManager(path.join(TEST_LOCK_DIR, 'nonexistent')); - expect(freshManager.cleanupStaleLocks()).toBe(0); - }); - }); -}); - -describe('Workflow State Coordinator - Significance Features', () => { - const TEST_SIG_STATE_FILE = path.join(__dirname, 'fixtures', 'test-sig-workflow-state.json'); - let coordinator: WorkflowStateCoordinator; - - beforeEach(() => { - coordinator = new WorkflowStateCoordinator(TEST_SIG_STATE_FILE); - // Ensure clean state - const dir = path.dirname(TEST_SIG_STATE_FILE); - if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); - if (fs.existsSync(TEST_SIG_STATE_FILE)) fs.unlinkSync(TEST_SIG_STATE_FILE); - }); - - afterEach(() => { - if (fs.existsSync(TEST_SIG_STATE_FILE)) fs.unlinkSync(TEST_SIG_STATE_FILE); - }); - - describe('Significance-Aware Deduplication', () => { - it('should store significance when adding article', async () => { - await coordinator.addRecentArticle({ - slug: 'test-en.html', - workflow: 'realtime', - title: 'Test Article', - topics: ['budget'], - mcpQueries: [], - significance: 75, - }); - - const articles = coordinator.getRecentArticles(); - expect(articles).toHaveLength(1); - expect(articles[0].significance).toBe(75); - }); - - it('should allow high-significance article to override low-significance duplicate', async () => { - // Add a low-significance article first - await coordinator.addRecentArticle({ - slug: 'budget-low-en.html', - workflow: 'realtime', - title: 'Budget discussion in parliament today', - topics: ['budget', 'finance'], - mcpQueries: ['search_voteringar'], - significance: 40, - }); - - // Check same-topic article with high significance (≥80) - const result: DuplicateCheckResult = await coordinator.checkDuplicateArticle( - 'Budget discussion in parliament today', - ['budget', 'finance'], - ['search_voteringar'], - 85, // high significance - ); - - // Should NOT be flagged as duplicate — high significance overrides - expect(result.isDuplicate).toBe(false); - }); - - it('should still flag duplicate when both have high significance', async () => { - // Add a high-significance article - await coordinator.addRecentArticle({ - slug: 'budget-high-en.html', - workflow: 'realtime', - title: 'Budget vote today in parliament', - topics: ['budget', 'vote'], - mcpQueries: ['search_voteringar'], - significance: 90, - }); - - // Check same-topic article also with high significance - const result: DuplicateCheckResult = await coordinator.checkDuplicateArticle( - 'Budget vote today in parliament', - ['budget', 'vote'], - ['search_voteringar'], - 85, - ); - - // Should be flagged — both are high significance, normal dedup applies - expect(result.isDuplicate).toBe(true); - }); - - it('should still flag duplicate when new article has low significance', async () => { - // Add a low-significance article - await coordinator.addRecentArticle({ - slug: 'routine-en.html', - workflow: 'article-gen', - title: 'Routine parliamentary session review', - topics: ['session'], - mcpQueries: [], - significance: 30, - }); - - // Check same-topic with low significance — no override - const result: DuplicateCheckResult = await coordinator.checkDuplicateArticle( - 'Routine parliamentary session review', - ['session'], - [], - 35, - ); - - expect(result.isDuplicate).toBe(true); - }); - - it('should handle missing significance on existing article (undefined)', async () => { - // Add article without significance (legacy entry) - await coordinator.addRecentArticle({ - slug: 'legacy-en.html', - workflow: 'realtime', - title: 'Legacy article about economic policy', - topics: ['economy'], - mcpQueries: [], - // no significance - }); - - // High-significance new article should override - const result: DuplicateCheckResult = await coordinator.checkDuplicateArticle( - 'Legacy article about economic policy', - ['economy'], - [], - 85, - ); - - expect(result.isDuplicate).toBe(false); - }); - - it('should still flag duplicate when another similar high-significance article exists', async () => { - await coordinator.addRecentArticle({ - slug: 'budget-low-detail-en.html', - workflow: 'realtime', - title: 'Budget discussion in parliament details', - topics: ['budget', 'finance'], - mcpQueries: ['search_voteringar'], - significance: 35, - }); - - await coordinator.addRecentArticle({ - slug: 'budget-high-keyvote-en.html', - workflow: 'realtime', - title: 'Budget discussion in parliament key vote', - topics: ['budget', 'finance'], - mcpQueries: ['search_voteringar'], - significance: 92, - }); - - const result: DuplicateCheckResult = await coordinator.checkDuplicateArticle( - 'Budget discussion in parliament today', - ['budget', 'finance'], - ['search_voteringar'], - 85, - ); - - expect(result.isDuplicate).toBe(true); - }); - - it('should still flag duplicate when high-significance duplicate is detected via topic Jaccard only', async () => { - await coordinator.addRecentArticle({ - slug: 'topic-only-high-en.html', - workflow: 'realtime', - title: 'Parliament housing affordability outcomes', - topics: ['housing', 'committee', 'analysis'], - mcpQueries: ['search_housing_reports'], - significance: 90, - }); - - const result: DuplicateCheckResult = await coordinator.checkDuplicateArticle( - 'Riksdag housing committee briefing today', - ['housing', 'committee', 'briefing'], - ['unrelated_query_key'], - 85, - ); - - // Duplicate by topic-Jaccard (2/4=0.5) should still block high-significance override - // when an existing similar article already has significance >= 80. - expect(result.isDuplicate).toBe(true); - }); - }); -});