diff --git a/scripts/fetch-calendar.ts b/scripts/fetch-calendar.ts index bf21f49df..391df4f6e 100644 --- a/scripts/fetch-calendar.ts +++ b/scripts/fetch-calendar.ts @@ -1,359 +1,872 @@ -#!/usr/bin/env tsx /** * @module scripts/fetch-calendar - * @description Fetch riksdag calendar events using a primary (MCP) → - * fallback (web fetch + HTML parsing) chain. + * @description Resilient Riksdag calendar fetcher with primary→fallback chain. * - * Usage: - * npx tsx scripts/fetch-calendar.ts --from 2026-04-27 --tom 2026-05-27 [--org UTSK] [--akt bet] [--persist] + * **Primary path**: calls `get_calendar_events` on the riksdag-regering MCP + * server via a lightweight JSON-RPC 2.0 POST. If the response is not valid + * JSON (e.g. the server returns an HTML error page) or the request fails, the + * module automatically retries and — after exhausting retries — falls back to + * scraping `https://www.riksdagen.se/sv/kalendarium/` directly. * - * Output: - * stdout — always written (JSON) - * analysis/data/calendar/{from}_{tom}.json — written only when --persist is set + * **Output**: both paths produce the same `CalendarEvent[]` shape and write a + * normalized JSON file to `data/calendar/{from}.json` so that week-ahead and + * month-ahead workflows can consume a single, reliable data source regardless + * of which transport succeeded. * - * Exit codes: - * 0 — success - * 1 — runtime / network error - * 2 — bad CLI arguments + * **Manifest**: every run records `path`, `eventCount`, any `error` message + * and a `fetchedAt` timestamp. Workflows append this to their + * `data-download-manifest.md` to satisfy the ICD-203 provenance requirement. + * + * Usage (CLI): + * tsx scripts/fetch-calendar.ts --from 2026-04-28 --to 2026-05-04 [--persist] + * tsx scripts/fetch-calendar.ts --from 2026-04-28 --to 2026-05-31 [--persist] + * + * @author Hack23 AB + * @license Apache-2.0 */ import fs from 'node:fs'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; -import { MCPClient } from './mcp-client.js'; +import { decodeHtmlEntities } from './html-utils.js'; // --------------------------------------------------------------------------- -// Paths +// Types // --------------------------------------------------------------------------- -const __dirname = path.dirname(fileURLToPath(import.meta.url)); -const REPO_ROOT = path.resolve(__dirname, '..'); +/** + * A single normalized calendar event produced by either transport path. + * + * Fields map to the riksdag-regering `get_calendar_events` response envelope + * (`kalender[]`) so that the MCP path is loss-free and the web-fallback + * path produces maximally equivalent data. + */ +export interface CalendarEvent { + /** ISO 8601 date-time string (e.g. "2026-04-28T10:00:00"). */ + readonly dtstart: string; + /** ISO 8601 end date-time string, when available. */ + readonly dtend?: string; + /** Organ/committee code (e.g. "FiU", "KU", "kammaren"). */ + readonly org: string; + /** Activity type code (e.g. "debatt", "utskottsmöte", "votering", "beredning"). */ + readonly akt: string; + /** Human-readable event summary / title. */ + readonly summary: string; + /** Document references extracted from the event (dok_id or URLs). */ + readonly doc_refs: readonly string[]; + /** Which transport provided this event. */ + readonly source: 'mcp-primary' | 'web-fallback'; +} -// --------------------------------------------------------------------------- -// Types -// --------------------------------------------------------------------------- +/** + * Provenance manifest written alongside the normalized events JSON. + * Workflows append this to `data-download-manifest.md`. + */ +export interface CalendarFetchManifest { + /** Date of the run (ISO 8601 "from" value). */ + readonly date: string; + /** Date range end (ISO 8601 "to" value). */ + readonly dateTo: string; + /** Which path actually delivered the events. */ + readonly path: 'mcp-primary' | 'web-fallback' | 'none'; + /** Number of events returned. */ + readonly eventCount: number; + /** Error message from the failed primary path (when fallback was used). */ + readonly primaryError?: string; + /** Error message from the failed fallback path (when both failed). */ + readonly fallbackError?: string; + /** ISO 8601 timestamp of the fetch. */ + readonly fetchedAt: string; +} -export interface ParsedCalendarArgs { - readonly from: string; - readonly tom: string; - readonly org: string | null; - readonly akt: string | null; - readonly persist: boolean; +/** + * Result object returned by `fetchCalendarWithFallback`. + */ +export interface CalendarFetchResult { + readonly events: readonly CalendarEvent[]; + readonly manifest: CalendarFetchManifest; } -export interface CalendarEvent { - datum: string; - tid: string; - org: string; - titel: string; - typ: string; +/** + * Injectable configuration for `fetchCalendarWithFallback`. + * All network calls go through `fetchFn` so tests can substitute a mock. + */ +export interface CalendarFetchConfig { + /** + * MCP server endpoint URL. + * Defaults to the `MCP_SERVER_URL` env var or the public Render.com endpoint. + */ + readonly mcpUrl?: string; + /** + * Riksdag web kalendarium base URL (no trailing slash). + * Defaults to `https://www.riksdagen.se`. + */ + readonly webBaseUrl?: string; + /** Request timeout in ms. Default 15 000. */ + readonly timeout?: number; + /** Maximum MCP retry attempts before triggering the web fallback. Default 2. */ + readonly maxRetries?: number; + /** + * Injectable fetch function. Defaults to `globalThis.fetch`. + * Set in tests to control all network calls without live HTTP. + */ + readonly fetchFn?: typeof fetch; + /** + * Optional sleep function override (ms → Promise). + * Defaults to `setTimeout`-based sleep. Override in tests to skip delays. + */ + readonly sleepFn?: (ms: number) => Promise; } -export interface CalendarOutput { - from: string; - tom: string; - fetchedAt: string; - source: 'mcp' | 'web_fallback'; - events: CalendarEvent[]; +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +const DEFAULT_MCP_URL = + process.env['MCP_SERVER_URL'] ?? 'https://riksdag-regering-ai.onrender.com/mcp'; +const DEFAULT_WEB_BASE_URL = 'https://www.riksdagen.se'; +const DEFAULT_TIMEOUT = 15_000; +const DEFAULT_MAX_RETRIES = 2; +/** Retry base delay (ms); doubled on each subsequent attempt. */ +const RETRY_BASE_DELAY_MS = 1_000; + +// HTML detection: common HTML document / fragment leading tags. +const HTML_PREFIX_RE = /^\s*(?:])|/])|/])|/])|/])|/]))/i; + +// --------------------------------------------------------------------------- +// HTML detection +// --------------------------------------------------------------------------- + +/** + * Returns true when `text` looks like an HTML document rather than JSON. + * Used to detect when the MCP endpoint returns an error page instead of JSON. + */ +export function isHtmlErrorResponse(text: string): boolean { + return HTML_PREFIX_RE.test(text); } // --------------------------------------------------------------------------- -// CLI argument parsing +// MCP helper // --------------------------------------------------------------------------- -const DATE_RE = /^\d{4}-\d{2}-\d{2}$/; +/** Minimum JSON-RPC 2.0 envelope for a `tools/call` request. */ +interface JsonRpcRequest { + jsonrpc: '2.0'; + id: number; + method: 'tools/call'; + params: { name: string; arguments: Record }; +} -export interface CalendarParseArgsResult { - readonly args: ParsedCalendarArgs; - readonly error: string | null; +/** Partial shape of a JSON-RPC 2.0 response (only the fields we use). */ +interface JsonRpcResponse { + result?: { + content?: Array<{ text?: string }>; + kalender?: unknown[]; + events?: unknown[]; + [key: string]: unknown; + }; + error?: { message?: string; [key: string]: unknown }; + [key: string]: unknown; } -export function parseArgs(argv: readonly string[]): CalendarParseArgsResult { - const flags = new Map(); - const booleans = new Set(); +let _rpcId = 1; - for (let i = 0; i < argv.length; i++) { - const token = argv[i]; - if (!token.startsWith('--')) continue; - const key = token.slice(2); - const next = argv[i + 1]; - if (next !== undefined && !next.startsWith('--')) { - flags.set(key, next); - i++; - } else { - booleans.add(key); +/** + * Call the riksdag-regering MCP `get_calendar_events` tool via a single + * JSON-RPC 2.0 POST. Throws a typed `CalendarMcpError` on any transport, + * HTTP, or protocol error so callers can distinguish HTML responses from + * genuine tool failures. + */ +export async function callMcpCalendarEvents( + from: string, + tom: string, + config: Required>, +): Promise { + const body: JsonRpcRequest = { + jsonrpc: '2.0', + id: _rpcId++, + method: 'tools/call', + params: { name: 'get_calendar_events', arguments: { from, tom } }, + }; + + const controller = new AbortController(); + const tid = setTimeout(() => controller.abort(), config.timeout); + + let responseText: string; + try { + const response = await config.fetchFn(config.mcpUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Accept: 'application/json, text/event-stream', + }, + body: JSON.stringify(body), + signal: controller.signal, + }); + + responseText = await response.text(); + + if (!response.ok) { + throw new CalendarMcpError( + `MCP HTTP error: ${response.status} ${response.statusText}`, + isHtmlErrorResponse(responseText) ? 'html' : 'http', + responseText, + ); } + } catch (err) { + clearTimeout(tid); + if (err instanceof CalendarMcpError) throw err; + const msg = err instanceof Error ? err.message : String(err); + throw new CalendarMcpError(`MCP fetch failed: ${msg}`, 'network'); + } finally { + clearTimeout(tid); } - const fromVal = flags.get('from'); - const tomVal = flags.get('tom'); - - if (!fromVal) { - return { - args: { from: '', tom: '', org: null, akt: null, persist: false }, - error: 'missing required flag --from', - }; + // Detect HTML error page masquerading as a 200 OK response. + if (isHtmlErrorResponse(responseText)) { + throw new CalendarMcpError( + 'MCP returned HTML instead of JSON', + 'html', + responseText, + ); } - if (!tomVal) { - return { - args: { from: fromVal, tom: '', org: null, akt: null, persist: false }, - error: 'missing required flag --tom', - }; + + // Parse JSON-RPC response. + let rpc: JsonRpcResponse; + try { + rpc = JSON.parse(responseText) as JsonRpcResponse; + } catch { + throw new CalendarMcpError( + `MCP response is not valid JSON: ${responseText.slice(0, 120)}`, + 'json', + ); } - if (!DATE_RE.test(fromVal)) { - return { - args: { from: '', tom: '', org: null, akt: null, persist: false }, - error: `--from must be YYYY-MM-DD, got: ${fromVal}`, - }; + if (rpc.error) { + const msg = rpc.error.message ?? JSON.stringify(rpc.error); + throw new CalendarMcpError(`MCP tool error: ${msg}`, 'tool'); } - if (!DATE_RE.test(tomVal)) { - return { - args: { from: fromVal, tom: '', org: null, akt: null, persist: false }, - error: `--tom must be YYYY-MM-DD, got: ${tomVal}`, - }; + + const result = rpc.result ?? {}; + + // Handle the content-envelope pattern used by the MCP server. + const content = result['content'] as Array<{ text?: string }> | undefined; + if (Array.isArray(content) && content[0]?.text) { + let inner: Record; + try { + inner = JSON.parse(content[0].text) as Record; + } catch { + throw new CalendarMcpError( + `MCP content text is not valid JSON: ${content[0].text.slice(0, 120)}`, + 'json', + ); + } + const events = inner['kalender'] ?? inner['events']; + if (Array.isArray(events)) return events as unknown[]; + return []; } - return { - args: { - from: fromVal, - tom: tomVal, - org: flags.get('org') ?? null, - akt: flags.get('akt') ?? null, - persist: booleans.has('persist'), - }, - error: null, - }; + const direct = result['kalender'] ?? result['events']; + if (Array.isArray(direct)) return direct as unknown[]; + + return []; +} + +/** Typed error for MCP transport / protocol failures. */ +export class CalendarMcpError extends Error { + /** Error category. */ + readonly kind: 'html' | 'http' | 'network' | 'json' | 'tool'; + /** Raw response body (only present for `html` / `http` kinds). */ + readonly responseText?: string; + + constructor( + message: string, + kind: CalendarMcpError['kind'], + responseText?: string, + ) { + super(message); + this.name = 'CalendarMcpError'; + this.kind = kind; + this.responseText = responseText; + } } // --------------------------------------------------------------------------- -// MCP primary path +// MCP event normalizer // --------------------------------------------------------------------------- -async function fetchViaMcp(client: MCPClient, args: ParsedCalendarArgs): Promise { - const raw = await client.fetchCalendarEvents(args.from, args.tom, args.org, args.akt); - return raw.map((item) => { - const r = item as Record; - return { - datum: String(r['datum'] ?? r['date'] ?? r['dtstart'] ?? ''), - tid: String(r['tid'] ?? r['time'] ?? r['starttid'] ?? ''), - org: String(r['org'] ?? r['organ'] ?? r['organisation'] ?? ''), - titel: String(r['titel'] ?? r['summary'] ?? r['title'] ?? r['rubrik'] ?? ''), - typ: String(r['typ'] ?? r['type'] ?? r['akt'] ?? r['aktivitet'] ?? ''), - }; - }); +/** + * Normalize a raw event object from the MCP `get_calendar_events` response + * into the canonical `CalendarEvent` shape. + * + * The riksdag-regering server uses the iCalendar field names (`DTSTART`, + * `DTEND`, `SUMMARY`, etc.) with either upper-case or lower-case keys — both + * are handled. + */ +export function normalizeMcpCalendarEvent(raw: unknown): CalendarEvent { + const r = (raw ?? {}) as Record; + + const dtstart = + String(r['dtstart'] ?? r['DTSTART'] ?? r['start'] ?? '').trim(); + const dtend = + String(r['dtend'] ?? r['DTEND'] ?? r['end'] ?? '').trim() || undefined; + const org = + String(r['organ'] ?? r['org'] ?? r['ORG'] ?? r['location'] ?? '').trim(); + const akt = + String(r['akt'] ?? r['AKT'] ?? r['type'] ?? r['kategori'] ?? '').trim(); + const summary = + String(r['summary'] ?? r['SUMMARY'] ?? r['titel'] ?? r['title'] ?? '').trim(); + + // Collect document references from various possible fields. + const docRefs: string[] = []; + for (const key of ['dok_id', 'dokid', 'url', 'href', 'beteckning', 'doc_id']) { + const val = r[key]; + if (typeof val === 'string' && val.trim()) { + docRefs.push(val.trim()); + } else if (Array.isArray(val)) { + for (const item of val) { + if (typeof item === 'string' && item.trim()) docRefs.push(item.trim()); + } + } + } + + return { + dtstart, + ...(dtend ? { dtend } : {}), + org, + akt, + summary, + doc_refs: docRefs, + source: 'mcp-primary', + }; } // --------------------------------------------------------------------------- -// Web fallback — parse riksdagen.se/sv/kalendarium/ HTML +// Web fallback HTML parser // --------------------------------------------------------------------------- -const RIKSDAGEN_CALENDAR_URL = 'https://www.riksdagen.se/sv/kalendarium/'; - /** - * Parse calendar events from riksdagen.se HTML using regex patterns. - * Since cheerio may not be available, we use Node's built-in fetch - * and regex-based HTML extraction. + * Parse the HTML returned by `https://www.riksdagen.se/sv/kalendarium/` and + * extract calendar events into the normalized `CalendarEvent` shape. + * + * The parser is intentionally defensive and regex-based (no external parser + * dependency) in the same style as `statskontoret-client.ts`. It handles + * the two primary markup patterns used by riksdagen.se (as of 2026): + * + * **Pattern A – article-per-event:** + * ```html + * + * ``` + * + * **Pattern B – list-item-per-event:** + * ```html + *
  • + * + * Utskottsmöte + * NU + * Näringspolitik - Bredbands + *
  • + * ``` */ -export function parseCalendarHtml(html: string): CalendarEvent[] { +export function parseRiksdagKalendariumHtml(html: string): CalendarEvent[] { const events: CalendarEvent[] = []; - // Pattern: extract event blocks. The page wraps events in article/li - // elements with class like "event-item", "event", "calendar-item". - // We extract: date, time, organ, title, type using several heuristics. + // Extract
    blocks, then retain only calendar-item articles (Pattern A). + const articleRe = /]*)>([\s\S]*?)<\/article>/gi; + for (const articleMatch of html.matchAll(articleRe)) { + const attrs = articleMatch[1] ?? ''; + if (!hasCalendarItemClass(attrs)) continue; + const body = articleMatch[2] ?? ''; + const event = parseCalendarArticle(attrs, body); + if (event) events.push(event); + } - // Strategy 1: JSON-LD structured data (most reliable) - const jsonLdRe = /]*type="application\/ld\+json"[^>]*>([\s\S]*?)<\/script>/gi; - for (const m of html.matchAll(jsonLdRe)) { - try { - const raw = m[1]; - if (!raw) continue; - const obj = JSON.parse(raw) as Record; - const items = Array.isArray(obj) ? obj : [obj]; - for (const item of items) { - if (typeof item !== 'object' || item === null) continue; - const ev = item as Record; - if (ev['@type'] === 'Event' || ev['@type'] === 'SocialEvent') { - events.push({ - datum: String(ev['startDate'] ?? ev['startdate'] ?? '').slice(0, 10), - tid: String(ev['startDate'] ?? '').slice(11, 16), - org: String( - (ev['organizer'] as Record)?.['name'] ?? '', - ), - titel: String(ev['name'] ?? ev['headline'] ?? ''), - typ: String(ev['eventType'] ?? ev['category'] ?? ''), - }); - } - } - } catch { - // JSON parse failed — skip this block + // If no articles found, try
  • blocks (Pattern B). + if (events.length === 0) { + const liRe = /]*class=(["'])[^"']*calendar[^"']*\2[^>]*)>([\s\S]*?)<\/li>/gi; + for (const liMatch of html.matchAll(liRe)) { + const attrs = liMatch[1] ?? ''; + const body = liMatch[3] ?? ''; + const event = parseCalendarListItem(attrs, body); + if (event) events.push(event); } } - if (events.length > 0) return events; + return events; +} - // Strategy 2: Scan for common HTML patterns in riksdagen.se - // Event title typically in or

    - const titleRe = /<(?:a|h[23])[^>]*class="[^"]*(?:event-title|calendar-title|event-name)[^"]*"[^>]*>([\s\S]*?)<\/(?:a|h[23])>/gi; - const dateRe = /(?:data-date|datetime)="(\d{4}-\d{2}-\d{2})"/gi; - const timeRe = /(\d{2}:\d{2})/g; +// --------------------------------------------------------------------------- +// HTML parser internals (exported for unit tests) +// --------------------------------------------------------------------------- - const dates = [...html.matchAll(dateRe)].map((m) => m[1] ?? ''); - const titles = [...html.matchAll(titleRe)].map((m) => - // Use [\s\S]*? to match newlines inside tags (prevents incomplete sanitization) - (m[1] ?? '').replace(/<[\s\S]*?>/g, '').trim(), - ); - // Pre-compute time matches once outside the loop (O(N) instead of - // O(titles × times)). Keep an index pointer (`timeCursor`) similar to - // `usedDates`'s sequential walk so each match is consumed at most once. - const allTimes = [...html.matchAll(timeRe)].map((m) => m[1] ?? ''); - - const usedDates = new Set(); - let timeCursor = 0; - - for (let i = 0; i < titles.length; i++) { - const title = titles[i]; - if (!title) continue; - - // Find nearest unused date - let datum = ''; - for (let d = i; d < dates.length; d++) { - if (!usedDates.has(d) && dates[d]) { - datum = dates[d]!; - usedDates.add(d); - break; - } - } +/** Parse an `
    ` calendar item block. */ +export function parseCalendarArticle(attrs: string, body: string): CalendarEvent | null { + const dtstart = extractDatetime(body); + if (!dtstart) return null; - // Consume the next available time match (linear pointer scan) - let tid = ''; - while (timeCursor < allTimes.length) { - const candidate = allTimes[timeCursor++]; - if (candidate) { - tid = candidate; - break; - } - } + const org = + extractDataAttr(attrs, 'organ') ?? + extractDataAttr(attrs, 'org') ?? + extractSpanText(body, 'organ') ?? + extractSpanText(body, 'committee') ?? + ''; - events.push({ datum, tid, org: '', titel: title, typ: '' }); - } + const akt = + extractDataAttr(attrs, 'akt') ?? + extractDataAttr(attrs, 'type') ?? + extractSpanText(body, 'type') ?? + extractSpanText(body, 'akt') ?? + ''; - return events; + const { summary, docRefs } = extractHeadingAndLinks(body); + + return { + dtstart, + org: normalizeOrgCode(decodeHtmlEntities(org)), + akt: normalizeAkt(decodeHtmlEntities(akt)), + summary: decodeHtmlEntities(stripTags(summary).trim()), + doc_refs: docRefs, + source: 'web-fallback', + }; } -async function fetchViaWeb(args: ParsedCalendarArgs): Promise { - const url = new URL(RIKSDAGEN_CALENDAR_URL); - if (args.from) url.searchParams.set('from', args.from); - if (args.tom) url.searchParams.set('tom', args.tom); - if (args.org) url.searchParams.set('org', args.org); - if (args.akt) url.searchParams.set('akt', args.akt); +/** Parse an `
  • ` calendar list item block. */ +export function parseCalendarListItem(attrs: string, body: string): CalendarEvent | null { + const dtstart = extractDatetime(body); + if (!dtstart) return null; - const response = await fetch(url.toString(), { - headers: { 'User-Agent': 'riksdagsmonitor/1.0 (+https://hack23.com)' }, - signal: AbortSignal.timeout(15_000), - }); + // organ can come from a dedicated span or a data attribute. + const org = + extractDataAttr(attrs, 'organ') ?? + extractSpanText(body, 'organ') ?? + extractSpanText(body, 'committee') ?? + ''; - if (!response.ok) { - throw new Error(`web_fallback: HTTP ${response.status} from ${url.toString()}`); - } + const akt = + extractDataAttr(attrs, 'akt') ?? + extractSpanText(body, 'type') ?? + extractSpanText(body, 'akt') ?? + ''; - const html = await response.text(); - return parseCalendarHtml(html); + const { summary, docRefs } = extractHeadingAndLinks(body); + + return { + dtstart, + org: normalizeOrgCode(decodeHtmlEntities(org)), + akt: normalizeAkt(decodeHtmlEntities(akt)), + summary: decodeHtmlEntities(stripTags(summary).trim()), + doc_refs: docRefs, + source: 'web-fallback', + }; } // --------------------------------------------------------------------------- -// Orchestrator — MCP primary → web fallback → graceful empty +// HTML extraction helpers // --------------------------------------------------------------------------- +/** Escape a string for safe use in a `new RegExp(...)` constructor. */ +function escapeRegex(s: string): string { + return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +/** Extract the `datetime` attribute from a `` anchor in an HTML fragment. */ +function extractFirstAnchorText(html: string): string { + const m = html.match(/]*>([\s\S]*?)<\/a>/i); + return m ? (m[1] ?? '') : ''; +} + +/** True when an href looks like a Riksdag document or proceedings link. */ +function isRiksdagDocumentHref(href: string): boolean { + return ( + href.includes('/dokument') || + href.includes('/betankanden') || + href.includes('/propositioner') || + href.includes('/motioner') || + href.includes('/interpellationer') + ); +} + +/** Remove all HTML tags from a string. */ +function stripTags(html: string): string { + return html.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' '); +} + +/** Normalize committee/organ codes by collapsing whitespace and trimming only. */ +function normalizeOrgCode(raw: string): string { + return raw.replace(/\s+/g, ' ').trim(); +} +/** Normalize activity type strings to lower-case-with-hyphens. */ +function normalizeAkt(raw: string): string { + return raw + .toLowerCase() + .replace(/\s+/g, '-') + .replace(/[^a-z0-9-åäö]/g, '') + .trim(); +} + +// --------------------------------------------------------------------------- +// Web fallback fetcher +// --------------------------------------------------------------------------- + +/** + * Fetch the Riksdag web calendar for a date range and parse events. + * + * URL: `https://www.riksdagen.se/sv/kalendarium/?from={from}&tom={to}` + */ +export async function fetchWebCalendar( + from: string, + to: string, + config: Required>, +): Promise { + const url = `${config.webBaseUrl}/sv/kalendarium/?from=${encodeURIComponent(from)}&tom=${encodeURIComponent(to)}`; + + const controller = new AbortController(); + const tid = setTimeout(() => controller.abort(), config.timeout); + + let html: string; try { - events = await deps.fetchViaMcp(args); - log(`fetch-calendar: MCP returned ${events.length} event(s)`); - } catch (mcpErr) { - log(`fetch-calendar: MCP failed (${String(mcpErr)}), trying web fallback`); + const response = await config.fetchFn(url, { + signal: controller.signal, + headers: { + Accept: 'text/html,application/xhtml+xml', + 'Accept-Language': 'sv-SE,sv;q=0.9,en;q=0.8', + 'User-Agent': 'riksdagsmonitor-news-bot/1.0 (+https://riksdagsmonitor.com)', + }, + }); + + if (!response.ok) { + throw new Error(`Riksdag web calendar HTTP error: ${response.status} ${response.statusText}`); + } + + html = await response.text(); + } catch (err) { + clearTimeout(tid); + const msg = err instanceof Error ? err.message : String(err); + throw new Error(`Riksdag web calendar fetch failed: ${msg}`, { cause: err }); + } finally { + clearTimeout(tid); } - if (events.length === 0) { - source = 'web_fallback'; + return parseRiksdagKalendariumHtml(html); +} + +// --------------------------------------------------------------------------- +// Sleep helper +// --------------------------------------------------------------------------- + +function defaultSleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +// --------------------------------------------------------------------------- +// Primary → fallback orchestrator +// --------------------------------------------------------------------------- + +/** + * Fetch Riksdag calendar events for the given date range using a + * primary→fallback resilience chain: + * + * 1. **MCP primary**: call `get_calendar_events` on riksdag-regering. + * Retries up to `maxRetries` times on transient failures. + * 2. **Web fallback**: if every MCP attempt returns an HTML error page or a + * network failure, scrape `riksdagen.se/sv/kalendarium/` instead. + * + * @param from ISO 8601 date string (inclusive start, e.g. "2026-04-28"). + * @param to ISO 8601 date string (inclusive end, e.g. "2026-05-04"). + * @param config Optional overrides for URLs, timeout, retries, and fetch mock. + */ +export async function fetchCalendarWithFallback( + from: string, + to: string, + config: CalendarFetchConfig = {}, +): Promise { + const mcpUrl = config.mcpUrl ?? DEFAULT_MCP_URL; + const webBaseUrl = config.webBaseUrl ?? DEFAULT_WEB_BASE_URL; + const timeout = config.timeout ?? DEFAULT_TIMEOUT; + const maxRetries = config.maxRetries ?? DEFAULT_MAX_RETRIES; + const fetchFn = config.fetchFn ?? globalThis.fetch; + const sleepFn = config.sleepFn ?? defaultSleep; + const fetchedAt = new Date().toISOString(); + + const resolved = { mcpUrl, webBaseUrl, timeout, fetchFn, sleepFn }; + + // ── MCP primary path (with retry) ────────────────────────────────────── + let primaryError: string | undefined; + for (let attempt = 0; attempt <= maxRetries; attempt++) { + if (attempt > 0) { + const delay = Math.min(RETRY_BASE_DELAY_MS * Math.pow(2, attempt - 1), 30_000); + console.warn( + ` ⚠️ MCP calendar retry ${attempt}/${maxRetries} after ${delay} ms…`, + ); + await sleepFn(delay); + } + try { - events = await deps.fetchViaWeb(args); - log(`fetch-calendar: web_fallback returned ${events.length} event(s)`); - } catch (webErr) { - log(`fetch-calendar: web_fallback also failed (${String(webErr)}), returning empty`); - events = []; + console.error(` 🔄 [fetch-calendar] MCP primary attempt ${attempt + 1}/${maxRetries + 1}…`); + const raw = await callMcpCalendarEvents(from, to, resolved); + const events = raw.map(normalizeMcpCalendarEvent); + console.error(` ✅ [fetch-calendar] MCP primary succeeded — ${events.length} events`); + + return { + events, + manifest: { + date: from, + dateTo: to, + path: 'mcp-primary', + eventCount: events.length, + fetchedAt, + }, + }; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + primaryError = msg; + const kind = err instanceof CalendarMcpError ? err.kind : 'unknown'; + console.warn(` ⚠️ [fetch-calendar] MCP attempt ${attempt + 1} failed (${kind}): ${msg.slice(0, 120)}`); + // HTML error is definitive — no point retrying the same endpoint. + if (err instanceof CalendarMcpError && err.kind === 'html') break; } } - return { from: args.from, tom: args.tom, fetchedAt, source, events }; + // ── Web fallback path ────────────────────────────────────────────────── + console.error(` 🔄 [fetch-calendar] Falling back to riksdagen.se/sv/kalendarium/…`); + let fallbackError: string | undefined; + try { + const events = await fetchWebCalendar(from, to, resolved); + console.error(` ✅ [fetch-calendar] Web fallback succeeded — ${events.length} events`); + + return { + events, + manifest: { + date: from, + dateTo: to, + path: 'web-fallback', + eventCount: events.length, + primaryError, + fetchedAt, + }, + }; + } catch (err) { + fallbackError = err instanceof Error ? err.message : String(err); + console.error(` ❌ [fetch-calendar] Web fallback also failed: ${fallbackError}`); + } + + // ── Both paths exhausted ─────────────────────────────────────────────── + return { + events: [], + manifest: { + date: from, + dateTo: to, + path: 'none', + eventCount: 0, + primaryError, + fallbackError, + fetchedAt, + }, + }; } // --------------------------------------------------------------------------- -// Main entry point +// Persistence // --------------------------------------------------------------------------- -async function main(): Promise { - const { args, error } = parseArgs(process.argv.slice(2)); - if (error) { - process.stderr.write(`fetch-calendar: ${error}\n`); - process.exit(2); - } +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const REPO_ROOT = path.resolve(__dirname, '..'); +const CALENDAR_DIR = path.join(REPO_ROOT, 'data', 'calendar'); - const { from, tom, persist } = args; +/** + * Write a `CalendarFetchResult` to `data/calendar/{from}_{to}.json`. + * + * The file is an object with `{ manifest, events }` so that consumers can + * load a single file and get both the data and the provenance record. + * Including `to` in the filename prevents collisions when the same `from` + * date is fetched with different ranges (e.g. week-ahead vs month-ahead). + */ +export function persistCalendarJson( + from: string, + result: CalendarFetchResult, + outputDir: string = CALENDAR_DIR, +): string { + fs.mkdirSync(outputDir, { recursive: true }); + const dateTo = result.manifest.dateTo ?? from; + const fileName = dateTo && dateTo !== from ? `${from}_${dateTo}.json` : `${from}.json`; + const outputPath = path.join(outputDir, fileName); + const payload = { + schema: 'riksdagsmonitor-calendar/1.0', + manifest: result.manifest, + events: result.events, + }; + fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2), 'utf8'); + console.error(` 💾 [fetch-calendar] Persisted ${result.events.length} events → ${outputPath}`); + return outputPath; +} - const client = new MCPClient(); - const output = await fetchCalendarEvents(args, { - fetchViaMcp: (a) => fetchViaMcp(client, a), - fetchViaWeb, - logger: (msg) => process.stderr.write(`${msg}\n`), - }); +// --------------------------------------------------------------------------- +// CLI entry +// --------------------------------------------------------------------------- - process.stdout.write(JSON.stringify(output, null, 2) + '\n'); +/** + * Generate a manifest markdown snippet suitable for appending to + * `data-download-manifest.md`. + */ +export function formatManifestMarkdown(manifest: CalendarFetchManifest): string { + const pathLabel = + manifest.path === 'mcp-primary' + ? '✅ MCP primary (`get_calendar_events`)' + : manifest.path === 'web-fallback' + ? '⚠️ Web fallback (`riksdagen.se/sv/kalendarium/`)' + : '❌ None (both paths failed)'; + + const lines = [ + `## Calendar Fetch — ${manifest.date}`, + '', + `- **Path used**: ${pathLabel}`, + `- **Events**: ${manifest.eventCount}`, + `- **Fetched at**: ${manifest.fetchedAt}`, + ]; + if (manifest.primaryError) { + lines.push(`- **Primary error**: ${manifest.primaryError.slice(0, 200)}`); + } + if (manifest.fallbackError) { + lines.push(`- **Fallback error**: ${manifest.fallbackError.slice(0, 200)}`); + } + return lines.join('\n'); +} - if (persist) { - const calendarDir = path.join(REPO_ROOT, 'analysis', 'data', 'calendar'); - fs.mkdirSync(calendarDir, { recursive: true }); - const outFile = path.join(calendarDir, `${from}_${tom}.json`); - fs.writeFileSync(outFile, JSON.stringify(output, null, 2) + '\n', 'utf8'); - process.stderr.write(`fetch-calendar: persisted → ${path.relative(REPO_ROOT, outFile)}\n`); +/** Thrown by `parseCalendarArgs` for invalid CLI arguments (exit code 2). */ +export class CliArgsError extends Error { + constructor(message: string) { + super(message); + this.name = 'CliArgsError'; } } -// Run if this is the entry point -const isMain = - process.argv[1] !== undefined && - (process.argv[1].endsWith('fetch-calendar.ts') || - process.argv[1].endsWith('fetch-calendar.js')); +/** + * Parse CLI argv into `{ from, to, persist }`. + * + * Accepts `--to` (preferred) and `--tom` (Swedish alias, used in repo docs) + * as the end-date flag. Throws `CliArgsError` for invalid arguments. + */ +export function parseCalendarArgs(argv: readonly string[]): { + from: string; + to: string; + persist: boolean; +} { + const flags = new Map(); + const booleans = new Set(); + for (let i = 0; i < argv.length; i++) { + const token = argv[i]; + if (!token || !token.startsWith('--')) continue; + const key = token.slice(2); + const next = argv[i + 1]; + if (next !== undefined && !next.startsWith('--')) { + flags.set(key, next); + i++; + } else { + booleans.add(key); + } + } + const ISO_DATE_RE = /^\d{4}-\d{2}-\d{2}$/; + const from = flags.get('from') ?? ''; + // Accept both `--to` and `--tom` (Swedish alias used in repo docs). + const to = flags.get('to') ?? flags.get('tom') ?? ''; + if (!ISO_DATE_RE.test(from)) { + throw new CliArgsError(`--from must be an ISO 8601 date (YYYY-MM-DD), got: "${from}"`); + } + if (!ISO_DATE_RE.test(to)) { + throw new CliArgsError(`--to must be an ISO 8601 date (YYYY-MM-DD), got: "${to}"`); + } + return { from, to, persist: booleans.has('persist') }; +} -if (isMain) { - main().catch((err: unknown) => { - process.stderr.write(`fetch-calendar: fatal error: ${String(err)}\n`); +async function main(): Promise { + const args = parseCalendarArgs(process.argv.slice(2)); + console.error(`📅 [fetch-calendar] Fetching ${args.from} → ${args.to}`); + + const result = await fetchCalendarWithFallback(args.from, args.to); + + // Manifest is human-readable status info → stderr, not stdout. + console.error(formatManifestMarkdown(result.manifest)); + + if (args.persist) { + persistCalendarJson(args.from, result); + } else { + // Print JSON to stdout for piping / agentic workflow consumption. + process.stdout.write(`${JSON.stringify(result, null, 2)}\n`); + } + + if (result.manifest.path === 'none') { process.exit(1); + } +} + +// Guard: run `main()` only when this file is the direct entry point. +if (path.resolve(fileURLToPath(import.meta.url)) === path.resolve(process.argv[1] ?? '')) { + main().catch((err: unknown) => { + console.error('❌ [fetch-calendar] Fatal error:', err instanceof Error ? err.message : err); + // Bad CLI arguments → exit code 2 (per module header & repo convention). + process.exit(err instanceof CliArgsError ? 2 : 1); }); } diff --git a/tests/fetch-calendar.test.ts b/tests/fetch-calendar.test.ts index bc7f00ac9..cbe1cf9df 100644 --- a/tests/fetch-calendar.test.ts +++ b/tests/fetch-calendar.test.ts @@ -1,387 +1,989 @@ /** - * @file tests/fetch-calendar.test.ts - * @description Vitest unit tests for fetch-calendar.ts + * Tests for scripts/fetch-calendar.ts + * + * Covers: + * - Primary MCP path (JSON response with `kalender` array) + * - HTML-error response detection + fallback trigger + * - Fallback HTML parser (article and list-item patterns) + * - Retry exhaustion (primary) → fallback + * - Retry exhaustion on both paths → empty result with `path: 'none'` + * - `normalizeMcpCalendarEvent` – field extraction + * - `parseCalendarArgs` – CLI flag parsing + * - `formatManifestMarkdown` – manifest rendering + * - `isHtmlErrorResponse` – HTML detection + * + * No live network calls — all transport is controlled via `fetchFn` injection. + * + * @author Hack23 AB + * @license Apache-2.0 */ -import { describe, it, expect, vi } from 'vitest'; - +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; import { - parseArgs, - parseCalendarHtml, - fetchCalendarEvents, + isHtmlErrorResponse, + callMcpCalendarEvents, + CalendarMcpError, + normalizeMcpCalendarEvent, + parseRiksdagKalendariumHtml, + parseCalendarArticle, + parseCalendarListItem, + fetchCalendarWithFallback, + fetchWebCalendar, + formatManifestMarkdown, + parseCalendarArgs, + persistCalendarJson, + CliArgsError, + type CalendarFetchConfig, type CalendarEvent, - type CalendarOutput, - type FetchCalendarDeps, - type ParsedCalendarArgs, } from '../scripts/fetch-calendar.js'; // --------------------------------------------------------------------------- -// parseArgs tests +// Helpers // --------------------------------------------------------------------------- -describe('parseArgs — fetch-calendar', () => { - it('parses --from and --tom happy path', () => { - const { args, error } = parseArgs(['--from', '2026-04-27', '--tom', '2026-05-27']); - expect(error).toBeNull(); - expect(args.from).toBe('2026-04-27'); - expect(args.tom).toBe('2026-05-27'); - expect(args.org).toBeNull(); - expect(args.akt).toBeNull(); - expect(args.persist).toBe(false); +/** Build a minimal CalendarFetchConfig with all network calls mocked. */ +function makeConfig(overrides: Partial = {}): CalendarFetchConfig { + return { + mcpUrl: 'https://mcp.test/mcp', + webBaseUrl: 'https://riksdagen.test', + timeout: 3_000, + maxRetries: 1, + sleepFn: () => Promise.resolve(), // skip delays + ...overrides, + }; +} + +/** Stub a fetch call that returns a JSON body with the given status. */ +function jsonFetch(body: unknown, status = 200): typeof fetch { + return vi.fn(async (_url, _init) => { + const text = JSON.stringify(body); + return new Response(text, { + status, + headers: { 'Content-Type': 'application/json' }, + }); + }) as unknown as typeof fetch; +} + +/** Stub a fetch call that returns an HTML body (e.g. an error page). */ +function htmlFetch(html: string, status = 200): typeof fetch { + return vi.fn(async (_url, _init) => { + return new Response(html, { + status, + headers: { 'Content-Type': 'text/html' }, + }); + }) as unknown as typeof fetch; +} + +/** Stub a fetch call that always throws a network error. */ +function errorFetch(message = 'network error'): typeof fetch { + return vi.fn(async () => { + throw new Error(message); + }) as unknown as typeof fetch; +} + +// --------------------------------------------------------------------------- +// MCP JSON-RPC fixture helpers +// --------------------------------------------------------------------------- + +/** Wrap events in a valid MCP JSON-RPC 2.0 response envelope. */ +function mcpJsonRpcResponse(events: unknown[]): object { + return { + jsonrpc: '2.0', + id: 1, + result: { + content: [ + { + text: JSON.stringify({ kalender: events }), + }, + ], + }, + }; +} + +/** Build a valid calendar event as returned by the MCP server. */ +function mockMcpEvent(overrides: Record = {}): Record { + return { + dtstart: '2026-04-28T10:00:00', + organ: 'FiU', + akt: 'votering', + summary: 'Slutlig rösträkning — Budget 2026', + dok_id: 'H901FiU10', + ...overrides, + }; +} + +// --------------------------------------------------------------------------- +// isHtmlErrorResponse +// --------------------------------------------------------------------------- + +describe('isHtmlErrorResponse', () => { + it('returns true for a DOCTYPE HTML response', () => { + expect(isHtmlErrorResponse('')).toBe(true); }); - it('parses optional --org, --akt, --persist flags', () => { - const { args, error } = parseArgs([ - '--from', '2026-04-27', - '--tom', '2026-05-27', - '--org', 'UTSK', - '--akt', 'bet', - '--persist', - ]); - expect(error).toBeNull(); - expect(args.org).toBe('UTSK'); - expect(args.akt).toBe('bet'); - expect(args.persist).toBe(true); + it('returns true for a lower-case response', () => { + expect(isHtmlErrorResponse('')).toBe(true); }); - it('returns error when --from is missing', () => { - const { error } = parseArgs(['--tom', '2026-05-27']); - expect(error).not.toBeNull(); - expect(error).toMatch(/--from/); + it('returns true for a bare opening tag', () => { + expect(isHtmlErrorResponse('')).toBe(true); }); - it('returns error when --tom is missing', () => { - const { error } = parseArgs(['--from', '2026-04-27']); - expect(error).not.toBeNull(); - expect(error).toMatch(/--tom/); + it('returns true for an uppercase opening tag', () => { + expect(isHtmlErrorResponse('')).toBe(true); }); - it('returns error for invalid --from date format', () => { - const { error } = parseArgs(['--from', '04/27/2026', '--tom', '2026-05-27']); - expect(error).not.toBeNull(); - expect(error).toMatch(/YYYY-MM-DD/); + it('returns true for a leading tag fragment', () => { + expect(isHtmlErrorResponse('Error')).toBe(true); }); - it('returns error for invalid --tom date format', () => { - const { error } = parseArgs(['--from', '2026-04-27', '--tom', 'next-month']); - expect(error).not.toBeNull(); - expect(error).toMatch(/YYYY-MM-DD/); + it('returns true for a self-closing fragment', () => { + expect(isHtmlErrorResponse('')).toBe(true); }); - it('persist defaults to false when flag is absent', () => { - const { args } = parseArgs(['--from', '2026-04-27', '--tom', '2026-05-27']); - expect(args.persist).toBe(false); + it('returns false for a JSON response', () => { + expect(isHtmlErrorResponse('{"jsonrpc":"2.0","id":1}')).toBe(false); + }); + + it('returns false for an empty string', () => { + expect(isHtmlErrorResponse('')).toBe(false); + }); + + it('returns false for a leading whitespace + JSON response', () => { + expect(isHtmlErrorResponse(' \n{"result":{}} ')).toBe(false); + }); + + it('returns true for whitespace before DOCTYPE', () => { + expect(isHtmlErrorResponse(' \n')).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// callMcpCalendarEvents +// --------------------------------------------------------------------------- + +describe('callMcpCalendarEvents', () => { + it('returns events from a valid MCP JSON-RPC `kalender` response', async () => { + const events = [mockMcpEvent(), mockMcpEvent({ dtstart: '2026-04-29T09:00:00' })]; + const config = { + mcpUrl: 'https://mcp.test/mcp', + timeout: 3_000, + fetchFn: jsonFetch(mcpJsonRpcResponse(events)), + }; + + const result = await callMcpCalendarEvents('2026-04-28', '2026-05-04', config); + expect(result).toHaveLength(2); + }); + + it('returns events from a direct `result.kalender` response (no content wrapper)', async () => { + const events = [mockMcpEvent()]; + const config = { + mcpUrl: 'https://mcp.test/mcp', + timeout: 3_000, + fetchFn: jsonFetch({ jsonrpc: '2.0', id: 1, result: { kalender: events } }), + }; + + const result = await callMcpCalendarEvents('2026-04-28', '2026-05-04', config); + expect(result).toHaveLength(1); + }); + + it('returns events from a `result.events` key', async () => { + const events = [mockMcpEvent()]; + const config = { + mcpUrl: 'https://mcp.test/mcp', + timeout: 3_000, + fetchFn: jsonFetch({ jsonrpc: '2.0', id: 1, result: { events } }), + }; + + const result = await callMcpCalendarEvents('2026-04-28', '2026-05-04', config); + expect(result).toHaveLength(1); + }); + + it('throws CalendarMcpError(html) when MCP returns an HTML document', async () => { + const config = { + mcpUrl: 'https://mcp.test/mcp', + timeout: 3_000, + fetchFn: htmlFetch('Error 503'), + }; + + await expect( + callMcpCalendarEvents('2026-04-28', '2026-05-04', config), + ).rejects.toThrow(CalendarMcpError); + + await expect( + callMcpCalendarEvents('2026-04-28', '2026-05-04', config), + ).rejects.toMatchObject({ kind: 'html' }); + }); + + it('throws CalendarMcpError(http) on a non-OK HTTP status with non-HTML body', async () => { + const config = { + mcpUrl: 'https://mcp.test/mcp', + timeout: 3_000, + fetchFn: jsonFetch({ error: 'internal' }, 500), + }; + + await expect( + callMcpCalendarEvents('2026-04-28', '2026-05-04', config), + ).rejects.toMatchObject({ kind: 'http' }); + }); + + it('throws CalendarMcpError(network) on a fetch network error', async () => { + const config = { + mcpUrl: 'https://mcp.test/mcp', + timeout: 3_000, + fetchFn: errorFetch('ECONNREFUSED'), + }; + + await expect( + callMcpCalendarEvents('2026-04-28', '2026-05-04', config), + ).rejects.toMatchObject({ kind: 'network' }); + }); + + it('throws CalendarMcpError(tool) when the JSON-RPC response has an `error` field', async () => { + const config = { + mcpUrl: 'https://mcp.test/mcp', + timeout: 3_000, + fetchFn: jsonFetch({ + jsonrpc: '2.0', + id: 1, + error: { code: -32000, message: 'Tool execution failed' }, + }), + }; + + await expect( + callMcpCalendarEvents('2026-04-28', '2026-05-04', config), + ).rejects.toMatchObject({ kind: 'tool' }); + }); + + it('returns an empty array when `result.kalender` is an empty array', async () => { + const config = { + mcpUrl: 'https://mcp.test/mcp', + timeout: 3_000, + fetchFn: jsonFetch(mcpJsonRpcResponse([])), + }; + + const result = await callMcpCalendarEvents('2026-04-28', '2026-05-04', config); + expect(result).toEqual([]); + }); + + it('returns an empty array when `result` has no recognised events key', async () => { + const config = { + mcpUrl: 'https://mcp.test/mcp', + timeout: 3_000, + fetchFn: jsonFetch({ jsonrpc: '2.0', id: 1, result: { something: 'else' } }), + }; + + const result = await callMcpCalendarEvents('2026-04-28', '2026-05-04', config); + expect(result).toEqual([]); }); }); // --------------------------------------------------------------------------- -// parseCalendarHtml tests +// normalizeMcpCalendarEvent // --------------------------------------------------------------------------- -describe('parseCalendarHtml', () => { - it('returns empty array for blank HTML', () => { - expect(parseCalendarHtml('')).toHaveLength(0); - expect(parseCalendarHtml('')).toHaveLength(0); +describe('normalizeMcpCalendarEvent', () => { + it('maps standard MCP event fields', () => { + const raw = { + dtstart: '2026-04-28T10:00:00', + organ: 'FiU', + akt: 'votering', + summary: 'Budget-omröstning', + dok_id: 'H901FiU10', + }; + const event = normalizeMcpCalendarEvent(raw); + expect(event.dtstart).toBe('2026-04-28T10:00:00'); + expect(event.org).toBe('FiU'); + expect(event.akt).toBe('votering'); + expect(event.summary).toBe('Budget-omröstning'); + expect(event.doc_refs).toContain('H901FiU10'); + expect(event.source).toBe('mcp-primary'); + }); + + it('handles upper-case DTSTART / SUMMARY keys', () => { + const raw = { + DTSTART: '2026-04-29T14:00:00', + SUMMARY: 'Utskottsmöte', + organ: 'NU', + akt: 'utskottsmöte', + }; + const event = normalizeMcpCalendarEvent(raw); + expect(event.dtstart).toBe('2026-04-29T14:00:00'); + expect(event.summary).toBe('Utskottsmöte'); + }); + + it('includes dtend when present', () => { + const raw = { + dtstart: '2026-04-28T10:00:00', + dtend: '2026-04-28T12:00:00', + organ: 'KU', + akt: 'beredning', + summary: 'Konstitutionsutskottets beredning', + }; + const event = normalizeMcpCalendarEvent(raw); + expect(event.dtend).toBe('2026-04-28T12:00:00'); + }); + + it('collects multiple doc_refs from array fields', () => { + const raw = { + dtstart: '2026-04-28T10:00:00', + organ: 'FiU', + akt: 'debatt', + summary: 'Plenidebatt', + url: ['https://riksdagen.se/dokument/H901FiU1', 'https://riksdagen.se/dokument/H901FiU2'], + }; + const event = normalizeMcpCalendarEvent(raw); + expect(event.doc_refs).toHaveLength(2); + }); + + it('handles null / undefined gracefully', () => { + const event = normalizeMcpCalendarEvent(null); + expect(event.dtstart).toBe(''); + expect(event.org).toBe(''); + expect(event.doc_refs).toEqual([]); + expect(event.source).toBe('mcp-primary'); }); - it('extracts events from JSON-LD structured data', () => { + it('omits dtend when not present in raw event', () => { + const raw = { + dtstart: '2026-04-28T10:00:00', + organ: 'KU', + akt: 'beredning', + summary: 'Konstitutionsutskottets möte', + // dtend intentionally absent + }; + const event = normalizeMcpCalendarEvent(raw); + expect(event.dtend).toBeUndefined(); + expect(event.dtstart).toBe('2026-04-28T10:00:00'); + }); +}); + +// --------------------------------------------------------------------------- +// parseRiksdagKalendariumHtml + parseCalendarArticle + parseCalendarListItem +// --------------------------------------------------------------------------- + +describe('parseRiksdagKalendariumHtml', () => { + it('parses article pattern events', () => { const html = ` - - - - - -`; - - const events = parseCalendarHtml(html); - expect(events.length).toBeGreaterThanOrEqual(1); - const ev = events[0]!; - expect(ev.titel).toBe('Finansutskottets öppna utfrågning'); - expect(ev.datum).toBe('2026-05-05'); - expect(ev.tid).toBe('10:00'); - expect(ev.org).toBe('Finansutskottet'); - expect(ev.typ).toBe('Utfrågning'); - }); - - it('extracts events from multiple JSON-LD Event objects', () => { + +
    + +

    Näringspolitik

    +
    + `; + const events = parseRiksdagKalendariumHtml(html); + expect(events).toHaveLength(2); + expect(events[0]?.dtstart).toBe('2026-04-28T10:00:00'); + expect(events[0]?.org).toBe('FiU'); + expect(events[0]?.akt).toBe('votering'); + expect(events[0]?.summary).toContain('Budget 2026'); + expect(events[0]?.doc_refs).toContain('/sv/dokument-och-lagar/utskottens-arbete/betankanden/H901FiU1/'); + expect(events[0]?.source).toBe('web-fallback'); + expect(events[1]?.dtstart).toBe('2026-04-28T13:00:00'); + expect(events[1]?.org).toBe('NU'); + }); + + it('falls back to list-item pattern when no articles found', () => { const html = ` -`; +
      +
    • + + KU + Beredning + KU-beredning +
    • +
    + `; + const events = parseRiksdagKalendariumHtml(html); + expect(events).toHaveLength(1); + expect(events[0]?.dtstart).toBe('2026-04-29T09:00:00'); + expect(events[0]?.org).toBe('KU'); + expect(events[0]?.source).toBe('web-fallback'); + }); - const events = parseCalendarHtml(html); - expect(events.length).toBeGreaterThanOrEqual(2); - const titles = events.map((e) => e.titel); - expect(titles).toContain('Event A'); - expect(titles).toContain('Event B'); + it('returns empty array for HTML with no recognisable calendar markup', () => { + const html = '

    No events today.

    '; + expect(parseRiksdagKalendariumHtml(html)).toEqual([]); }); - it('falls back gracefully when JSON-LD parse fails', () => { - // Malformed JSON-LD should not throw - const html = ``; - expect(() => parseCalendarHtml(html)).not.toThrow(); + it('ignores non-calendar article blocks even when they contain time elements', () => { + const html = ` +
    + +

    Pressmeddelande som inte är kalenderhändelse

    +
    + `; + expect(parseRiksdagKalendariumHtml(html)).toEqual([]); }); - it('extracts events using HTML title patterns when no JSON-LD present', () => { + it('parses calendar-item articles when class attribute uses single quotes', () => { const html = ` - -
    - Debatt om budgeten - -
    -`; - - // Should not throw; events may or may not be found depending on HTML pattern - expect(() => parseCalendarHtml(html)).not.toThrow(); - const events = parseCalendarHtml(html); - expect(Array.isArray(events)).toBe(true); +
    + +

    Finansdebatt

    +
    + `; + const events = parseRiksdagKalendariumHtml(html); + expect(events).toHaveLength(1); + expect(events[0]?.org).toBe('FiU'); + expect(events[0]?.summary).toContain('Finansdebatt'); + }); +}); + +describe('parseCalendarArticle', () => { + it('returns null when no datetime found', () => { + const result = parseCalendarArticle('data-akt="debatt"', '

    No time element

    '); + expect(result).toBeNull(); + }); + + it('extracts organ and akt from data attributes', () => { + const body = `

    Test

    `; + const event = parseCalendarArticle('data-organ="SoU" data-akt="debatt"', body); + expect(event?.org).toBe('SoU'); + expect(event?.akt).toBe('debatt'); + }); + + it('falls back to span text for org and akt when data attributes absent', () => { + const body = ` + + CU + Utskottsmöte +

    Civilutskottets möte

    + `; + const event = parseCalendarArticle('', body); + expect(event?.org).toBe('CU'); + }); +}); + +describe('parseCalendarListItem', () => { + it('returns null when no datetime found', () => { + const result = parseCalendarListItem('', 'FiU'); + expect(result).toBeNull(); + }); + + it('extracts all fields from a well-formed list item', () => { + const body = ` + + JuU + Votering + JuU-betänkande + `; + const event = parseCalendarListItem('', body); + expect(event?.dtstart).toBe('2026-05-02T14:00:00'); + expect(event?.org).toBe('JuU'); + expect(event?.doc_refs).toContain('/sv/dokument-och-lagar/betankanden/H901JuU10/'); + expect(event?.source).toBe('web-fallback'); }); }); // --------------------------------------------------------------------------- -// fetchCalendarEvents — orchestrator (real logic exercised via injected deps) +// fetchWebCalendar // --------------------------------------------------------------------------- -const baseArgs: ParsedCalendarArgs = { - from: '2026-04-27', - tom: '2026-05-27', - org: null, - akt: null, - persist: false, -}; +describe('fetchWebCalendar', () => { + it('fetches and parses a calendar page with article events', async () => { + const html = ` + + `; + const config = { + webBaseUrl: 'https://riksdagen.test', + timeout: 3_000, + fetchFn: htmlFetch(html), + }; + + const events = await fetchWebCalendar('2026-04-28', '2026-05-04', config); + expect(events).toHaveLength(1); + expect(events[0]?.dtstart).toBe('2026-04-28T10:00:00'); + expect(events[0]?.source).toBe('web-fallback'); + }); -function makeDeps(overrides: Partial): FetchCalendarDeps { - return { - fetchViaMcp: overrides.fetchViaMcp ?? (async () => []), - fetchViaWeb: overrides.fetchViaWeb ?? (async () => []), - now: overrides.now ?? (() => new Date('2026-04-27T00:00:00.000Z')), - logger: overrides.logger, - }; -} + it('throws on a non-OK HTTP response', async () => { + const config = { + webBaseUrl: 'https://riksdagen.test', + timeout: 3_000, + fetchFn: htmlFetch('Not Found', 404), + }; -describe('fetchCalendarEvents — MCP primary path', () => { - it('source is "mcp" when MCP returns events and web is never called', async () => { - const mcpEvents: CalendarEvent[] = [ - { datum: '2026-05-05', tid: '10:00', org: 'FiU', titel: 'Utfrågning', typ: 'Öppet' }, - ]; - const webSpy = vi.fn(async () => [] as CalendarEvent[]); - - const out = await fetchCalendarEvents( - baseArgs, - makeDeps({ fetchViaMcp: async () => mcpEvents, fetchViaWeb: webSpy }), - ); - - expect(out.source).toBe('mcp'); - expect(out.events).toEqual(mcpEvents); - expect(webSpy).not.toHaveBeenCalled(); - expect(out.fetchedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/); - }); - - it('falls back to web when MCP throws', async () => { - const webEvents: CalendarEvent[] = [ - { datum: '2026-05-06', tid: '14:00', org: 'KU', titel: 'Möte', typ: '' }, - ]; - - const out = await fetchCalendarEvents( - baseArgs, - makeDeps({ - fetchViaMcp: async () => { - throw new Error('mcp down'); - }, - fetchViaWeb: async () => webEvents, - }), - ); + await expect( + fetchWebCalendar('2026-04-28', '2026-05-04', config), + ).rejects.toThrow(/HTTP error: 404/); + }); + + it('throws on a network fetch failure', async () => { + const config = { + webBaseUrl: 'https://riksdagen.test', + timeout: 3_000, + fetchFn: errorFetch('EHOSTUNREACH'), + }; - expect(out.source).toBe('web_fallback'); - expect(out.events).toEqual(webEvents); + await expect( + fetchWebCalendar('2026-04-28', '2026-05-04', config), + ).rejects.toThrow(/EHOSTUNREACH/); }); }); -describe('fetchCalendarEvents — web fallback path', () => { - it('source is "web_fallback" when MCP returns empty', async () => { - const webEvents: CalendarEvent[] = [ - { datum: '2026-05-07', tid: '09:00', org: 'AU', titel: 'Debatt', typ: '' }, - ]; - - const out = await fetchCalendarEvents( - baseArgs, - makeDeps({ - fetchViaMcp: async () => [], - fetchViaWeb: async () => webEvents, - }), - ); +// --------------------------------------------------------------------------- +// fetchCalendarWithFallback +// --------------------------------------------------------------------------- + +describe('fetchCalendarWithFallback', () => { + let sleepCount: number; + let sleepFn: (ms: number) => Promise; - expect(out.source).toBe('web_fallback'); - expect(out.events).toEqual(webEvents); + beforeEach(() => { + sleepCount = 0; + sleepFn = async () => { sleepCount++; }; }); - it('gracefully degrades to empty events array when web fetch fails', async () => { - const out = await fetchCalendarEvents( - baseArgs, - makeDeps({ - fetchViaMcp: async () => [], - fetchViaWeb: async () => { - throw new Error('web down'); - }, - }), - ); + afterEach(() => { + vi.clearAllMocks(); + }); + + it('succeeds via MCP primary path and records manifest correctly', async () => { + const rawEvents = [mockMcpEvent()]; + const config = makeConfig({ + fetchFn: jsonFetch(mcpJsonRpcResponse(rawEvents)), + sleepFn, + }); + + const result = await fetchCalendarWithFallback('2026-04-28', '2026-05-04', config); + + expect(result.manifest.path).toBe('mcp-primary'); + expect(result.manifest.eventCount).toBe(1); + expect(result.manifest.primaryError).toBeUndefined(); + expect(result.events).toHaveLength(1); + expect(result.events[0]?.source).toBe('mcp-primary'); + expect(result.events[0]?.org).toBe('FiU'); + }); + + it('triggers web fallback when MCP returns HTML error page', async () => { + // MCP returns an HTML error page; web returns a valid calendar HTML page. + const webHtml = ` +
    + +

    Utrikespolitik

    +
    + `; + + let callCount = 0; + const fetchFn = vi.fn(async (url: RequestInfo | URL) => { + callCount++; + const urlStr = String(url); + // Use exact hostname match rather than substring to avoid incomplete-URL checks. + const hostname = (() => { try { return new URL(urlStr).hostname; } catch { return ''; } })(); + if (hostname === 'mcp.test') { + // MCP endpoint returns HTML error + return new Response('503 Service Unavailable', { + status: 200, // MCP sometimes returns 200 with HTML body + headers: { 'Content-Type': 'text/html' }, + }); + } + // Web fallback endpoint returns calendar HTML + return new Response(webHtml, { + status: 200, + headers: { 'Content-Type': 'text/html' }, + }); + }) as unknown as typeof fetch; + + const config = makeConfig({ fetchFn, sleepFn }); + const result = await fetchCalendarWithFallback('2026-04-28', '2026-05-04', config); + + expect(result.manifest.path).toBe('web-fallback'); + expect(result.manifest.primaryError).toMatch(/HTML/i); + expect(result.events).toHaveLength(1); + expect(result.events[0]?.source).toBe('web-fallback'); + expect(result.events[0]?.org).toBe('UU'); + expect(callCount).toBeGreaterThanOrEqual(2); // at least 1 MCP + 1 web + }); + + it('retries MCP on network error before falling back', async () => { + const webHtml = ` +
    + +

    Budget

    +
    + `; + + let callCount = 0; + const fetchFn = vi.fn(async (url: RequestInfo | URL) => { + callCount++; + const urlStr = String(url); + const hostname = (() => { try { return new URL(urlStr).hostname; } catch { return ''; } })(); + if (hostname === 'mcp.test') { + throw new Error('ECONNREFUSED'); + } + return new Response(webHtml, { status: 200, headers: { 'Content-Type': 'text/html' } }); + }) as unknown as typeof fetch; + + const config = makeConfig({ fetchFn, sleepFn, maxRetries: 1 }); + const result = await fetchCalendarWithFallback('2026-04-28', '2026-05-04', config); + + // With maxRetries=1 there are 2 MCP attempts (attempt 0 + 1 retry) before fallback + expect(result.manifest.path).toBe('web-fallback'); + expect(result.manifest.primaryError).toBeDefined(); + expect(result.events).toHaveLength(1); + expect(sleepCount).toBeGreaterThanOrEqual(1); // at least one sleep between retries + }); + + it('returns path=none when both paths fail after retry exhaustion', async () => { + const fetchFn = errorFetch('ETIMEDOUT'); + const config = makeConfig({ fetchFn, sleepFn, maxRetries: 1 }); + + const result = await fetchCalendarWithFallback('2026-04-28', '2026-05-04', config); - expect(out.source).toBe('web_fallback'); - expect(out.events).toEqual([]); - expect(out.fetchedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/); + expect(result.manifest.path).toBe('none'); + expect(result.manifest.eventCount).toBe(0); + expect(result.manifest.primaryError).toBeDefined(); + expect(result.manifest.fallbackError).toBeDefined(); + expect(result.events).toEqual([]); }); - it('preserves args.from / args.tom on the output', async () => { - const out = await fetchCalendarEvents( - { ...baseArgs, from: '2026-06-01', tom: '2026-06-30' }, - makeDeps({}), - ); - expect(out.from).toBe('2026-06-01'); - expect(out.tom).toBe('2026-06-30'); + it('manifest includes correct `date` and `dateTo` fields', async () => { + const config = makeConfig({ + fetchFn: jsonFetch(mcpJsonRpcResponse([])), + sleepFn, + }); + + const result = await fetchCalendarWithFallback('2026-05-01', '2026-05-31', config); + expect(result.manifest.date).toBe('2026-05-01'); + expect(result.manifest.dateTo).toBe('2026-05-31'); + expect(result.manifest.fetchedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/); + }); + + it('does not sleep before the first MCP attempt', async () => { + const config = makeConfig({ + fetchFn: jsonFetch(mcpJsonRpcResponse([mockMcpEvent()])), + sleepFn, + }); + await fetchCalendarWithFallback('2026-04-28', '2026-05-04', config); + expect(sleepCount).toBe(0); + }); + + it('does not retry HTML errors (immediate fallback)', async () => { + let mcpCallCount = 0; + let webCallCount = 0; + const fetchFn = vi.fn(async (url: RequestInfo | URL) => { + const hostname = (() => { try { return new URL(String(url)).hostname; } catch { return ''; } })(); + if (hostname === 'mcp.test') { + mcpCallCount++; + return new Response('Error', { status: 200 }); + } + webCallCount++; + return new Response('No calendar', { status: 200 }); + }) as unknown as typeof fetch; + + const config = makeConfig({ fetchFn, sleepFn, maxRetries: 2 }); + const result = await fetchCalendarWithFallback('2026-04-28', '2026-05-04', config); + + // HTML error should trigger immediate fallback — no retries on MCP. + expect(mcpCallCount).toBe(1); + expect(webCallCount).toBe(1); + expect(result.manifest.path).toBe('web-fallback'); }); }); -// Backwards-compatibility shape assertions (object literal, no logic) -describe('CalendarOutput object shape', () => { - it('accepts a fully-populated CalendarOutput literal', () => { - const out: CalendarOutput = { - from: '2026-04-27', - tom: '2026-05-27', - fetchedAt: new Date().toISOString(), - source: 'mcp', - events: [], - }; - expect(out.source).toBe('mcp'); +// --------------------------------------------------------------------------- +// parseCalendarArgs +// --------------------------------------------------------------------------- + +describe('parseCalendarArgs', () => { + it('parses --from and --to flags', () => { + const args = parseCalendarArgs(['--from', '2026-04-28', '--to', '2026-05-04']); + expect(args.from).toBe('2026-04-28'); + expect(args.to).toBe('2026-05-04'); + expect(args.persist).toBe(false); + }); + + it('sets persist=true when --persist flag is present', () => { + const args = parseCalendarArgs(['--from', '2026-04-28', '--to', '2026-05-04', '--persist']); + expect(args.persist).toBe(true); + }); + + it('throws when --from is missing', () => { + expect(() => parseCalendarArgs(['--to', '2026-05-04'])).toThrow(/--from/); + }); + + it('throws when --to is missing', () => { + expect(() => parseCalendarArgs(['--from', '2026-04-28'])).toThrow(/--to/); + }); + + it('throws when date format is invalid', () => { + expect(() => + parseCalendarArgs(['--from', '28-04-2026', '--to', '2026-05-04']), + ).toThrow(/ISO 8601/); + }); + + it('accepts --tom as an alias for --to (Swedish, used in repo docs)', () => { + const args = parseCalendarArgs(['--from', '2026-04-28', '--tom', '2026-05-04']); + expect(args.from).toBe('2026-04-28'); + expect(args.to).toBe('2026-05-04'); + }); + + it('prefers --to over --tom when both are provided', () => { + const args = parseCalendarArgs([ + '--from', '2026-04-28', + '--to', '2026-05-04', + '--tom', '2026-05-31', + ]); + expect(args.to).toBe('2026-05-04'); + }); + + it('throws CliArgsError (typed) for invalid arguments', () => { + expect(() => parseCalendarArgs(['--to', '2026-05-04'])).toThrow(CliArgsError); + expect(() => + parseCalendarArgs(['--from', 'bogus', '--to', '2026-05-04']), + ).toThrow(CliArgsError); }); }); // --------------------------------------------------------------------------- -// HTML error response fixture +// formatManifestMarkdown // --------------------------------------------------------------------------- -describe('HTML error response handling', () => { - it('parseCalendarHtml handles 404-style HTML body gracefully', () => { - const notFoundHtml = ` - - -404 – Sidan hittades inte - -

    Sidan hittades inte

    -

    Den begärda sidan kunde inte hittas.

    - -`; - - // Should not throw; should return empty or near-empty events - expect(() => parseCalendarHtml(notFoundHtml)).not.toThrow(); - const events = parseCalendarHtml(notFoundHtml); - expect(Array.isArray(events)).toBe(true); - }); - - it('parseCalendarHtml handles server-error HTML gracefully', () => { - const errorHtml = ` - - -500 Internal Server Error - -

    Internal Server Error

    - -`; - - expect(() => parseCalendarHtml(errorHtml)).not.toThrow(); - const events = parseCalendarHtml(errorHtml); - expect(Array.isArray(events)).toBe(true); - }); - - it('parseCalendarHtml handles empty string without crashing', () => { - expect(() => parseCalendarHtml('')).not.toThrow(); - expect(parseCalendarHtml('')).toEqual([]); +describe('formatManifestMarkdown', () => { + it('formats a successful MCP primary manifest', () => { + const md = formatManifestMarkdown({ + date: '2026-04-28', + dateTo: '2026-05-04', + path: 'mcp-primary', + eventCount: 5, + fetchedAt: '2026-04-28T06:00:00Z', + }); + expect(md).toContain('MCP primary'); + expect(md).toContain('**Events**: 5'); + expect(md).not.toContain('error'); + }); + + it('formats a web fallback manifest with primary error', () => { + const md = formatManifestMarkdown({ + date: '2026-04-28', + dateTo: '2026-05-04', + path: 'web-fallback', + eventCount: 3, + primaryError: 'MCP returned HTML instead of JSON', + fetchedAt: '2026-04-28T06:00:00Z', + }); + expect(md).toContain('Web fallback'); + expect(md).toContain('Primary error'); + expect(md).toContain('MCP returned HTML'); + }); + + it('formats a none (both failed) manifest', () => { + const md = formatManifestMarkdown({ + date: '2026-04-28', + dateTo: '2026-05-04', + path: 'none', + eventCount: 0, + primaryError: 'ECONNREFUSED', + fallbackError: 'EHOSTUNREACH', + fetchedAt: '2026-04-28T06:00:00Z', + }); + expect(md).toContain('None'); + expect(md).toContain('Fallback error'); + }); +}); + +// --------------------------------------------------------------------------- +// CalendarMcpError +// --------------------------------------------------------------------------- + +describe('CalendarMcpError', () => { + it('has the correct name and kind', () => { + const err = new CalendarMcpError('test error', 'html', 'error'); + expect(err.name).toBe('CalendarMcpError'); + expect(err.kind).toBe('html'); + expect(err.responseText).toBe('error'); + expect(err).toBeInstanceOf(Error); + }); + + it('correctly identifies all error kinds', () => { + for (const kind of ['html', 'http', 'network', 'json', 'tool'] as const) { + const err = new CalendarMcpError(`${kind} error`, kind); + expect(err.kind).toBe(kind); + } }); }); // --------------------------------------------------------------------------- -// Output structure validation +// Edge-case integration: MCP succeeds on second attempt // --------------------------------------------------------------------------- -describe('CalendarOutput structure', () => { - it('output has all required fields', () => { - const output: CalendarOutput = { - from: '2026-04-27', - tom: '2026-05-27', - fetchedAt: '2026-04-27T10:00:00.000Z', - source: 'mcp', +describe('fetchCalendarWithFallback – MCP succeeds on retry', () => { + it('succeeds on the second MCP attempt without triggering fallback', async () => { + let callCount = 0; + const sleepFn = async () => {}; + const rawEvents = [mockMcpEvent()]; + const fetchFn = vi.fn(async () => { + callCount++; + if (callCount === 1) { + // First attempt: transient network error + throw new Error('ECONNRESET'); + } + // Second attempt: success + return new Response(JSON.stringify(mcpJsonRpcResponse(rawEvents)), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); + }) as unknown as typeof fetch; + + const config = makeConfig({ fetchFn, sleepFn, maxRetries: 1 }); + const result = await fetchCalendarWithFallback('2026-04-28', '2026-05-04', config); + + expect(result.manifest.path).toBe('mcp-primary'); + expect(result.events).toHaveLength(1); + expect(callCount).toBe(2); + }); +}); + +// --------------------------------------------------------------------------- +// CalendarEvent shape validation +// --------------------------------------------------------------------------- + +describe('CalendarEvent shape', () => { + it('MCP-normalized events have all required fields', () => { + const raw = mockMcpEvent({ dtend: '2026-04-28T12:00:00' }); + const event: CalendarEvent = normalizeMcpCalendarEvent(raw); + + expect(typeof event.dtstart).toBe('string'); + expect(typeof event.org).toBe('string'); + expect(typeof event.akt).toBe('string'); + expect(typeof event.summary).toBe('string'); + expect(Array.isArray(event.doc_refs)).toBe(true); + expect(event.source).toBe('mcp-primary'); + expect(event.dtend).toBe('2026-04-28T12:00:00'); + }); + + it('web-fallback events have all required fields', () => { + const html = ` + + `; + const [event] = parseRiksdagKalendariumHtml(html); + expect(event).toBeDefined(); + if (!event) return; + expect(typeof event.dtstart).toBe('string'); + expect(typeof event.org).toBe('string'); + expect(typeof event.akt).toBe('string'); + expect(typeof event.summary).toBe('string'); + expect(Array.isArray(event.doc_refs)).toBe(true); + expect(event.source).toBe('web-fallback'); + }); +}); + +// --------------------------------------------------------------------------- +// persistCalendarJson – filesystem persistence +// --------------------------------------------------------------------------- + +describe('persistCalendarJson', () => { + let tmpDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'fetch-calendar-test-')); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('creates the output directory and writes a JSON file', () => { + const outputDir = path.join(tmpDir, 'calendar'); + const result = { + manifest: { + path: 'mcp-primary' as const, + date: '2026-04-28', + dateTo: '2026-04-28', + eventCount: 1, + fetchedAt: '2026-04-28T00:00:00.000Z', + }, events: [ { - datum: '2026-05-05', - tid: '10:00', + dtstart: '2026-04-28T10:00:00', org: 'FiU', - titel: 'Utfrågning om statsbudgeten', - typ: 'Öppet', + akt: 'debatt', + summary: 'Test event', + doc_refs: [], + source: 'mcp-primary' as const, }, ], }; - expect(output).toHaveProperty('from'); - expect(output).toHaveProperty('tom'); - expect(output).toHaveProperty('fetchedAt'); - expect(output).toHaveProperty('source'); - expect(output).toHaveProperty('events'); - expect(Array.isArray(output.events)).toBe(true); - }); - - it('event has all required fields', () => { - const event = { - datum: '2026-05-05', - tid: '10:00', - org: 'FiU', - titel: 'Utfrågning', - typ: 'Öppet', + const outPath = persistCalendarJson('2026-04-28', result, outputDir); + + expect(fs.existsSync(outPath)).toBe(true); + expect(outPath).toBe(path.join(outputDir, '2026-04-28.json')); + }); + + it('written file contains correct schema, manifest, and events', () => { + const outputDir = path.join(tmpDir, 'calendar'); + const event: CalendarEvent = { + dtstart: '2026-04-28T10:00:00', + org: 'KU', + akt: 'votering', + summary: 'Omröstning', + doc_refs: ['/sv/dokument-och-lagar/betankanden/KU10/'], + source: 'web-fallback', + }; + const result = { + manifest: { + path: 'web-fallback' as const, + date: '2026-04-28', + dateTo: '2026-04-28', + eventCount: 1, + fetchedAt: '2026-04-28T01:00:00.000Z', + primaryError: 'HTML error page', + }, + events: [event], }; - expect(event).toHaveProperty('datum'); - expect(event).toHaveProperty('tid'); - expect(event).toHaveProperty('org'); - expect(event).toHaveProperty('titel'); - expect(event).toHaveProperty('typ'); + persistCalendarJson('2026-04-28', result, outputDir); + + const content = JSON.parse( + fs.readFileSync(path.join(outputDir, '2026-04-28.json'), 'utf8'), + ) as Record; + expect(content['schema']).toBe('riksdagsmonitor-calendar/1.0'); + expect(content['manifest']).toEqual(result.manifest); + expect(content['events']).toEqual([event]); }); - it('source must be "mcp" or "web_fallback"', () => { - const validSources: string[] = ['mcp', 'web_fallback']; - const output: CalendarOutput = { - from: '2026-04-27', - tom: '2026-05-27', - fetchedAt: new Date().toISOString(), - source: 'mcp', + it('returns the output file path', () => { + const outputDir = path.join(tmpDir, 'calendar'); + const result = { + manifest: { path: 'none' as const, date: '2026-05-01', dateTo: '2026-05-01', eventCount: 0, fetchedAt: '2026-04-28T00:00:00.000Z' }, events: [], }; - expect(validSources).toContain(output.source); + const outPath = persistCalendarJson('2026-05-01', result, outputDir); + expect(outPath).toBe(path.join(outputDir, '2026-05-01.json')); }); - it('fetchedAt is a valid ISO timestamp', () => { - const output: CalendarOutput = { - from: '2026-04-27', - tom: '2026-05-27', - fetchedAt: new Date().toISOString(), - source: 'web_fallback', + it('uses {from}_{dateTo}.json when range spans multiple days', () => { + const outputDir = path.join(tmpDir, 'calendar'); + const result = { + manifest: { + path: 'mcp-primary' as const, + date: '2026-04-28', + dateTo: '2026-05-04', + eventCount: 0, + fetchedAt: '2026-04-28T00:00:00.000Z', + }, events: [], }; - expect(output.fetchedAt).toMatch(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/); - // `new Date(string)` never throws (it returns Invalid Date), so use - // Date.parse to actually validate the timestamp. - expect(Number.isNaN(Date.parse(output.fetchedAt))).toBe(false); + const outPath = persistCalendarJson('2026-04-28', result, outputDir); + expect(outPath).toBe(path.join(outputDir, '2026-04-28_2026-05-04.json')); + expect(fs.existsSync(outPath)).toBe(true); }); });