diff --git a/.github/skills/myndigheter-monitoring/SKILL.md b/.github/skills/myndigheter-monitoring/SKILL.md index e01b8d5b42..e37f3bad30 100644 --- a/.github/skills/myndigheter-monitoring/SKILL.md +++ b/.github/skills/myndigheter-monitoring/SKILL.md @@ -237,6 +237,81 @@ interviews (5 labor economists), stakeholder statements* - **Stakeholder voices** - Include citizens, experts, civil society - **Public interest** - Agencies serve citizens, not themselves +## Statskontoret Data Integration + +Statskontoret (Swedish Agency for Public Management) publishes open data that provides +authoritative, Admiralty-A1 ground truth for government-body context. Use this data +**before** relying on estimates or secondary sources when writing about agency headcounts, +organisational structures or central-government budget execution. + +### Available Datasets + +| Dataset key | Title | Cadence | Primary use | +|-------------|-------|---------|-------------| +| `myndighetsforteckning` | Myndighetsförteckning — öppna data | Annual | Headcount by department & leadership form (2007–present) | +| `arsutfall` | Årsutfall för statens budget — öppna data | Annual | Annual budget outturn by appropriation & agency | +| `manadsutfall` | Månadsutfall för statens budget — öppna data | Monthly | High-frequency budget-execution monitoring | +| `budget-time-series` | Tidsserier, statens budget m.m. | Annual | Long-run central-government budget context (1995+) | + +### How to Fetch (agentic workflows) + +The cached library helper is invoked from TypeScript code (see "Cached Fetch Module" +below). 
For ad-hoc CLI use, the `statskontoret-fetch.ts` wrapper is the entrypoint: + +```bash +# CLI: list every built-in Statskontoret source +tsx scripts/statskontoret-fetch.ts list-sources + +# CLI: discover downloadable files for a source +tsx scripts/statskontoret-fetch.ts discover --source myndighetsforteckning + +# CLI: fetch + parse headcount workbook +tsx scripts/statskontoret-fetch.ts headcount --url <url> --persist + +# CLI: fetch + parse budget-outturn workbook +tsx scripts/statskontoret-fetch.ts budget-outturn --source arsutfall --url <url> --doc-type Inkomst --persist +``` + +### Cached Fetch Module (`scripts/fetch-statskontoret.ts`) + +The `fetch-statskontoret.ts` module provides a **30-day TTL cache layer** over the raw +HTTP client, making it suitable for agentic workflows that run daily but should only +re-download large Excel workbooks every 30 days: + +```typescript +import { fetchStatskontoretCached, isStatskontoretCacheFresh } from './fetch-statskontoret.js'; + +// Check cache freshness without a network call +if (!isStatskontoretCacheFresh('myndighetsforteckning')) { + const payload = await fetchStatskontoretCached('myndighetsforteckning'); + // payload.fromCache === false → fresh download; true → stale-cache fallback after a failed fetch + // payload.links → array of StatskontoretDownloadLink (Excel URLs) +} +``` + +On network failure the module automatically falls back to the most recent stale cache +entry, ensuring workflows remain resilient to temporary outages. + +### Data Provenance Rule + +Any implementation-feasibility or agency-context analysis that names a Swedish +government body **must** annotate the headcount or budget figure with a +Statskontoret source citation: + +```markdown +*Headcount source: Statskontoret Myndighetsförteckning 2025 +(analysis/data/statskontoret/myndighetsforteckning/) [A1]* +``` + +Admiralty grade for own-Statskontoret publications: **A1** (official statistics, +primary public record). 
+ +### Network Allowlist + +`www.statskontoret.se` and `statskontoret.se` are included in the `network.allowed` +list of all 11 `news-*.md` agentic workflow files. No additional configuration is +required. + ## References - [Swedish Agency Directory](https://www.regeringen.se/regeringens-politik/myndigheter-under-regeringen/) @@ -245,6 +320,9 @@ interviews (5 labor economists), stakeholder statements* - [OECD Public Administration Reviews](https://www.oecd.org/governance/) - [Transparency International Sweden](https://www.transparency.se/) - [Swedish Agency for Public Management (Statskontoret)](https://www.statskontoret.se/) +- [Statskontoret Indicators Inventory](../../../analysis/statskontoret/indicators-inventory.json) +- [fetch-statskontoret.ts](../../../scripts/fetch-statskontoret.ts) — 30-day cache module +- [statskontoret-client.ts](../../../scripts/statskontoret-client.ts) — HTTP client library --- diff --git a/analysis/statskontoret/indicators-inventory.json b/analysis/statskontoret/indicators-inventory.json index f059b8d4d4..df03b781c4 100644 --- a/analysis/statskontoret/indicators-inventory.json +++ b/analysis/statskontoret/indicators-inventory.json @@ -8,6 +8,7 @@ "clients": { "cli": "tsx scripts/statskontoret-fetch.ts (commands: list-sources, discover, headcount, budget-outturn)", "library": "scripts/statskontoret-client.ts (StatskontoretClient class)", + "cachedFetch": "scripts/fetch-statskontoret.ts (fetchStatskontoretCached — 30-day TTL cache layer for agentic workflows)", "persistence": "scripts/parliamentary-data/data-persistence.ts (persistStatskontoretData)" }, "notes": { diff --git a/scripts/fetch-statskontoret.ts b/scripts/fetch-statskontoret.ts new file mode 100644 index 0000000000..c6870a74b4 --- /dev/null +++ b/scripts/fetch-statskontoret.ts @@ -0,0 +1,246 @@ +/** + * @module scripts/fetch-statskontoret + * @description Cached fetch module for Statskontoret open data, providing a + * 30-day TTL cache layer over {@link StatskontoretClient}. 
+ * + * This module is intended for use by agentic workflows that need Statskontoret + * context (authority register, budget outturn) without re-downloading large + * Excel/ZIP files on every run. It follows the same no-MCP client pattern as + * `imf-context.ts` and `scb-context.ts`. + * + * ### Cache behaviour + * - Cache root: `analysis/data/statskontoret//cache/` + * - TTL: 30 days (configurable via the `cacheTtlMs` option) + * - On hit: returns the cached payload with provenance metadata + * - On miss or stale: invokes `StatskontoretClient.discoverDownloads()` and + * persists the result before returning + * - On fetch error: falls back to the most recent stale cache entry (resilience) + * + * ### Security + * Fetch calls go only to `https://www.statskontoret.se` (enforced by + * `assertStatskontoretFetchTarget` inside `StatskontoretClient`). No + * credentials are required; all data is PUBLIC classification. + * + * @see analysis/statskontoret/indicators-inventory.json + * @see scripts/statskontoret-client.ts (low-level HTTP + parse) + * @see scripts/statskontoret-fetch.ts (CLI entry-point) + * @author Hack23 AB + * @license Apache-2.0 + */ + +import fs from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +import { + getStatskontoretSource, + STATSKONTORET_SOURCES, + StatskontoretClient, + StatskontoretError, + type StatskontoretClientConfig, + type StatskontoretDownloadLink, + type StatskontoretSourceKey, +} from './statskontoret-client.js'; + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +const __filename = fileURLToPath(import.meta.url); +const REPO_ROOT = path.resolve(path.dirname(__filename), '..'); + +/** Default 30-day cache TTL in milliseconds (30 days × 24 h × 60 min × 60 s × 1000 ms). 
*/ +export const CACHE_TTL_MS = 30 * 24 * 60 * 60 * 1000; + +/** Root directory for cached Statskontoret payloads. */ +export const STATSKONTORET_CACHE_ROOT = path.join( + REPO_ROOT, + 'analysis', + 'data', + 'statskontoret', +); + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +/** A cached Statskontoret downloads payload with provenance metadata. */ +export interface StatskontoretCachedPayload { + readonly sourceKey: StatskontoretSourceKey; + readonly sourceTitle: string; + readonly sourceUrl: string; + readonly links: readonly StatskontoretDownloadLink[]; + readonly cachedAt: string; + readonly fetchedAt: string; + readonly fromCache: boolean; + readonly cacheAgeMs: number; +} + +/** Options for {@link fetchStatskontoretCached}. */ +export interface FetchStatskontoretCachedOptions { + /** Override the 30-day TTL (milliseconds). Mainly for testing. */ + readonly cacheTtlMs?: number; + /** Override the cache root directory. Mainly for testing. */ + readonly cacheRoot?: string; + /** Override the `StatskontoretClient` configuration (e.g. inject a mock fetch). */ + readonly clientConfig?: StatskontoretClientConfig; +} + +/** Internal cache file format. 
/**
 * Internal cache file format.
 *
 * One record per source, serialised as pretty-printed JSON by
 * {@link writeCacheEntry} and read back by {@link readCacheEntry}.
 */
interface CacheEntry {
  /** ISO-8601 timestamp stamped right after the links were retrieved. */
  readonly fetchedAt: string;
  readonly sourceKey: StatskontoretSourceKey;
  readonly links: StatskontoretDownloadLink[];
}

// ---------------------------------------------------------------------------
// Private helpers
// ---------------------------------------------------------------------------

/** Directory that holds the cache file for `sourceKey` under `cacheRoot`. */
function cacheDir(sourceKey: StatskontoretSourceKey, cacheRoot: string): string {
  return path.join(cacheRoot, sourceKey, 'cache');
}

/** Absolute path of the `downloads.json` cache file for `sourceKey`. */
function cacheFilePath(sourceKey: StatskontoretSourceKey, cacheRoot: string): string {
  return path.join(cacheDir(sourceKey, cacheRoot), 'downloads.json');
}

/**
 * Runtime shape check for a parsed cache file.
 *
 * Only the top-level fields are verified (`fetchedAt` and `sourceKey` are
 * strings, `links` is an array); individual link objects are not inspected.
 */
function isCacheEntry(value: unknown): value is CacheEntry {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  return (
    typeof candidate['fetchedAt'] === 'string' &&
    typeof candidate['sourceKey'] === 'string' &&
    Array.isArray(candidate['links'])
  );
}

/**
 * Read and validate a cache file.
 *
 * @returns The parsed entry, or `undefined` when the file is missing,
 *   unreadable, not valid JSON, or does not have the {@link CacheEntry} shape.
 *   All of these are treated as a plain cache miss.
 */
function readCacheEntry(filePath: string): CacheEntry | undefined {
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
  } catch {
    return undefined;
  }
  // A syntactically valid but malformed file must not be handed to callers
  // as if it were a real entry (e.g. `links` would not be iterable).
  return isCacheEntry(parsed) ? parsed : undefined;
}

/** Write `entry` to `filePath`, creating parent directories as needed. */
function writeCacheEntry(filePath: string, entry: CacheEntry): void {
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  fs.writeFileSync(filePath, JSON.stringify(entry, null, 2), 'utf-8');
}

/** Milliseconds elapsed since `fetchedAt`; `NaN` when the timestamp is unparsable. */
function cacheAgeOf(fetchedAt: string): number {
  return Date.now() - new Date(fetchedAt).getTime();
}

/** `true` when `fetchedAt` parses to a date that is younger than `ttlMs`. */
function isCacheFresh(fetchedAt: string, ttlMs: number): boolean {
  const age = cacheAgeOf(fetchedAt);
  // An unparsable timestamp yields NaN, which must count as stale.
  return !Number.isNaN(age) && age < ttlMs;
}

/** Build the public payload for an entry that was served from disk. */
function payloadFromCache(
  sourceKey: StatskontoretSourceKey,
  source: { readonly title: string; readonly url: string },
  cached: CacheEntry,
): StatskontoretCachedPayload {
  return {
    sourceKey,
    sourceTitle: source.title,
    sourceUrl: source.url,
    links: cached.links,
    cachedAt: cached.fetchedAt,
    fetchedAt: cached.fetchedAt,
    fromCache: true,
    cacheAgeMs: cacheAgeOf(cached.fetchedAt),
  };
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Fetch Statskontoret download links for a given source key, using a 30-day
 * file-system cache.
 *
 * Resolution order:
 * 1. A valid cache entry younger than the TTL is returned as-is
 *    (`fromCache: true`), without constructing a client.
 * 2. Otherwise `StatskontoretClient.discoverDownloads()` is called. On success
 *    the result is returned with `fromCache: false` and written to the cache.
 *    A failure to write the cache file is logged with `console.warn` and does
 *    not discard the freshly fetched links.
 * 3. If discovery fails and any cache entry exists (even a stale one), that
 *    entry is returned with `fromCache: true`; compare `cacheAgeMs` with the
 *    TTL to detect this case.
 *
 * @param sourceKey - The Statskontoret source to fetch
 *   (`myndighetsforteckning`, `arsutfall`, `manadsutfall`, `budget-time-series`).
 * @param options - Optional TTL, cache-root and client overrides.
 * @returns A {@link StatskontoretCachedPayload} with links and provenance info.
 * @throws StatskontoretError when discovery fails and no cache entry exists.
 *
 * @example
 * ```ts
 * const payload = await fetchStatskontoretCached('myndighetsforteckning');
 * console.log(`Found ${payload.links.length} download links (fromCache=${payload.fromCache})`);
 * ```
 */
export async function fetchStatskontoretCached(
  sourceKey: StatskontoretSourceKey,
  options: FetchStatskontoretCachedOptions = {},
): Promise<StatskontoretCachedPayload> {
  const {
    cacheTtlMs = CACHE_TTL_MS,
    cacheRoot = STATSKONTORET_CACHE_ROOT,
    clientConfig = {},
  } = options;

  const source = getStatskontoretSource(sourceKey);
  const filePath = cacheFilePath(sourceKey, cacheRoot);

  // --- Cache hit ---
  const cached = readCacheEntry(filePath);
  if (cached !== undefined && isCacheFresh(cached.fetchedAt, cacheTtlMs)) {
    return payloadFromCache(sourceKey, source, cached);
  }

  // --- Cache miss or stale: fetch from origin ---
  const client = new StatskontoretClient(clientConfig);
  let links: StatskontoretDownloadLink[];
  try {
    links = await client.discoverDownloads(sourceKey);
  } catch (error) {
    // --- Resilience: return stale cache on fetch failure ---
    if (cached !== undefined) {
      return payloadFromCache(sourceKey, source, cached);
    }
    const detail = error instanceof Error ? error.message : String(error);
    throw new StatskontoretError(
      `fetch-statskontoret: failed to fetch ${sourceKey} and no cache available: ${detail}`,
      'http',
      { cause: error },
    );
  }

  // Stamp provenance after the fetch completes so `fetchedAt` reflects when
  // the data was actually retrieved, not when the request was issued.
  const fetchedAt = new Date().toISOString();

  // Persisting is best-effort: a read-only or full disk must not turn a
  // successful download into a stale result or a misleading 'http' error.
  try {
    writeCacheEntry(filePath, { fetchedAt, sourceKey, links });
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    console.warn(
      `fetch-statskontoret: fetched ${sourceKey} but could not write cache file ${filePath}: ${detail}`,
    );
  }

  return {
    sourceKey,
    sourceTitle: source.title,
    sourceUrl: source.url,
    links,
    cachedAt: fetchedAt,
    fetchedAt,
    fromCache: false,
    cacheAgeMs: 0,
  };
}

/**
 * Check whether a fresh cache entry exists for the given source key without
 * triggering a network fetch.
 *
 * @param sourceKey - The Statskontoret source to check.
 * @param options - Optional TTL and cache-root overrides.
 * @returns `true` if a valid cache entry younger than the TTL exists,
 *   `false` otherwise (including a missing or malformed cache file).
 */
export function isStatskontoretCacheFresh(
  sourceKey: StatskontoretSourceKey,
  options: Pick<FetchStatskontoretCachedOptions, 'cacheTtlMs' | 'cacheRoot'> = {},
): boolean {
  const { cacheTtlMs = CACHE_TTL_MS, cacheRoot = STATSKONTORET_CACHE_ROOT } = options;
  const cached = readCacheEntry(cacheFilePath(sourceKey, cacheRoot));
  return cached !== undefined && isCacheFresh(cached.fetchedAt, cacheTtlMs);
}

/**
 * Return the list of all built-in Statskontoret source keys.
 * Useful for iterating over all sources in agentic workflows.
 */
export function statskontoretSourceKeys(): readonly StatskontoretSourceKey[] {
  return STATSKONTORET_SOURCES.map((s) => s.key);
}
+ * + * @author Hack23 AB + * @license Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import fs from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { + fetchStatskontoretCached, + isStatskontoretCacheFresh, + statskontoretSourceKeys, + CACHE_TTL_MS, + type StatskontoretCachedPayload, +} from '../scripts/fetch-statskontoret.js'; +import { STATSKONTORET_SOURCES } from '../scripts/statskontoret-client.js'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const REPO_ROOT = path.resolve(__dirname, '..'); + +// --------------------------------------------------------------------------- +// Known major Swedish government agencies (Myndigheter) +// Drawn from the myndigheter-monitoring skill's "Key Swedish Agencies" section. +// --------------------------------------------------------------------------- + +const KNOWN_AGENCIES: readonly string[] = [ + 'Skatteverket', + 'Arbetsförmedlingen', + 'Försäkringskassan', + 'Polismyndigheten', + 'Migrationsverket', + 'Trafikverket', + 'Naturvårdsverket', + 'Socialstyrelsen', + 'Skolverket', + 'Finansinspektionen', + 'Riksgäldskontoret', + 'Ekonomistyrningsverket', + 'Pensionsmyndigheten', + 'Folkhälsomyndigheten', + 'Kriminalvården', + 'Boverket', + 'Energimyndigheten', + 'Konkurrensverket', + 'Statskontoret', +]; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +interface StatskontoretInventoryDataset { + primaryUse: string; + committees?: string[]; + admiralty?: string; + [key: string]: unknown; +} + +interface StatskontoretInventory { + version: string; + source: string; + classification: string; + datasets: Record; + providerDecisionMatrix: Record; +} + +function readInventory(): 
StatskontoretInventory { + return JSON.parse( + fs.readFileSync( + path.resolve(REPO_ROOT, 'analysis/statskontoret/indicators-inventory.json'), + 'utf-8', + ), + ) as StatskontoretInventory; +} + +/** Return all implementation-feasibility.md files under analysis/daily/ */ +function findFeasibilityFiles(): string[] { + const dailyDir = path.join(REPO_ROOT, 'analysis', 'daily'); + if (!fs.existsSync(dailyDir)) return []; + + const results: string[] = []; + function walk(dir: string): void { + for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { + const full = path.join(dir, entry.name); + if (entry.isDirectory()) { + walk(full); + } else if (entry.name === 'implementation-feasibility.md') { + results.push(full); + } + } + } + walk(dailyDir); + return results; +} + +/** Extract agency names mentioned in a markdown file. */ +function extractAgencyMentions(content: string, agencies: readonly string[]): string[] { + return agencies.filter((agency) => content.includes(agency)); +} + +// --------------------------------------------------------------------------- +// Tests: inventory coverage +// --------------------------------------------------------------------------- + +describe('Statskontoret inventory → implementation-feasibility coverage contract', () => { + const inv = readInventory(); + const feasibilityFiles = findFeasibilityFiles(); + + it('inventory has myndighetsforteckning dataset for government-body coverage', () => { + expect(inv.datasets['myndighetsforteckning']).toBeDefined(); + expect(inv.datasets['myndighetsforteckning'].primaryUse).toMatch(/[Hh]eadcount|government bodies/); + }); + + it('myndighetsforteckning is classified A1 (highest data quality)', () => { + expect(inv.datasets['myndighetsforteckning'].admiralty).toBe('A1'); + }); + + it('at least one implementation-feasibility.md file in the analysis tree ' + + 'mentions a known Swedish agency (otherwise the per-file coverage test is vacuous)', () => { + const filesWithMentions = 
feasibilityFiles.filter((filePath) => { + const content = fs.readFileSync(filePath, 'utf-8'); + return extractAgencyMentions(content, KNOWN_AGENCIES).length > 0; + }); + expect( + filesWithMentions.length, + 'No implementation-feasibility.md file references any known Swedish agency. ' + + 'Either the analysis corpus is empty or KNOWN_AGENCIES is misconfigured.', + ).toBeGreaterThan(0); + }); + + it('every implementation-feasibility.md mentioning a known agency resolves to ' + + 'a Statskontoret dataset that covers it via myndighetsforteckning', () => { + // Since myndighetsforteckning covers ALL Swedish government bodies by + // definition, one dataset entry suffices for all named agencies. This + // test enforces the contract per-file: every file mentioning an agency + // is recorded with the exact agencies it cites, and the inventory must + // serve that file via the myndighetsforteckning dataset. + const perFileCoverage: Array<{ file: string; agencies: string[]; covered: boolean }> = []; + + for (const filePath of feasibilityFiles) { + const content = fs.readFileSync(filePath, 'utf-8'); + const mentioned = extractAgencyMentions(content, KNOWN_AGENCIES); + if (mentioned.length === 0) continue; + + const covered = inv.datasets['myndighetsforteckning'] !== undefined; + perFileCoverage.push({ + file: path.relative(REPO_ROOT, filePath), + agencies: mentioned, + covered, + }); + } + + const uncovered = perFileCoverage.filter((entry) => !entry.covered); + expect( + uncovered, + `Statskontoret inventory is missing myndighetsforteckning coverage for:\n` + + uncovered.map((u) => ` - ${u.file} (mentions: ${u.agencies.join(', ')})`).join('\n'), + ).toHaveLength(0); + + // Sanity: confirm we actually recorded coverage for at least one file — + // protects against the prior version that always passed even when no + // file mentioned any agency. 
+ expect(perFileCoverage.length, 'expected at least one feasibility file to mention a known agency').toBeGreaterThan(0); + }); + + it('inventory globally covers FiU and KU committees in at least one Statskontoret dataset', () => { + // Collect all committees covered across all datasets. + const coveredCommittees = new Set(); + for (const dataset of Object.values(inv.datasets)) { + for (const committee of dataset.committees ?? []) { + coveredCommittees.add(committee); + } + } + + // Structural sanity check: FiU (Finance) and KU (Constitution) are the + // committees most likely to need Statskontoret context for agency analysis. + expect(coveredCommittees.has('FiU')).toBe(true); + expect(coveredCommittees.has('KU')).toBe(true); + }); + + it('inventory providerDecisionMatrix maps governmentBodiesHeadcount to statskontoret', () => { + expect(inv.providerDecisionMatrix['governmentBodiesHeadcount']).toMatch(/^statskontoret:/); + }); + + it('found at least one implementation-feasibility.md file in the analysis tree', () => { + // Guard: if there are zero files, subsequent tests are vacuously true and could hide issues. 
+ expect(feasibilityFiles.length).toBeGreaterThan(0); + }); +}); + +// --------------------------------------------------------------------------- +// Tests: fetch-statskontoret module API contract +// --------------------------------------------------------------------------- + +describe('fetch-statskontoret module — API contract', () => { + it('exports CACHE_TTL_MS equal to 30 days', () => { + const thirtyDaysMs = 30 * 24 * 60 * 60 * 1000; + expect(CACHE_TTL_MS).toBe(thirtyDaysMs); + }); + + it('statskontoretSourceKeys() returns all built-in source keys', () => { + const keys = statskontoretSourceKeys(); + const expected = STATSKONTORET_SOURCES.map((s) => s.key); + expect(keys).toEqual(expected); + expect(keys.length).toBeGreaterThanOrEqual(4); + }); + + it('isStatskontoretCacheFresh returns false when no cache file exists', () => { + const tmpDir = mkdtempSync(path.join(tmpdir(), 'sk-cache-test-')); + try { + const fresh = isStatskontoretCacheFresh('myndighetsforteckning', { cacheRoot: tmpDir }); + expect(fresh).toBe(false); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('isStatskontoretCacheFresh returns true when a fresh cache file exists', () => { + const tmpDir = mkdtempSync(path.join(tmpdir(), 'sk-cache-test-')); + try { + const cacheDir = path.join(tmpDir, 'myndighetsforteckning', 'cache'); + mkdirSync(cacheDir, { recursive: true }); + const entry = { + fetchedAt: new Date().toISOString(), + sourceKey: 'myndighetsforteckning', + links: [], + }; + writeFileSync(path.join(cacheDir, 'downloads.json'), JSON.stringify(entry), 'utf-8'); + + const fresh = isStatskontoretCacheFresh('myndighetsforteckning', { cacheRoot: tmpDir }); + expect(fresh).toBe(true); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('isStatskontoretCacheFresh returns false when cache entry is older than TTL', () => { + const tmpDir = mkdtempSync(path.join(tmpdir(), 'sk-cache-test-')); + try { + const cacheDir = 
path.join(tmpDir, 'arsutfall', 'cache'); + mkdirSync(cacheDir, { recursive: true }); + // Timestamp 31 days ago + const staleDate = new Date(Date.now() - 31 * 24 * 60 * 60 * 1000).toISOString(); + const entry = { fetchedAt: staleDate, sourceKey: 'arsutfall', links: [] }; + writeFileSync(path.join(cacheDir, 'downloads.json'), JSON.stringify(entry), 'utf-8'); + + const fresh = isStatskontoretCacheFresh('arsutfall', { cacheRoot: tmpDir }); + expect(fresh).toBe(false); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('fetchStatskontoretCached returns cached payload from disk without network call', async () => { + const tmpDir = mkdtempSync(path.join(tmpdir(), 'sk-cache-test-')); + try { + const cacheDir = path.join(tmpDir, 'manadsutfall', 'cache'); + mkdirSync(cacheDir, { recursive: true }); + const now = new Date().toISOString(); + const mockLink = { + source: 'manadsutfall' as const, + sourcePage: 'https://www.statskontoret.se/analys-och-statistik/oppna-data/manadsutfall/', + href: '/OpenData/GetFile?fileType=Excel&fileName=test.xlsx', + url: 'https://www.statskontoret.se/OpenData/GetFile?fileType=Excel&fileName=test.xlsx', + text: 'Excel (5 kB)', + resourceType: 'excel' as const, + documentType: 'Inkomst', + year: 2026, + month: 3, + }; + const entry = { fetchedAt: now, sourceKey: 'manadsutfall', links: [mockLink] }; + writeFileSync(path.join(cacheDir, 'downloads.json'), JSON.stringify(entry), 'utf-8'); + + // Pass a clientConfig with a failing fetchFn to confirm no network call happens + const failingFetch = async (): Promise => { + throw new Error('network call should not happen on cache hit'); + }; + + const payload: StatskontoretCachedPayload = await fetchStatskontoretCached('manadsutfall', { + cacheRoot: tmpDir, + clientConfig: { fetchFn: failingFetch as typeof fetch }, + }); + + expect(payload.fromCache).toBe(true); + expect(payload.links).toHaveLength(1); + expect(payload.sourceKey).toBe('manadsutfall'); + 
expect(payload.cacheAgeMs).toBeGreaterThanOrEqual(0); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('fetchStatskontoretCached falls back to stale cache on network error', async () => { + const tmpDir = mkdtempSync(path.join(tmpdir(), 'sk-cache-test-')); + try { + const cacheDir = path.join(tmpDir, 'arsutfall', 'cache'); + mkdirSync(cacheDir, { recursive: true }); + // Stale cache (31 days old) + const staleDate = new Date(Date.now() - 31 * 24 * 60 * 60 * 1000).toISOString(); + const entry = { fetchedAt: staleDate, sourceKey: 'arsutfall', links: [] }; + writeFileSync(path.join(cacheDir, 'downloads.json'), JSON.stringify(entry), 'utf-8'); + + const failingFetch = async (): Promise => { + throw new Error('simulated network failure'); + }; + + const payload = await fetchStatskontoretCached('arsutfall', { + cacheRoot: tmpDir, + clientConfig: { fetchFn: failingFetch as typeof fetch }, + }); + + // Should fall back to the stale cache + expect(payload.fromCache).toBe(true); + expect(payload.links).toHaveLength(0); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('fetchStatskontoretCached throws StatskontoretError when network fails and no cache exists', async () => { + const tmpDir = mkdtempSync(path.join(tmpdir(), 'sk-cache-test-')); + try { + const failingFetch = async (): Promise => { + throw new Error('simulated network failure'); + }; + + await expect( + fetchStatskontoretCached('budget-time-series', { + cacheRoot: tmpDir, + clientConfig: { fetchFn: failingFetch as typeof fetch }, + }), + ).rejects.toThrow(/no cache available/); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); +}); diff --git a/tests/statskontoret-inventory.test.ts b/tests/statskontoret-inventory.test.ts index 95e15c805c..b7353a3047 100644 --- a/tests/statskontoret-inventory.test.ts +++ b/tests/statskontoret-inventory.test.ts @@ -45,9 +45,11 @@ describe('analysis/statskontoret/indicators-inventory.json', 
() => { expect(inv.providerDecisionMatrix.centralGovernmentBudgetMonthlyOutturn).toBe('statskontoret:manadsutfall'); }); - it('documents the client, CLI and persistence surfaces', () => { + it('documents the client, CLI, cachedFetch and persistence surfaces', () => { expect(inv.clients.cli).toContain('scripts/statskontoret-fetch.ts'); expect(inv.clients.library).toContain('scripts/statskontoret-client.ts'); + expect(inv.clients.cachedFetch).toContain('scripts/fetch-statskontoret.ts'); + expect(inv.clients.cachedFetch).toContain('30-day'); expect(inv.clients.persistence).toContain('persistStatskontoretData'); }); });