|
15 | 15 | * Usage: |
16 | 16 | * npx tsx scripts/download-parliamentary-data.ts [--date YYYY-MM-DD] [--limit N] |
17 | 17 | * npx tsx scripts/download-parliamentary-data.ts --aggregate weekly [--date YYYY-WNN] |
| 18 | + * npx tsx scripts/download-parliamentary-data.ts --auto-full-text-top-n 2 |
18 | 19 | * |
19 | 20 | * @see analysis/methodologies/ai-driven-analysis-guide.md |
20 | 21 | * @author Hack23 AB |
@@ -62,6 +63,7 @@ export function parseArgs(argv: string[]): { |
62 | 63 | rm: string | null; |
63 | 64 | docType: DocumentTypeKey | null; |
64 | 65 | documentIds: string[]; |
| 66 | + autoFullTextTopN: number | null; |
65 | 67 | } { |
66 | 68 | const args = argv.slice(2); |
67 | 69 | const get = (flag: string): string | null => { |
@@ -146,7 +148,21 @@ export function parseArgs(argv: string[]): { |
146 | 148 | }) |
147 | 149 | : []; |
148 | 150 |
|
149 | | - return { date: isoDate, aggregate, limit, weekLabel, rm, docType, documentIds }; |
| 151 | + // --auto-full-text-top-n: Override the per-type full-text enrichment limit. |
| 152 | + // When set, only the top N documents per type receive fetchDocumentDetails |
| 153 | + // (full-text) enrichment, enabling more targeted significance-scoring input. |
| 154 | + // Defaults to MAX_ENRICHMENT_PER_TYPE when omitted (null → caller uses default). |
| 155 | + const autoFullTextTopNArg = get('--auto-full-text-top-n'); |
| 156 | + let autoFullTextTopN: number | null = null; |
| 157 | + if (autoFullTextTopNArg !== null) { |
| 158 | + const parsed = Number(autoFullTextTopNArg); |
| 159 | + if (!Number.isInteger(parsed) || parsed < 0) { |
| 160 | + throw new Error(`Invalid --auto-full-text-top-n value: ${autoFullTextTopNArg}. Expected a non-negative integer.`); |
| 161 | + } |
| 162 | + autoFullTextTopN = parsed; |
| 163 | + } |
| 164 | + |
| 165 | + return { date: isoDate, aggregate, limit, weekLabel, rm, docType, documentIds, autoFullTextTopN }; |
150 | 166 | } |
151 | 167 |
|
152 | 168 | function isoWeekNumber(date: Date): number { |
@@ -372,8 +388,9 @@ async function runPreArticleAnalysis(opts: { |
372 | 388 | rm: string | null; |
373 | 389 | docType: DocumentTypeKey | null; |
374 | 390 | documentIds: string[]; |
| 391 | + autoFullTextTopN: number | null; |
375 | 392 | }): Promise<void> { |
376 | | - const { date, limit, aggregate, weekLabel, rm, docType, documentIds } = opts; |
| 393 | + const { date, limit, aggregate, weekLabel, rm, docType, documentIds, autoFullTextTopN } = opts; |
377 | 394 |
|
378 | 395 | if (aggregate && weekLabel) { |
379 | 396 | console.log(`\n📅 Running weekly data summary for: ${weekLabel}`); |
@@ -403,10 +420,17 @@ async function runPreArticleAnalysis(opts: { |
403 | 420 | const client = new MCPClient(); |
404 | 421 | const resolvedRm = rm ?? riksMoteFromDate(date); |
405 | 422 |
|
406 | | - const downloadOpts: { limit: number; rm: string; docTypes?: DocumentTypeKey[] } = { limit, rm: resolvedRm }; |
| 423 | + const downloadOpts: { limit: number; rm: string; docTypes?: DocumentTypeKey[]; enrichLimit?: number } = { limit, rm: resolvedRm }; |
407 | 424 | if (docType) { |
408 | 425 | downloadOpts.docTypes = [docType]; |
409 | 426 | } |
| 427 | + // --auto-full-text-top-n wires the CLI flag into the per-type enrichment |
| 428 | + // limit, enabling more targeted full-text fetching for significance scoring. |
| 429 | + // When null, downloadAllDocuments uses MAX_ENRICHMENT_PER_TYPE (5) by default. |
| 430 | + if (autoFullTextTopN !== null) { |
| 431 | + downloadOpts.enrichLimit = autoFullTextTopN; |
| 432 | + console.log(` 📝 Full-text enrichment: top ${autoFullTextTopN} documents per type (--auto-full-text-top-n=${autoFullTextTopN})`); |
| 433 | + } |
410 | 434 |
|
411 | 435 | const { data, manifest } = await downloadAllDocuments(client, downloadOpts); |
412 | 436 | const flattenedDocs = flattenDocuments(data); |
@@ -537,6 +561,11 @@ async function runPreArticleAnalysis(opts: { |
537 | 561 | console.log(' - analysis/methodologies/ai-driven-analysis-guide.md'); |
538 | 562 | console.log(' - analysis/templates/ (per-file analysis templates)'); |
539 | 563 | console.log(' - npx tsx scripts/catalog-downloaded-data.ts --pending-only'); |
| 564 | + if (autoFullTextTopN !== null && autoFullTextTopN > 0) { |
| 565 | + console.log(` ℹ️ Significance-scoring note: top-${autoFullTextTopN} documents per type have full text`); |
| 566 | + console.log(' available (contentFetched=true) — AI significance-scoring step'); |
| 567 | + console.log(' should prioritise those documents for deeper analysis.'); |
| 568 | + } |
540 | 569 | } |
541 | 570 |
|
542 | 571 | // --------------------------------------------------------------------------- |
|
0 commit comments