{
  "version": "1.5",
  "effectiveDate": "2026-05-03",
  "description": "Per-artifact minimum line-count floors used to enforce Rule 22 (Per-Artifact Depth Floors) of analysis/methodologies/ai-driven-analysis-guide.md for Riksdagsmonitor agentic news workflows. Values are calibrated to the DIW Output Matrix and the Riksdag 23-artifact catalog (see artifact-catalog.md). v1.5 (2026-05-03) adds the aiFirst block (citationDensity, bannedPhrases pointer, pass2Attestation, wepLanguageCeiling, exemplarPolicy) consumed by the AI-FIRST Methodology Card in every analysis/methodologies/*.md and by the Pass-2 self-audit in methodology-reflection.md. v1.4 (2026-05-01) adds quarter-ahead / year-ahead / election-cycle blocks for the long-horizon-forecast workflows; values derived proportionally from month-ahead × the tier-C multiplier (1.7×, 2.0×, 2.5×) defined in analysis/article-types.json. A file below its threshold MUST trigger a Pass-2 rewrite; it MUST NOT be shipped with a '[truncated]' or 'AI_MUST_REPLACE' note. When an articleType entry is absent the gate falls back to the default floor. When a per-artifact entry is present the effective floor is max(perArtifact, default), so the default can only raise, never lower, a configured floor.",
  "tradecraftQualitySignals": {
    "note": "ADDITIVE quality signals — NOT enforced by the line-count validator. They represent content-quality expectations that the Pass-2 self-audit in methodology-reflection.md must check. Every probabilistic artifact MUST also comply with osint-tradecraft-standards.md: (1) WEP band + time horizon on every headline judgement, (2) Admiralty grade on every external source, (3) confidence-in-evidence tracked separately from WEP probability, (4) >= 10 SATs applied per run (attested in methodology-reflection.md §Evidence audit). Non-compliant artifacts trigger a Pass-2 rewrite even if the line-count floor is met.",
    "wepBandRequired": [
      "synthesis-summary.md",
      "scenario-analysis.md",
      "threat-analysis.md",
      "risk-assessment.md",
      "intelligence-assessment.md",
      "forward-indicators.md",
      "cross-run-diff.md",
      "cross-session-intelligence.md"
    ],
    "admiraltyGradeRequired": [
      "synthesis-summary.md",
      "scenario-analysis.md",
      "threat-analysis.md",
      "risk-assessment.md",
      "intelligence-assessment.md",
      "devils-advocate.md",
      "comparative-international.md",
      "cross-run-diff.md"
    ],
    "icd203BlufRequired": [
      "synthesis-summary.md",
      "intelligence-assessment.md",
      "methodology-reflection.md"
    ],
    "satDocumentationRequired": [
      "methodology-reflection.md",
      "devils-advocate.md"
    ],
    "diwScoreRequired": [
      "significance-scoring.md",
      "documents/{dok_id}-analysis.md"
    ],
    "partyNeutralityArithmeticRequired": [
      "synthesis-summary.md",
      "swot-analysis.md",
      "risk-assessment.md",
      "stakeholder-impact.md",
      "media-framing-analysis.md",
      "voter-segmentation.md",
      "coalition-mathematics.md"
    ]
  },
  "referenceBenchmark": {
    "description": "Gold-standard reference run used to calibrate depth floors. Re-selected quarterly from the highest-scoring production run of the preceding quarter as measured by reference-analysis-quality.md.",
    "runDir": "analysis/daily/[TBD-reference-benchmark]/",
    "runId": "[TBD]",
    "date": "2026-Q2-benchmark-pending",
    "nextCalibration": "2026-07-31"
  },
  "toleranceRule": "All floors derived from the benchmark are rounded down to the nearest 5 lines with a 10% tolerance applied, allowing minor variation while still rejecting thin output.",
  "defaults": {
    "description": "Fallback floors applied when an articleType or perArtifact entry is absent.",
    "coreArtifactFloor": 120,
    "perDocumentFloorByTier": {
      "L1_surface": 40,
      "L2_strategic": 80,
      "L2_plus_priority": 140,
      "L3_intelligence_grade": 220
    },
    "clusterFloor": 100,
    "supplementaryFloor": 120
  },
  "thresholds": {
    "breaking": {
      "synthesis-summary.md": 205,
      "cross-reference-map.md": 110,
      "significance-scoring.md": 150,
      "political-classification.md": 140,
      "swot-analysis.md": 160,
      "risk-assessment.md": 180,
      "threat-analysis.md": 180,
      "stakeholder-impact.md": 220,
      "data-download-manifest.md": 60,
      "scenario-analysis.md": 180,
      "comparative-international.md": 150,
      "devils-advocate.md": 160,
      "intelligence-assessment.md": 160,
      "methodology-reflection.md": 200,
      "executive-brief.md": 120,
      "election-2026-analysis.md": 150,
      "voter-segmentation.md": 140,
      "coalition-mathematics.md": 135,
      "historical-parallels.md": 130,
      "media-framing-analysis.md": 200,
      "implementation-feasibility.md": 140,
      "forward-indicators.md": 150,
      "analysis-index.md": 120,
      "reference-analysis-quality.md": 120,
      "mcp-reliability-audit.md": 150,
      "workflow-audit.md": 120,
      "cross-run-diff.md": 130,
      "cross-session-intelligence.md": 140,
      "session-baseline.md": 140
    },
    "evening-analysis": {
      "synthesis-summary.md": 220,
      "risk-assessment.md": 200,
      "threat-analysis.md": 200,
      "stakeholder-impact.md": 240,
      "forward-indicators.md": 170,
      "media-framing-analysis.md": 220,
      "methodology-reflection.md": 210,
      "executive-brief.md": 150
    },
    "realtime-monitor": {
      "synthesis-summary.md": 170,
      "risk-assessment.md": 150,
      "threat-analysis.md": 150,
      "stakeholder-impact.md": 190,
      "forward-indicators.md": 140,
      "media-framing-analysis.md": 170,
      "methodology-reflection.md": 180,
      "executive-brief.md": 100
    },
    "propositions": {
      "synthesis-summary.md": 230,
      "political-classification.md": 160,
      "swot-analysis.md": 180,
      "risk-assessment.md": 210,
      "implementation-feasibility.md": 170,
      "stakeholder-impact.md": 250,
      "media-framing-analysis.md": 210,
      "methodology-reflection.md": 215,
      "executive-brief.md": 130
    },
    "motions": {
      "synthesis-summary.md": 220,
      "significance-scoring.md": 170,
      "political-classification.md": 155,
      "swot-analysis.md": 175,
      "stakeholder-impact.md": 240,
      "comparative-international.md": 160,
      "media-framing-analysis.md": 210,
      "methodology-reflection.md": 210,
      "executive-brief.md": 130
    },
    "committee-reports": {
      "synthesis-summary.md": 215,
      "swot-analysis.md": 170,
      "risk-assessment.md": 195,
      "implementation-feasibility.md": 165,
      "stakeholder-impact.md": 235,
      "media-framing-analysis.md": 200,
      "methodology-reflection.md": 205,
      "executive-brief.md": 130
    },
    "interpellations": {
      "synthesis-summary.md": 205,
      "swot-analysis.md": 160,
      "threat-analysis.md": 180,
      "stakeholder-impact.md": 220,
      "media-framing-analysis.md": 220,
      "methodology-reflection.md": 200,
      "executive-brief.md": 110
    },
    "week-ahead": {
      "synthesis-summary.md": 200,
      "forward-indicators.md": 170,
      "media-framing-analysis.md": 200,
      "methodology-reflection.md": 195,
      "executive-brief.md": 130
    },
    "month-ahead": {
      "synthesis-summary.md": 230,
      "forward-indicators.md": 200,
      "scenario-analysis.md": 200,
      "media-framing-analysis.md": 240,
      "methodology-reflection.md": 215,
      "executive-brief.md": 150
    },
    "quarter-ahead": {
      "_comment": "Quarter-ahead floors = month-ahead × 1.13 (proportional to multiplier 1.7 / 1.5), rounded down to nearest 5. Mandates 4 scenarios + 2 counterfactuals; cross-horizon citation of week-ahead + month-ahead required. Note: election-2026-analysis.md and election-cycle-analysis.md are filename-variant aliases (see scripts/render-lib/aggregator/order.ts → FILENAME_ALIASES); the depth floor below applies to whichever filename is present in a given run folder.",
      "synthesis-summary.md": 260,
      "forward-indicators.md": 225,
      "scenario-analysis.md": 245,
      "comparative-international.md": 165,
      "methodology-reflection.md": 240,
      "executive-brief.md": 170,
      "intelligence-assessment.md": 200,
      "devils-advocate.md": 165,
      "election-2026-analysis.md": 180,
      "election-cycle-analysis.md": 180
    },
    "year-ahead": {
      "_comment": "Year-ahead floors = month-ahead × 1.33 (proportional to multiplier 2.0 / 1.5), rounded down to nearest 5. Mandates 4 scenarios + 5 wildcards + PESTLE; cross-horizon citation of quarter-ahead × 2 + monthly-review × 4. Note: election-2026-analysis.md and election-cycle-analysis.md are filename-variant aliases.",
      "synthesis-summary.md": 305,
      "forward-indicators.md": 265,
      "scenario-analysis.md": 290,
      "comparative-international.md": 195,
      "pestle-analysis.md": 200,
      "wildcards-blackswans.md": 200,
      "quantitative-swot.md": 180,
      "intelligence-assessment.md": 240,
      "devils-advocate.md": 195,
      "methodology-reflection.md": 285,
      "executive-brief.md": 200,
      "election-2026-analysis.md": 225,
      "election-cycle-analysis.md": 225
    },
    "election-cycle": {
      "_comment": "Election-cycle floors = month-ahead × 1.67 (proportional to multiplier 2.5 / 1.5), rounded down to nearest 5. Mandates 4 scenarios × 3 governing-coalition branches + 5 wildcards + 3 counterfactuals + PESTLE + STRIDE; cross-horizon citation of year-ahead × 2 + monthly-review × 12. Also blocks on cycle-trajectory.md (24th artifact for this type only). Note: election-2026-analysis.md and election-cycle-analysis.md are filename-variant aliases.",
      "synthesis-summary.md": 380,
      "forward-indicators.md": 330,
      "scenario-analysis.md": 360,
      "comparative-international.md": 245,
      "pestle-analysis.md": 250,
      "wildcards-blackswans.md": 250,
      "quantitative-swot.md": 225,
      "political-stride-assessment.md": 250,
      "cycle-trajectory.md": 280,
      "intelligence-assessment.md": 295,
      "devils-advocate.md": 240,
      "methodology-reflection.md": 350,
      "executive-brief.md": 250,
      "election-2026-analysis.md": 275,
      "election-cycle-analysis.md": 275,
      "coalition-mathematics.md": 230,
      "historical-parallels.md": 215
    },
    "weekly-review": {
      "synthesis-summary.md": 240,
      "cross-session-intelligence.md": 180,
      "session-baseline.md": 170,
      "cross-run-diff.md": 160,
      "media-framing-analysis.md": 220,
      "methodology-reflection.md": 220,
      "executive-brief.md": 180
    },
    "monthly-review": {
      "synthesis-summary.md": 260,
      "cross-session-intelligence.md": 220,
      "session-baseline.md": 200,
      "cross-run-diff.md": 180,
      "comparative-international.md": 180,
      "media-framing-analysis.md": 240,
      "methodology-reflection.md": 235,
      "executive-brief.md": 200
    },
    "analyticalSupplementary": {
      "_comment": "Analytical supplementary templates — never blocking. Floors apply only when the template is produced. Sister-methodology: analytical-supplementary-methodology.md.",
      "pestle-analysis.md": {
        "standard": 100,
        "deep": 150,
        "comprehensive": 220,
        "rowFloor": {
          "dimensionTables": 4,
          "crossInteractions": 3
        }
      },
      "political-stride-assessment.md": {
        "standard": 110,
        "deep": 160,
        "comprehensive": 240,
        "rowFloor": {
          "stridePerDimension": 3,
          "attackTrees": 2,
          "ttpRows": 5
        }
      },
      "wildcards-blackswans.md": {
        "standard": 110,
        "deep": 160,
        "comprehensive": 240,
        "rowFloor": {
          "wildcards": 8,
          "blackSwans": 3,
          "consequenceTrees": 2,
          "resilienceDimensions": 5
        }
      },
      "quantitative-swot.md": {
        "standard": 110,
        "deep": 160,
        "comprehensive": 240,
        "rowFloor": {
          "perQuadrant": 3,
          "sensitivityRuns": 3,
          "towsActionsPerQuadrant": 1
        }
      }
    }
  },
  "tierCMultipliers": {
    "description": "Tier-C aggregation workflows apply these period-scope multipliers on top of the base thresholds. See ext/tier-c-aggregation.md.",
    "week-aggregation": 1.15,
    "month-aggregation": 1.35,
    "quarter-aggregation": 1.6
  },
  "enforcement": {
    "gateChecks": [
      "Check 3 in .github/prompts/05-analysis-gate.md consumes thresholds[articleType][artifact] with fallback to defaults.coreArtifactFloor.",
      "A file below floor MUST be returned to Pass 2 via the phase-04-pass2 checkpoint.",
      "Family-E per-document files use defaults.perDocumentFloorByTier keyed by the file's DIW tier tag in significance-scoring.md.",
      "Supplementary artifacts (S1–S7) use defaults.supplementaryFloor unless listed explicitly per articleType."
    ],
    "waivers": "Thresholds may be waived only by the CEO (document owner) via a merged PR updating this file. Ad-hoc waivers in commit messages are rejected."
  },
  "aiFirst": {
    "description": "AI-FIRST quality thresholds (per .github/copilot-instructions.md §5 and per-file AI-FIRST Methodology Cards in analysis/methodologies/*.md). These are ADDITIVE quality signals; the gate's blocking checks are 1–11 in 05-analysis-gate.md. The thresholds in this block are consumed by the Pass-2 self-audit documented in methodology-reflection.md.",
    "minimumIterations": 2,
    "passDiscipline": {
      "pass1": "Creation pass — produce minimal viable artifact; satisfy template structure + at least one evidence anchor per claim.",
      "pass2": "Read-back & improve pass — eliminate banned phrases, add second-order effects / cui-bono / counterfactuals, tighten WEP, strengthen Mermaid theming, raise citation density."
    },
    "citationDensity": {
      "default": {
        "anchorsPerWords": 120,
        "rule": "≥ 1 evidence anchor (dok_id, vote id, named MP, primary-source URL host) per ~120 words of analytical claim."
      },
      "perArticle": {
        "_note": "Per-article-type density thresholds for the aggregated article.md (words-per-anchor maximum). These are less strict than per-artifact thresholds because articles contain structural text (Reader Intelligence Guide tables, headings) that dilutes density.",
        "breaking": 180,
        "evening-analysis": 180,
        "realtime-monitor": 200,
        "propositions": 160,
        "motions": 170,
        "committee-reports": 170,
        "interpellations": 180,
        "week-ahead": 200,
        "month-ahead": 180,
        "quarter-ahead": 180,
        "year-ahead": 180,
        "election-cycle": 170,
        "weekly-review": 200,
        "monthly-review": 190
      },
      "perArtifact": {
        "synthesis-summary.md": 100,
        "swot-analysis.md": 80,
        "risk-assessment.md": 100,
        "threat-analysis.md": 100,
        "intelligence-assessment.md": 110,
        "scenario-analysis.md": 110,
        "devils-advocate.md": 110,
        "executive-brief.md": 90,
        "documents/{dok_id}-analysis.md": 80,
        "comparative-international.md": 130,
        "media-framing-analysis.md": 130,
        "forward-indicators.md": 90,
        "coalition-mathematics.md": 90,
        "implementation-feasibility.md": 110
      },
      "_note": "Values are 'words per anchor' — lower = denser citation. SWOT and per-document files have the densest target because every quadrant entry / bullet must be evidence-backed. Comparator and framing artifacts can run thinner because their structure is table-driven."
    },
    "bannedPhrases": {
      "source": "analysis/methodologies/political-style-guide.json",
      "sourceMarkdown": "analysis/methodologies/political-style-guide.md",
      "block": "BEGIN BANNED-PHRASES … END BANNED-PHRASES",
      "matchMode": "literal-substring-case-insensitive",
      "extractionRule": "Consumers MUST use the canonical JSON file (political-style-guide.json) for automated enforcement. The JSON `allPhrases` array contains the flat list of literal banned substrings. Case-insensitive matching is the default.",
      "enforcement": "validate-article.ts Check 7 (banned-phrase-detected) enforces this as a blocking CI check. The agent's Pass-2 self-audit also enforces it at authoring time.",
      "removalPolicy": "Append-only between major versions. Removals require a major-version bump and a justification entry in methodology-reflection.md."
    },
    "pass2Attestation": {
      "rule": "Every Family A / B / C / D artifact (except data-download-manifest.md) MUST be re-read after creation and improved. The gate enforces this via mtime diff (final mtime > creation + 3 min) per Check 6 in 05-analysis-gate.md.",
      "attestationLocation": "methodology-reflection.md §Pass-2 audit summary"
    },
    "wepLanguageCeiling": {
      "horizon-72h": "very likely / very unlikely",
      "horizon-week": "likely / unlikely",
      "horizon-month": "likely / unlikely",
      "horizon-quarter": "roughly even / about even",
      "horizon-year": "roughly even (likely allowed only with ≥ 3 cycle-aged sources)",
      "horizon-cycle": "roughly even (likely allowed only with ≥ 3 cycle-aged sources)",
      "horizon-election": "scenario-driven; coalition outcomes never above 'likely'",
      "_source": ".github/prompts/ext/long-horizon-forecasting.md"
    },
    "exemplarPolicy": {
      "rule": "Every methodology file in analysis/methodologies/ MUST carry an Exemplar (good — pattern-match this) and an Anti-exemplar (failure mode — never ship this) snippet ≤ 25 lines each, inside the AI-FIRST Methodology Card.",
      "validatedBy": "methodology-reflection.md §Style audit and (optionally) a markdown-lint sanity script."
    }
  },
  "meta": {
    "version": "1.5",
    "lastUpdated": "2026-05-03",
    "changelog": [
      "v1.5 (2026-05-03) — Added aiFirst block (citationDensity, bannedPhrases pointer to political-style-guide.md, pass2Attestation rule, wepLanguageCeiling per horizon, exemplarPolicy). Pairs with the AI-FIRST Methodology Card inserted into every analysis/methodologies/*.md file.",
      "v1.4 (2026-05-01) — Added quarter-ahead / year-ahead / election-cycle blocks for the long-horizon-forecast workflows.",
      "v1.1 (2026-04-23) — Added analyticalSupplementary section (pestle-analysis · political-stride-assessment · wildcards-blackswans · quantitative-swot); non-blocking."
    ]
  }
}