Skip to content

Commit 912e106

Browse files
claude: add QUARTO_PDF_STANDARD env var and pdf analysis tools
Add environment variable fallback for pdf-standard option so any document without an explicit pdf-standard setting inherits from QUARTO_PDF_STANDARD (comma-separated, e.g. "ua-1" or "a-2b,ua-1"). Also add tools/find-tests.ts to find test documents by format and tools/filter-pdf-errors.ts to extract and summarize PDF validation errors from render logs. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 8faf55f commit 912e106

6 files changed

Lines changed: 333 additions & 4 deletions

File tree

src/command/render/output-tex.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import {
1818
kPdfStandard,
1919
kPdfStandardApplied,
2020
kTargetFormat,
21+
pdfStandardEnv,
2122
} from "../../config/constants.ts";
2223
import { Format } from "../../config/types.ts";
2324
import { asArray } from "../../core/array.ts";
@@ -90,7 +91,8 @@ export function texToPdfOutputRecipe(
9091
const pdfStandards = asArray(
9192
pandocOptions.format.metadata?.[kPdfStandardApplied] ??
9293
format.render?.[kPdfStandard] ??
93-
format.metadata?.[kPdfStandard],
94+
format.metadata?.[kPdfStandard] ??
95+
pdfStandardEnv(),
9496
) as string[];
9597
if (pdfStandards.length > 0) {
9698
await validatePdfStandards(pdfOutput, pdfStandards, {

src/command/render/output-typst.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import {
3434
kOutputFile,
3535
kPdfStandard,
3636
kVariant,
37+
pdfStandardEnv,
3738
} from "../../config/constants.ts";
3839
import { error, warning } from "../../deno_ral/log.ts";
3940
import { ErrorEx } from "../../core/lib/error.ts";
@@ -158,7 +159,8 @@ export function typstPdfOutputRecipe(
158159
),
159160
pdfStandard: normalizePdfStandardForTypst(
160161
asArray(
161-
format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard],
162+
format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard] ??
163+
pdfStandardEnv(),
162164
),
163165
),
164166
};
@@ -185,7 +187,8 @@ export function typstPdfOutputRecipe(
185187

186188
// Validate PDF against specified standards using verapdf (if available)
187189
const pdfStandards = asArray(
188-
format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard],
190+
format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard] ??
191+
pdfStandardEnv(),
189192
) as string[];
190193
if (pdfStandards.length > 0) {
191194
await validatePdfStandards(pdfOutput, pdfStandards, {

src/config/constants.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,15 @@ export const kKeepTex = "keep-tex";
8888
export const kKeepTyp = "keep-typ";
8989
export const kPdfStandard = "pdf-standard";
9090
export const kPdfStandardApplied = "pdf-standard-applied";
91+
92+
/**
 * Fallback source for the `pdf-standard` option: the QUARTO_PDF_STANDARD
 * environment variable.
 *
 * The value is read as a comma-separated list (e.g. "ua-1" or "a-2b,ua-1").
 * Each entry is trimmed and empty entries are dropped.
 *
 * @returns The parsed entries, or `undefined` when the variable is unset or
 *   is the empty string. A value made up only of separators/whitespace yields
 *   an empty array.
 */
export function pdfStandardEnv(): string[] | undefined {
  const raw = Deno.env.get("QUARTO_PDF_STANDARD");
  if (!raw) {
    return undefined;
  }

  const standards: string[] = [];
  for (const piece of raw.split(",")) {
    const name = piece.trim();
    if (name.length > 0) {
      standards.push(name);
    }
  }
  return standards;
}
91100
export const kKeepIpynb = "keep-ipynb";
92101
export const kKeepSource = "keep-source";
93102
export const kVariant = "variant";

src/format/pdf/format-pdf.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import {
3838
kTblCapLoc,
3939
kTopLevelDivision,
4040
kWarning,
41+
pdfStandardEnv,
4142
} from "../../config/constants.ts";
4243
import { warning } from "../../deno_ral/log.ts";
4344
import { asArray } from "../../core/array.ts";
@@ -326,7 +327,8 @@ function createPdfFormat(
326327

327328
// Handle pdf-standard option for PDF/A, PDF/UA, PDF/X conformance
328329
const pdfStandard = asArray(
329-
format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard],
330+
format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard] ??
331+
pdfStandardEnv(),
330332
);
331333
if (pdfStandard.length > 0) {
332334
const { version, standards, needsTagging } =

tools/filter-pdf-errors.ts

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
#!/usr/bin/env -S quarto run
2+
/**
3+
* filter-pdf-errors.ts
4+
*
5+
* Parse a quarto render log and extract PDF validation errors,
6+
* showing which files failed and why. Aggregates errors by type at the end.
7+
*
8+
* Handles two error formats:
9+
* - Typst compiler errors: "error: PDF/UA-1 error: missing alt text"
10+
* - verapdf validation failures: "WARN: PDF validation failed for ua-2:\n<rule text>"
11+
*
12+
* Usage:
13+
* quarto run tools/filter-pdf-errors.ts <logfile>
14+
*
15+
* Reads from stdin if no file is given.
16+
*/
17+
18+
/**
 * Remove ANSI color/style escape sequences (ESC "[" followed by digits and
 * semicolons, terminated by "m") from a string. Other escape sequences are
 * left untouched.
 */
function stripAnsi(s: string): string {
  const colorSequence = /\x1b\[[0-9;]*m/g;
  const cleaned = s.replace(colorSequence, "");
  return cleaned;
}
22+
23+
/**
 * Decode the HTML entities that show up in verapdf output:
 * `&lt;`, `&gt;`, `&amp;`, `&quot;` and `&#39;`.
 *
 * Decoding happens in a single pass so that each entity is unescaped exactly
 * once. Chained replacements would turn `&amp;quot;` into `"` instead of the
 * correct `&quot;`, because the `&` produced by one step would be picked up
 * by a later step.
 *
 * @param s Text that may contain HTML entities.
 * @returns The text with the supported entities decoded; anything else is
 *   returned unchanged.
 */
function unescapeHtml(s: string): string {
  const entities: Record<string, string> = {
    "&lt;": "<",
    "&gt;": ">",
    "&amp;": "&",
    "&quot;": '"',
    "&#39;": "'",
  };
  return s.replace(
    /&(?:lt|gt|amp|quot|#39);/g,
    // The pattern only matches keys of `entities`; the fallback keeps the
    // callback total for the type checker.
    (entity) => entities[entity] ?? entity,
  );
}
32+
33+
/** The slice of a render log that belongs to one "pandoc" invocation. */
interface RenderBlock {
  /** Path captured from a preceding "Rendering <path>.qmd" line, or "" if none. */
  inputFile: string;
  /** Value of the most recent "output-file:" line inside the block, or "" if none. */
  outputFile: string;
  /** Log lines of the block, starting with the "pandoc" header line. */
  lines: Array<string>;
}
38+
39+
/** One error type observed for one file. */
interface ErrorEntry {
  /** File the error is reported against (input file, else output file). */
  file: string;
  /** Normalized error description used as the aggregation key. */
  errorType: string;
  /** Filtered log lines of the render block the error came from. */
  context: Array<string>;
}
44+
45+
/**
 * Scan one render block for PDF standard errors.
 *
 * Two kinds of error are recognized:
 *  - Typst compiler errors such as "error: PDF/UA-1 error: missing alt text",
 *    recorded as "PDF/UA-1 error: missing alt text";
 *  - verapdf failures: a "WARN: PDF validation failed for <standard>:" header
 *    followed by rule lines, each recorded as "<standard>: <rule line>".
 *
 * @param block The render block whose `lines` are scanned.
 * @returns `seenErrors`, the distinct error descriptions found, and
 *   `context`, the block's lines with noise removed: "Download" lines, leading
 *   blank lines, stack traces, the duplicated "ERROR:" lines and trailing
 *   blank lines.
 */
function extractErrors(block: RenderBlock): {
  seenErrors: Set<string>;
  context: string[];
} {
  const verapdfHeader = /^WARN: PDF validation failed for ([\w-]+):$/;
  const context: string[] = [];
  const seenErrors = new Set<string>();
  let inStack = false;

  for (let i = 0; i < block.lines.length; i++) {
    const line = block.lines[i];
    const trimmed = line.trim();

    // Skip download lines
    if (/^Download /.test(trimmed)) continue;
    // Skip blank lines before the first kept line
    if (context.length === 0 && trimmed === "") continue;

    // A stack trace is the "Stack trace:" line plus the indented "at ..."
    // frames that follow it; the first non-frame line ends it.
    if (/^Stack trace:/.test(trimmed)) {
      inStack = true;
      continue;
    }
    if (inStack) {
      if (/^\s+at /.test(line)) continue;
      inStack = false;
    }

    // Skip the "ERROR:" lines that repeat what typst already reported
    if (/^ERROR: error: PDF\//.test(trimmed)) continue;
    if (/^ERROR: Typst compilation failed/.test(trimmed)) continue;

    // Typst compiler errors: "error: PDF/UA-1 error: missing alt text"
    const typstMatch = line.match(/error: (PDF\/\S+ error: .+)/);
    if (typstMatch) {
      seenErrors.add(typstMatch[1].trim());
    }

    // verapdf failures. The header is matched against the trimmed line, like
    // every other check here, so trailing whitespace (for example the "\r"
    // of a CRLF log) does not hide it.
    const verapdfMatch = trimmed.match(verapdfHeader);
    if (verapdfMatch) {
      const standard = verapdfMatch[1];
      // Rule lines run until a blank line, an "Output created" line, or the
      // header of the next failed standard (which must not be counted as a
      // rule of this one).
      for (let j = i + 1; j < block.lines.length; j++) {
        const ruleLine = block.lines[j].trim();
        if (
          ruleLine === "" ||
          /^Output created/.test(ruleLine) ||
          verapdfHeader.test(ruleLine)
        ) {
          break;
        }
        seenErrors.add(`${standard}: ${ruleLine}`);
      }
    }

    context.push(line);
  }

  // Trim trailing blank lines
  while (context.length > 0 && context[context.length - 1].trim() === "") {
    context.pop();
  }

  return { seenErrors, context };
}
108+
109+
/**
 * Read a quarto render log and print the PDF validation errors found in it.
 *
 * The log is read from the file named by the first command-line argument, or
 * from stdin when no argument is given. Output is one section per file with
 * errors (file name, error types, filtered log context) followed by a summary:
 * totals, error types sorted by count, and the files affected by each type.
 *
 * In the summary, the count of an error type is the number of render blocks
 * it was seen in; the per-type file list is de-duplicated by file name.
 */
async function main() {
  const path = Deno.args[0];
  const text = path
    ? await Deno.readTextFile(path)
    : await new Response(Deno.stdin.readable).text();

  // Accept both LF and CRLF line endings so a stray "\r" never ends up in
  // matched text or in the printed context.
  const lines = text
    .split(/\r?\n/)
    .map((l) => unescapeHtml(stripAnsi(l)));

  // Parse into render blocks. A block starts at a "pandoc" header line; a
  // "Rendering <path>.qmd" line seen before it supplies the block's input file.
  const blocks: RenderBlock[] = [];
  let current: RenderBlock | null = null;
  let pendingInputFile = "";

  for (const line of lines) {
    // "Rendering docs/smoke-all/.../foo.qmd" precedes the pandoc block
    const renderMatch = line.match(/^Rendering\s+(\S+\.qmd)\s*$/);
    if (renderMatch) {
      pendingInputFile = renderMatch[1];
      continue;
    }

    if (/^pandoc\s*$/.test(line.trim())) {
      if (current) blocks.push(current);
      current = { inputFile: pendingInputFile, outputFile: "", lines: [line] };
      pendingInputFile = "";
      continue;
    }

    // Lines before the first "pandoc" header belong to no block and are dropped.
    if (current) {
      current.lines.push(line);
      const m = line.match(/^\s*output-file:\s*(.+)/);
      if (m) {
        current.outputFile = m[1].trim();
      }
    }
  }
  if (current) blocks.push(current);

  // Extract errors from each block
  const errors: ErrorEntry[] = [];
  const errorCounts = new Map<string, number>();
  const errorFiles = new Map<string, string[]>();

  for (const block of blocks) {
    // Cheap pre-check for either error format ("error: PDF/" also covers the
    // "ERROR: error: PDF/" spelling).
    const hasError = block.lines.some(
      (l) => l.includes("error: PDF/") || l.includes("PDF validation failed"),
    );
    if (!hasError) continue;

    const { seenErrors, context } = extractErrors(block);
    const displayFile = block.inputFile || block.outputFile;

    for (const errType of seenErrors) {
      errorCounts.set(errType, (errorCounts.get(errType) || 0) + 1);

      const files = errorFiles.get(errType) || [];
      if (!files.includes(displayFile)) {
        files.push(displayFile);
      }
      errorFiles.set(errType, files);

      errors.push({
        file: displayFile,
        errorType: errType,
        context,
      });
    }
  }

  // Print per-file errors: one section per file, using the context of the
  // first entry recorded for that file.
  const printedFiles = new Set<string>();
  for (const err of errors) {
    if (printedFiles.has(err.file)) continue;
    printedFiles.add(err.file);

    const fileErrors = errors.filter((e) => e.file === err.file);
    const types = [...new Set(fileErrors.map((e) => e.errorType))];

    console.log("─".repeat(72));
    console.log(`FILE: ${err.file}`);
    console.log(`ERRORS: ${types.join(", ")}`);
    console.log("");
    for (const line of err.context) {
      console.log(" " + line);
    }
    console.log("");
  }

  // Print summary
  console.log("═".repeat(72));
  console.log("SUMMARY");
  console.log("═".repeat(72));
  console.log("");
  console.log(`Total files with errors: ${printedFiles.size}`);
  console.log(`Total files rendered: ${blocks.length}`);
  console.log("");

  // Sort by count descending
  const sorted = [...errorCounts.entries()].sort((a, b) => b[1] - a[1]);
  for (const [errType, count] of sorted) {
    console.log(` ${count.toString().padStart(4)} ${errType}`);
  }
  console.log("");

  // List files per error type
  for (const [errType] of sorted) {
    const files = errorFiles.get(errType) || [];
    console.log(`${errType} (${files.length} files):`);
    for (const f of files) {
      console.log(` - ${f}`);
    }
    console.log("");
  }
}
232+
233+
// Script entry point: run the log filter.
main();

tools/find-tests.ts

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/**
2+
* find-tests.ts
3+
*
4+
* Find all .qmd files under a directory that target a given format.
5+
*
6+
* Usage:
7+
* quarto run --dev tools/find-tests.ts <format> <directory>
8+
*
9+
* A document matches if the format name appears as:
10+
* - The value of `format:` (string) in its YAML front matter
11+
* - A key under `format:` (object) in its YAML front matter
12+
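* - A key under `_quarto.tests:` in its YAML front matter
*
* If the document itself does not match, the same checks are applied to any
* `_quarto.yml` found in the document's directory or its ancestors, up to
* <directory>.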
13+
*/
14+
15+
import { walk } from "../src/deno_ral/fs.ts";
16+
import { readYamlFromMarkdown, readYaml } from "../src/core/yaml.ts";
17+
import { dirname, join, relative } from "../src/deno_ral/path.ts";
18+
import { existsSync } from "../src/deno_ral/fs.ts";
19+
20+
// Positional arguments: the format name to look for and the directory to search.
const format = Deno.args[0];
const dir = Deno.args[1];
if (!(format && dir)) {
  // Both arguments are required: print usage and exit with a failure status.
  console.error("Usage: quarto run tools/find-tests.ts <format> <directory>");
  Deno.exit(1);
}
25+
26+
/**
 * Report whether parsed YAML targets the given format.
 *
 * A match is any of:
 *  - `format: <format>` (string value);
 *  - `format: { <format>: ... }` (own key of the `format` mapping);
 *  - `_quarto: { tests: { <format>: ... } }` (own key of the tests mapping).
 *
 * Malformed input never throws: a missing or non-object `yaml`, `format`,
 * `_quarto` or `tests` value simply does not match. Only own keys count, so
 * names inherited from `Object.prototype` (e.g. "constructor") do not match.
 *
 * @param yaml Parsed YAML front matter or project configuration.
 * @param format Format name to look for.
 * @returns `true` when the YAML references `format` in one of the ways above.
 */
function hasFormat(
  yaml: Record<string, unknown>,
  format: string,
): boolean {
  const isRecord = (v: unknown): v is Record<string, unknown> =>
    typeof v === "object" && v !== null;
  const hasOwnKey = (v: unknown, key: string): boolean =>
    isRecord(v) && Object.prototype.hasOwnProperty.call(v, key);

  // Documents without usable front matter cannot match.
  if (!isRecord(yaml)) {
    return false;
  }

  // Check format: <string> or format: { <format>: ... }
  const fmt = yaml["format"];
  if (fmt === format || hasOwnKey(fmt, format)) {
    return true;
  }

  // Check _quarto.tests.<format>
  const quarto = yaml["_quarto"];
  return isRecord(quarto) && hasOwnKey(quarto["tests"], format);
}
50+
51+
// Resolved once up front: the search root is where the upward walk for
// _quarto.yml files stops.
const root = Deno.realPathSync(dir);

// Whether each _quarto.yml path examined so far targets the format (false when
// the file does not exist). Many documents share the same project files, so
// each one is read and parsed at most once.
const projectMatches = new Map<string, boolean>();

// Print every .qmd under <directory> that targets the format, either in its
// own front matter or through a _quarto.yml in its directory or an ancestor
// directory up to <directory>.
for await (const entry of walk(dir, { exts: [".qmd"], includeDirs: false })) {
  try {
    const content = Deno.readTextFileSync(entry.path);
    const yaml = readYamlFromMarkdown(content) as Record<string, unknown>;
    let matched = hasFormat(yaml, format);

    if (!matched) {
      // Check _quarto.yml in the same directory and ancestors up to dir
      let current = dirname(entry.path);
      while (true) {
        const quartoYml = join(current, "_quarto.yml");
        let projectMatch = projectMatches.get(quartoYml);
        if (projectMatch === undefined) {
          // A parse failure throws before anything is cached, so it is
          // retried (and the document skipped) for each affected document.
          projectMatch = existsSync(quartoYml) &&
            hasFormat(readYaml(quartoYml) as Record<string, unknown>, format);
          projectMatches.set(quartoYml, projectMatch);
        }
        if (projectMatch) {
          matched = true;
          break;
        }
        if (Deno.realPathSync(current) === root) break;
        const parent = dirname(current);
        // Reached the filesystem root without passing through dir.
        if (parent === current) break;
        current = parent;
      }
    }

    if (matched) {
      console.log(relative(Deno.cwd(), entry.path));
    }
  } catch {
    // skip files that can't be parsed
  }
}

0 commit comments

Comments
 (0)