Skip to content

Commit a6e7ac1

Browse files
authored
Merge pull request #95 from zack-dev-cm/codex/debug-clawpatch-site
[codex] fix site crawler checks
2 parents 9dfc86a + 7a8898a commit a6e7ac1

8 files changed

Lines changed: 115 additions & 3 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
- Added a release-prep checklist for auditing changelog, package metadata, and dry-run package contents without publishing.
2222
- Improved bounded source grouping so large flat directories split repeated filename families like command, plugin, doctor, and runtime files into more coherent review slices.
2323
- Fixed acpx provider error reporting by reading the terminal `result.stopReason` envelope and surfacing non-`end_turn` reasons as typed `ClawpatchError` codes (`agent-cancelled`, `agent-refused`, `agent-truncated`) instead of opaque `malformed-output`, thanks @coletebou.
24+
- Added website crawler artifacts and a static smoke check for metadata, anchors, and social-card dimensions, thanks @zack-dev-cm.
2425
- Improved OpenCode malformed JSON diagnostics with output length, event kinds, and a bounded preview, thanks @rohitjavvadi.
2526
- Fixed finding signatures so equivalent evidence remains stable across re-reviews, thanks @rohitjavvadi.
2627
- Fixed provider exit-code classification for stdout-only authentication and quota failures, thanks @rohitjavvadi.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
"lint": "oxlint . --config oxlint.json",
2020
"format": "oxfmt --write .",
2121
"format:check": "oxfmt --check .",
22+
"website:smoke": "node scripts/website-smoke.mjs",
2223
"test": "vitest run",
2324
"pack:smoke": "node scripts/package-smoke.mjs"
2425
},

scripts/website-smoke.mjs

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import { readFile, stat } from "node:fs/promises";
2+
import { join } from "node:path";
3+
4+
// Every path checked by this script is resolved against the directory the
// command is run from.
const root = process.cwd();
5+
// Directory holding the static site assets.
const website = join(root, "website");
6+
// Messages for failed checks, collected so they can be reported together.
const failures = [];
7+
8+
/**
 * Records a failed check without stopping the script.
 *
 * @param {string} message - Description of the failed check; appended to `failures`.
 */
function fail(message) {
9+
failures.push(message);
10+
}
11+
12+
/**
 * Reads a file as UTF-8 text, resolving the path against `root`.
 *
 * Any read error is recorded through `fail` as `missing <relativePath>` and an
 * empty string is returned, so the checks that use the content still run.
 *
 * @param {string} relativePath - Path relative to `root`.
 * @returns {Promise<string>} The file content, or "" when the read failed.
 */
async function mustRead(relativePath) {
13+
try {
14+
return await readFile(join(root, relativePath), "utf8");
15+
} catch {
16+
fail(`missing ${relativePath}`);
17+
return "";
18+
}
19+
}
20+
21+
/**
 * Reduces an HTML fragment to plain text on a single line.
 *
 * @param {string} value - HTML fragment.
 * @returns {string} Text with tags removed, whitespace runs collapsed to one
 *   space, and leading/trailing whitespace trimmed.
 */
function stripTags(value) {
22+
return value
23+
// <br> becomes a space so the words on either side of it do not fuse.
.replace(/<br\s*\/?>/giu, " ")
24+
// Every other tag is dropped with no replacement.
.replace(/<[^>]+>/gu, "")
25+
.replace(/\s+/gu, " ")
26+
.trim();
27+
}
28+
29+
/**
 * Returns capture group 1 of the first match of `pattern` in `html`.
 *
 * @param {string} html - Text to search.
 * @param {RegExp} pattern - Pattern whose first capture group holds the value.
 * @param {string} label - Name used in the `missing <label>` failure message.
 * @returns {string} The captured text. Returns "" when there is no match (a
 *   failure is recorded in that case) or when the group is empty or unmatched.
 */
function extractFirst(html, pattern, label) {
30+
const match = html.match(pattern);
31+
if (!match) {
32+
fail(`missing ${label}`);
33+
return "";
34+
}
35+
return match[1] || "";
36+
}
37+
38+
// Load the three text artifacts. A file that cannot be read comes back as ""
// (with a failure already recorded), so the checks below still execute.
const html = await mustRead("website/index.html");
39+
const robots = await mustRead("website/robots.txt");
40+
const sitemap = await mustRead("website/sitemap.xml");
41+
42+
// The <title> text must match exactly after tag stripping and whitespace collapsing.
const title = stripTags(extractFirst(html, /<title>([\s\S]*?)<\/title>/iu, "title"));
43+
if (title !== "Clawpatch — Automated Code Review") {
44+
fail(`unexpected title: ${title}`);
45+
}
46+
47+
// The pattern only matches name="description" followed by content="..." in
// that order with double quotes; no match falls back to "" and fails the check below.
const description = html.match(/<meta\s+name="description"\s+content="([^"]+)"/iu)?.[1] || "";
48+
if (!description.includes("Automated code review that lands fixes")) {
49+
fail("meta description does not contain the product promise");
50+
}
51+
52+
// The pattern only matches an <h1> written without attributes. stripTags turns
// <br> into a space, so a heading broken across lines compares as one line.
const h1 = stripTags(extractFirst(html, /<h1>([\s\S]*?)<\/h1>/iu, "h1"));
53+
if (h1 !== "Code review with explicit fixes") {
54+
fail(`unexpected h1 text: ${h1}`);
55+
}
56+
57+
// Every in-page link (href="#name") must have a matching id="name" in the page.
const ids = new Set([...html.matchAll(/\sid="([^"]+)"/giu)].map((match) => match[1]));
58+
const anchorLinks = [...html.matchAll(/href="#([^"]+)"/giu)].map((match) => match[1]);
59+
for (const id of anchorLinks) {
60+
if (!ids.has(id)) fail(`missing anchor target: #${id}`);
61+
}
62+
63+
// robots.txt must contain the sitemap reference line verbatim.
if (!robots.includes("Sitemap: https://clawpatch.ai/sitemap.xml")) {
64+
fail("robots.txt missing sitemap reference");
65+
}
66+
67+
// sitemap.xml must list the homepage URL.
if (!sitemap.includes("<loc>https://clawpatch.ai/</loc>")) {
68+
fail("sitemap.xml missing canonical homepage loc");
69+
}
70+
71+
// Validate the raster link-preview card: it must exist, be a PNG, and measure
// exactly 1200x630.
//
// A read error is recorded as a failure instead of rejecting the top-level
// await, so failures collected by other checks are still reported.
let socialCard = null;
try {
  socialCard = await readFile(join(website, "social-card.png"));
} catch {
  fail("missing website/social-card.png");
}

if (socialCard !== null) {
  // PNG layout: 8-byte signature, then the IHDR chunk (4-byte length, 4-byte
  // type "IHDR"), whose data begins with big-endian width at byte 16 and
  // height at byte 20. The length guard keeps readUInt32BE from throwing a
  // RangeError on a truncated file.
  const isPng =
    socialCard.length >= 24 &&
    socialCard.toString("ascii", 1, 4) === "PNG" &&
    socialCard.toString("ascii", 12, 16) === "IHDR";

  if (!isPng) {
    fail("social-card.png is not a PNG");
  } else {
    const width = socialCard.readUInt32BE(16);
    const height = socialCard.readUInt32BE(20);
    if (width !== 1200 || height !== 630) {
      fail(`social-card.png dimensions are ${width}x${height}, expected 1200x630`);
    }
  }
}
81+
82+
// These files only need to exist; their content is not inspected.
for (const file of ["website/favicon.svg", "website/CNAME", "website/.nojekyll"]) {
83+
try {
84+
await stat(join(root, file));
85+
} catch {
86+
fail(`missing ${file}`);
87+
}
88+
}
89+
90+
// Print every collected failure, one per line, and exit with status 1.
if (failures.length) {
91+
console.error(failures.join("\n"));
92+
process.exit(1);
93+
}
94+
95+
console.log("Website smoke checks passed.");

src/exec.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ describe("runCommandArgs", () => {
9292
"import { writeFileSync } from 'node:fs';",
9393
"process.on('SIGTERM', () => {});",
9494
"process.send?.('ready');",
95-
`setTimeout(() => writeFileSync(${JSON.stringify(marker)}, 'alive'), 2500);`,
95+
`setTimeout(() => writeFileSync(${JSON.stringify(marker)}, 'alive'), 4500);`,
9696
"setInterval(() => {}, 1000);",
9797
].join("\n"),
9898
"utf8",
@@ -111,7 +111,7 @@ describe("runCommandArgs", () => {
111111
);
112112

113113
const result = await runCommandArgs(process.execPath, [parentScript], dir, undefined, {
114-
timeoutMs: 1000,
114+
timeoutMs: 3000,
115115
});
116116
await new Promise((resolve) => setTimeout(resolve, 1200));
117117

website/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ Files:
88
- `favicon.svg`: browser icon
99
- `social-card.svg`: link preview card
1010
- `social-card.png`: raster link preview card for Open Graph/Twitter
11+
- `robots.txt`: crawler policy with sitemap reference
12+
- `sitemap.xml`: canonical single-page sitemap
1113

1214
Preview:
1315

website/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1057,7 +1057,7 @@ <h2>Reference</h2>
10571057
<main>
10581058
<header class="home-hero">
10591059
<p class="eyebrow">Automated Code Review · Explicit Fixes</p>
1060-
<h1>Code review with<br />explicit fixes</h1>
1060+
<h1>Code review with <br />explicit fixes</h1>
10611061
<p class="lede">
10621062
Clawpatch maps codebases into semantic feature slices, reviews them for bugs and quality
10631063
issues, and records explicit fix attempts with validation.

website/robots.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
User-agent: *
2+
Allow: /
3+
4+
Sitemap: https://clawpatch.ai/sitemap.xml

website/sitemap.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
3+
<url>
4+
<loc>https://clawpatch.ai/</loc>
5+
<lastmod>2026-05-20</lastmod>
6+
<changefreq>weekly</changefreq>
7+
<priority>1.0</priority>
8+
</url>
9+
</urlset>

0 commit comments

Comments
 (0)