Skip to content

Commit 28b16fb

Browse files
committed
fix: address review findings — XSS, OOM, missing dep, error exposure
- escHtml: escape quotes for attribute context (XSS prevention)
- /manifest: add LIMIT 100K to prevent OOM on large result sets
- classification: add missing sentencepiece dependency for xlm-roberta
- worker error handlers: return generic message instead of raw errors
1 parent d8a29aa commit 28b16fb

3 files changed

Lines changed: 7 additions & 7 deletions

File tree

apps/web/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -986,7 +986,7 @@ <h1>The largest classified corpus of Word documents</h1>
986986

987987
function escHtml(s) {
988988
if (!s) return '';
989-
return s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
989+
return s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;').replace(/'/g, '&#39;');
990990
}
991991

992992
// ---------- preview ----------

apps/web/worker/src/index.ts

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ async function handleDocuments(url: URL, env: Env, origin: string): Promise<Resp
288288
} catch (err) {
289289
const message = err instanceof Error ? err.message : String(err);
290290
console.error("handleDocuments error:", message);
291-
return json({ error: message }, 500, origin);
291+
return json({ error: "Internal server error" }, 500, origin);
292292
}
293293
}
294294

@@ -299,11 +299,10 @@ const R2_BASE = "https://docxcorp.us/documents/";
299299
async function handleManifest(url: URL, env: Env, origin: string): Promise<Response> {
300300
try {
301301
const sql = neon(env.DATABASE_URL);
302-
const { where, params } = buildFilters(url);
303-
302+
const { where, params, paramIndex } = buildFilters(url);
304303
const rows = await sql.query(
305-
`SELECT id FROM documents WHERE ${where} ORDER BY id`,
306-
params
304+
`SELECT id FROM documents WHERE ${where} ORDER BY id LIMIT $${paramIndex}`,
305+
[...params, 100000]
307306
) as { id: string }[];
308307

309308
const body = rows.map((r) => `${R2_BASE}${r.id}.docx`).join("\n") + "\n";
@@ -329,6 +328,6 @@ async function handleManifest(url: URL, env: Env, origin: string): Promise<Respo
329328
} catch (err) {
330329
const message = err instanceof Error ? err.message : String(err);
331330
console.error("handleManifest error:", message);
332-
return json({ error: message }, 500, origin);
331+
return json({ error: "Internal server error" }, 500, origin);
333332
}
334333
}

scripts/classification/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,5 @@ dependencies = [
1515
"datasets>=3.0.0",
1616
"scikit-learn>=1.5.0",
1717
"accelerate>=1.0.0",
18+
"sentencepiece>=0.1.99",
1819
]

0 commit comments

Comments
 (0)