Skip to content

Commit fe97f78

Browse files
Copilot and hotlong committed
fix(security): replace regex HTML stripping with entity encoding in sanitize middleware
HTML entity encoding (&lt;, &gt;, &amp;, &quot;, &#x27;) is more robust than regex-based tag stripping and eliminates CodeQL js/bad-tag-filter and js/incomplete-multi-character-sanitization alerts.

Co-authored-by: hotlong <50353452+hotlong@users.noreply.github.com>
1 parent 4e42934 commit fe97f78

File tree

1 file changed

+30
-21
lines changed

1 file changed

+30
-21
lines changed

api/middleware/sanitize.ts

Lines changed: 30 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,35 +1,44 @@
11
/**
22
* XSS Sanitization Middleware for Hono
33
*
4-
* Strips dangerous HTML / script content from JSON request bodies on
5-
* mutation methods (POST, PUT, PATCH). The sanitized body is stored on
6-
* the Hono context variable `sanitizedBody` so downstream handlers can
7-
* retrieve it via `c.get('sanitizedBody')`.
4+
* Encodes dangerous HTML characters in JSON request body strings on
5+
* mutation methods (POST, PUT, PATCH). Uses HTML entity encoding
6+
* (`<` → `&lt;`, `>` → `&gt;`) which is safe for both storage and
7+
* HTML rendering contexts.
8+
*
9+
* The sanitized body is stored on the Hono context variable
10+
* `sanitizedBody` so downstream handlers can retrieve it via
11+
* `c.get('sanitizedBody')`.
812
*
913
* @module api/middleware/sanitize
1014
* @see docs/guide/technical-debt-resolution.md — TD-4
1115
*/
1216
import type { MiddlewareHandler } from 'hono';
1317

14-
/** Recursively sanitize a value (string → strip tags, object → recurse). */
18+
/**
19+
* HTML entity encoding map.
20+
* Covers the minimal set of characters required to prevent XSS
21+
* in both HTML element and attribute contexts.
22+
*/
23+
const HTML_ENTITIES: Record<string, string> = {
24+
'&': '&amp;',
25+
'<': '&lt;',
26+
'>': '&gt;',
27+
'"': '&quot;',
28+
"'": '&#x27;',
29+
};
30+
31+
const ENTITY_RE = /[&<>"']/g;
32+
33+
/** Encode HTML-significant characters to their entity equivalents. */
34+
function encodeEntities(str: string): string {
35+
return str.replace(ENTITY_RE, (ch) => HTML_ENTITIES[ch] ?? ch);
36+
}
37+
38+
/** Recursively sanitize a value (string → entity-encode, object → recurse). */
1539
export function sanitizeValue(value: unknown): unknown {
1640
if (typeof value === 'string') {
17-
// Multi-pass approach for robust HTML/script removal:
18-
// 1. Remove script tags (including variations with whitespace)
19-
// 2. Remove event handler attributes
20-
// 3. Strip remaining HTML tags
21-
// 4. Re-run HTML strip to catch tags reconstructed from fragments
22-
let result = value;
23-
// Loop until stable — handles nested/reconstructed patterns
24-
let previous: string;
25-
do {
26-
previous = result;
27-
result = result
28-
.replace(/<\s*script[\s\S]*?<\s*\/\s*script\s*>/gi, '')
29-
.replace(/\bon\w+\s*=\s*["'][^"']*["']/gi, '')
30-
.replace(/<\/?[a-z][\s\S]*?>/gi, '');
31-
} while (result !== previous);
32-
return result;
41+
return encodeEntities(value);
3342
}
3443
if (Array.isArray(value)) {
3544
return value.map(sanitizeValue);

0 commit comments

Comments
 (0)