|
1 | 1 | /** |
2 | 2 | * XSS Sanitization Middleware for Hono |
3 | 3 | * |
4 | | - * Strips dangerous HTML / script content from JSON request bodies on |
5 | | - * mutation methods (POST, PUT, PATCH). The sanitized body is stored on |
6 | | - * the Hono context variable `sanitizedBody` so downstream handlers can |
7 | | - * retrieve it via `c.get('sanitizedBody')`. |
| 4 | + * Encodes dangerous HTML characters in JSON request body strings on |
| 5 | + * mutation methods (POST, PUT, PATCH). Uses HTML entity encoding |
| 6 | + * (`<` → `&lt;`, `>` → `&gt;`) which is safe for both storage and |
| 7 | + * HTML rendering contexts. |
| 8 | + * |
| 9 | + * The sanitized body is stored on the Hono context variable |
| 10 | + * `sanitizedBody` so downstream handlers can retrieve it via |
| 11 | + * `c.get('sanitizedBody')`. |
8 | 12 | * |
9 | 13 | * @module api/middleware/sanitize |
10 | 14 | * @see docs/guide/technical-debt-resolution.md — TD-4 |
11 | 15 | */ |
12 | 16 | import type { MiddlewareHandler } from 'hono'; |
13 | 17 |
|
14 | | -/** Recursively sanitize a value (string → strip tags, object → recurse). */ |
| 18 | +/** |
| 19 | + * HTML entity encoding map. |
| 20 | + * Covers the minimal set of characters required to prevent XSS |
| 21 | + * in both HTML element and attribute contexts. |
| 22 | + */ |
| 23 | +const HTML_ENTITIES: Record<string, string> = { |
| 24 | + '&': '&amp;', |
| 25 | + '<': '&lt;', |
| 26 | + '>': '&gt;', |
| 27 | + '"': '&quot;', |
| 28 | + "'": '&#39;', |
| 29 | +}; |
| 30 | + |
| 31 | +const ENTITY_RE = /[&<>"']/g; |
| 32 | + |
| 33 | +/** Encode HTML-significant characters to their entity equivalents. */ |
| 34 | +function encodeEntities(str: string): string { |
| 35 | + return str.replace(ENTITY_RE, (ch) => HTML_ENTITIES[ch] ?? ch); |
| 36 | +} |
| 37 | + |
| 38 | +/** Recursively sanitize a value (string → entity-encode, object → recurse). */ |
15 | 39 | export function sanitizeValue(value: unknown): unknown { |
16 | 40 | if (typeof value === 'string') { |
17 | | - // Multi-pass approach for robust HTML/script removal: |
18 | | - // 1. Remove script tags (including variations with whitespace) |
19 | | - // 2. Remove event handler attributes |
20 | | - // 3. Strip remaining HTML tags |
21 | | - // 4. Re-run HTML strip to catch tags reconstructed from fragments |
22 | | - let result = value; |
23 | | - // Loop until stable — handles nested/reconstructed patterns |
24 | | - let previous: string; |
25 | | - do { |
26 | | - previous = result; |
27 | | - result = result |
28 | | - .replace(/<\s*script[\s\S]*?<\s*\/\s*script\s*>/gi, '') |
29 | | - .replace(/\bon\w+\s*=\s*["'][^"']*["']/gi, '') |
30 | | - .replace(/<\/?[a-z][\s\S]*?>/gi, ''); |
31 | | - } while (result !== previous); |
32 | | - return result; |
| 41 | + return encodeEntities(value); |
33 | 42 | } |
34 | 43 | if (Array.isArray(value)) { |
35 | 44 | return value.map(sanitizeValue); |
|
0 commit comments