|
| 1 | +import type { Transform } from 'node:stream' |
| 2 | +import { promisify } from 'node:util' |
| 3 | +import zlib from 'node:zlib' |
| 4 | +import type { MessageCodecHandler, MessageCodecRegistration } from './messageCodec.ts' |
| 5 | +import { MessageCodecEnum } from './messageCodec.ts' |
| 6 | + |
/** Error text thrown when the running Node.js build exposes no zstd bindings on `node:zlib`. */
const ZSTD_UNSUPPORTED_MSG =
  'zlib.zstdCompress and zlib.zstdDecompress are not available in this Node.js version. ' +
  'Message compression requires Node.js >=22.15.0 or >=23.8.0.'

/**
 * Default ceiling, in bytes, for the decompressed size of one message (100 MiB).
 *
 * The ceiling exists to defuse decompression bombs: a few bytes of compressed,
 * highly repetitive input can inflate to gigabytes and exhaust process memory.
 * 100 MiB sits well above realistic queue message sizes (inline SQS/SNS bodies are
 * capped far below it, and offloaded payloads are typically a few MiB) while still
 * bounding the damage a malicious or corrupt frame can do. Pass a different value to
 * the {@link ZstdCodecHandler} constructor if larger messages are legitimate for you.
 */
export const DEFAULT_MAX_DECOMPRESSED_BYTES = 100 * 2 ** 20

// Feature-detected once, when this module is loaded: each binding is `undefined`
// on Node versions without zstd support. Detection itself never throws, so
// importing this module is safe on older Node; the failure is deferred until
// compress/decompress is actually called.
const zstdCompress =
  typeof zlib.zstdCompress !== 'function' ? undefined : promisify(zlib.zstdCompress)
const zstdDecompress =
  typeof zlib.zstdDecompress !== 'function' ? undefined : promisify(zlib.zstdDecompress)

| 30 | + |
/**
 * Codec handler that compresses and decompresses message payloads with the zstd
 * bindings of `node:zlib`.
 *
 * Every decompression is bounded by `maxDecompressedBytes` so a small hostile
 * frame cannot expand without limit.
 */
export class ZstdCodecHandler implements MessageCodecHandler {
  private readonly maxDecompressedBytes: number

  /**
   * @param maxDecompressedBytes upper bound on a single decompressed message, in bytes.
   *   Defaults to {@link DEFAULT_MAX_DECOMPRESSED_BYTES}. Must be a finite number >= 1.
   * @throws RangeError if `maxDecompressedBytes` is NaN, infinite, or less than 1.
   */
  constructor(maxDecompressedBytes: number = DEFAULT_MAX_DECOMPRESSED_BYTES) {
    // Fail fast on a limit that cannot work. NaN is the dangerous case: Node's zlib
    // option validation treats a NaN `maxOutputLength` like an omitted option and
    // falls back to its own (much larger) default, which would silently remove the
    // decompression-bomb guard. Zero, negative and infinite values would otherwise
    // only surface as an error on every later decompress() call.
    if (!Number.isFinite(maxDecompressedBytes) || maxDecompressedBytes < 1) {
      throw new RangeError(
        `maxDecompressedBytes must be a finite number >= 1, received ${maxDecompressedBytes}`,
      )
    }
    this.maxDecompressedBytes = maxDecompressedBytes
  }

  /**
   * Compresses `data` with zstd.
   *
   * Declared `async` so that the "zstd unavailable" failure is delivered as a
   * rejected promise, like every other failure of this method, instead of a
   * synchronous throw from a Promise-returning function.
   *
   * @returns a promise for the compressed bytes.
   * @throws Error (as a rejection) when this Node.js version has no zstd support.
   */
  async compress(data: Buffer): Promise<Buffer> {
    if (!zstdCompress) throw new Error(ZSTD_UNSUPPORTED_MSG)
    return zstdCompress(data)
  }

  /**
   * Decompresses a zstd frame, refusing output larger than the configured limit.
   *
   * @returns a promise for the decompressed bytes.
   * @throws Error (as a rejection) when this Node.js version has no zstd support.
   */
  async decompress(data: Buffer): Promise<Buffer> {
    if (!zstdDecompress) throw new Error(ZSTD_UNSUPPORTED_MSG)
    // maxOutputLength caps the decompressed size: zstdDecompress rejects with a
    // RangeError once the limit is exceeded, guarding against decompression bombs.
    return zstdDecompress(data, { maxOutputLength: this.maxDecompressedBytes })
  }

  /**
   * Creates a streaming zstd compressor.
   *
   * @throws Error synchronously when this Node.js version has no zstd support.
   */
  createCompressStream(): Transform {
    if (typeof zlib.createZstdCompress !== 'function') throw new Error(ZSTD_UNSUPPORTED_MSG)
    return zlib.createZstdCompress()
  }
}
| 60 | + |
// Shared handler instance for the built-in zstd codec, constructed with the
// default decompressed-size limit.
const ZSTD_HANDLER = new ZstdCodecHandler()
| 62 | + |
/**
 * Allowed characters for a custom codec name: ASCII letters, digits, hyphens, underscores.
 * This keeps the name JSON-safe without escaping and makes it a recognisable identifier.
 */
const SAFE_CODEC_NAME_RE = /^[A-Za-z0-9_-]+$/

/**
 * Returns the name string that will be written into the `__mqtCodec` field of every envelope.
 *
 * - String form: the codec value itself is the name and is returned unchanged.
 * - Object form (`{ name, handler }`): `name` is validated and returned.
 *
 * @throws Error for object-form registrations whose `name` is not a non-empty string
 *   made only of ASCII letters, digits, hyphens and underscores. Anything else could
 *   produce invalid JSON when interpolated raw into the envelope string.
 */
export function getCodecName(codec: MessageCodecRegistration): string {
  if (typeof codec !== 'object') return codec

  const { name } = codec
  // RegExp#test coerces its argument to a string, so without the typeof guard an
  // untyped caller passing `name: undefined` or `name: 123` would be validated as
  // "undefined" / "123" and a non-string would leak out of this function.
  if (typeof name !== 'string' || !SAFE_CODEC_NAME_RE.test(name)) {
    throw new Error(
      `Invalid codec name "${String(name)}": only ASCII letters, digits, hyphens, and underscores are allowed`,
    )
  }
  return name
}
| 85 | + |
/**
 * Looks up the {@link MessageCodecHandler} that implements a codec registration.
 *
 * - Object form (`{ name, handler }`): the caller-supplied handler is returned as is.
 * - String form (`MessageCodec`): mapped to the matching built-in handler.
 *
 * @throws Error when a string-form codec has no built-in handler.
 */
export function resolveCodecHandler(codec: MessageCodecRegistration): MessageCodecHandler {
  if (typeof codec !== 'object') {
    switch (codec) {
      case MessageCodecEnum.ZSTD:
        return ZSTD_HANDLER
      default:
        throw new Error(`Unsupported codec: ${codec}`)
    }
  }
  return codec.handler
}
| 97 | + |
/**
 * Serialises already-compressed bytes into a codec envelope string.
 * Intended for callers that hold pre-compressed data and must not compress it again.
 *
 * When `preservedFields` contains at least one key, those fields are written as
 * plaintext siblings of the codec fields (`{ ...preserved, __mqtCodec, __mqtData }`).
 * Publishers rely on this to keep identity/routing fields (`id`, `type`, …) readable
 * on the wire, so broker-side filtering (e.g. an SNS body-scoped FilterPolicy) keeps
 * working for compressed messages — mirroring what an offloaded-payload pointer
 * carries. The codec fields are spread in last, so a preserved key with the same
 * name is overwritten and cannot corrupt the envelope; consumers decode `__mqtData`
 * and disregard the siblings.
 *
 * When `preservedFields` is absent or empty, the envelope is assembled by direct
 * string interpolation rather than JSON.stringify. No intermediate object is built:
 * the base64 string and the envelope string are the only allocations on that path.
 *
 * `codecName` is interpolated raw on that path and therefore must already be a
 * JSON-safe identifier (see {@link getCodecName}).
 *
 * @param compressed bytes produced by the codec's compressor.
 * @param codecName value for the `__mqtCodec` field.
 * @param preservedFields optional plaintext fields to emit next to the codec fields.
 * @returns the envelope as a JSON string.
 */
export function buildCodecEnvelope(
  compressed: Buffer,
  codecName: string,
  preservedFields?: Record<string, unknown>,
): string {
  const base64Payload = compressed.toString('base64')

  if (preservedFields && Object.keys(preservedFields).length > 0) {
    // One JSON.stringify call takes care of escaping every preserved value.
    // Codec fields come after the spread, so they override any colliding key.
    const envelope = { ...preservedFields, __mqtCodec: codecName, __mqtData: base64Payload }
    return JSON.stringify(envelope)
  }

  // Fast path: nothing to preserve, so build the JSON text directly.
  return `{"__mqtCodec":"${codecName}","__mqtData":"${base64Payload}"}`
}
0 commit comments