diff --git a/.gitignore b/.gitignore index f9c5983..31492af 100644 --- a/.gitignore +++ b/.gitignore @@ -96,6 +96,8 @@ deps/* # Claude Code local settings .claude/ +.claude-recovery.md +.workflow-state.json # Serena MCP cache .serena/ diff --git a/TODO.md b/TODO.md index d1446de..df12489 100644 --- a/TODO.md +++ b/TODO.md @@ -2,7 +2,7 @@ ## In Progress -_None_ +- [ ] 🟑 [Refactor] REFACTOR-BIOME-2026-04-29 (branch `fix/biome-warnings-sweep`, 8 commits) β€” 63β†’1 biome warnings (-98.4%). Phases 1-5b-2 all green: pre-push opus senior-review pending then push + PR + Copilot. Started 2026-04-29. ## Pending - HIGH @@ -14,7 +14,9 @@ _None._ ## Pending - LOW (Nice to Have) -- [ ] [Lint] Biome warnings sweep (6 total, all `pnpm exec biome lint` exits 0 β€” warnings only, ran via `rtk proxy biome lint` workaround on 2026-04-28). Two from PR #111: `test/wasm/decompress-memlimit.test.ts:30` (`useTemplate` β€” string concat β†’ template literal, biome FIXABLE) + `test/wasm/decompress-memlimit.test.ts:141` (`noNonNullAssertion` on `result!` in callback success test, replace with assertion guard). Four pre-existing on master: `src/errors.ts:176` `noNonNullAssertion` (`messages[errno]!`), `src/lzma.ts:63` + `src/pool.ts:20` `noImportCycles` (lzma↔pool re-export cycle, already noted in MEMORY.md as "benign β€” ESM resolves at runtime"), `src/pool.ts:166` `noNonNullAssertion` (`this.queue.shift()!` after empty-check). Priority: L (cosmetic; can batch with another pass). Note: lint pipeline is silently broken until RTK biome bug fixes (workaround: `rtk proxy biome ...`). +- [ ] [Lint] Single residual biome warning: `test/node-api.spec.ts:249` (`suppressions/unused` β€” pre-existing biome-ignore that no longer suppresses anything). Cosmetic 1-line cleanup for a future PR. + + ## Completed diff --git a/docs/plans/WIN32-TOCTOU-2026-04-29.md b/docs/plans/WIN32-TOCTOU-2026-04-29.md index 4b4260b..fb7af82 100644 --- a/docs/plans/WIN32-TOCTOU-2026-04-29.md +++ b/docs/plans/WIN32-TOCTOU-2026-04-29.md @@ -23,13 +23,17 @@ doc-meta: ## Β§1 Scope **What changes:** Replace the by-path Win32 fallback in -`packages/tar-xz/src/node/file.ts:328-346` with a fail-closed +`packages/tar-xz/src/node/file.ts` (originally lines 328-346 at PR #114 +merge `b24040d`; post-REFACTOR-BIOME-2026-04-29 the fail-closed path +lives in private helpers `openFileExclusive` and `writeFileEntryWin32`, +delegated by `writeFileEntry` from `extractFile`) with a fail-closed JS-pure handle-based path that uses Node's `'wx'` flag (atomic exclusive create) plus an unlink-then-retry pattern for legitimate overwrite. fd-based `chmod` and `utimes` replace by-path equivalents. **What does NOT change:** -- POSIX path (lines 292-327) β€” unchanged. Already uses `O_NOFOLLOW`. +- POSIX path (originally lines 292-327; now in `writeFileEntryPosix`) + β€” unchanged. Already uses `O_NOFOLLOW`. - Leaf-symlink lstat check upstream β€” unchanged. Still rejects pre-existing symlink targets before this code runs. - Public API of `extract()` / `extractFile()` β€” unchanged. diff --git a/examples/browser/main.ts b/examples/browser/main.ts index 9127cff..92b736f 100644 --- a/examples/browser/main.ts +++ b/examples/browser/main.ts @@ -21,7 +21,9 @@ import { // --- Logging helpers --- -const logEl = document.getElementById('log')!; +const logElRaw = document.getElementById('log'); +if (!logElRaw) throw new Error('Missing required DOM element: #log'); +const logEl = logElRaw; function log(msg: string, cls: string = '') { const span = document.createElement('span'); diff --git a/packages/nxz/src/nxz.ts b/packages/nxz/src/nxz.ts index 3b4ed7d..b17e0a3 100644 --- a/packages/nxz/src/nxz.ts +++ b/packages/nxz/src/nxz.ts @@ -111,7 +111,8 @@ function parseCliArgs(args: string[]): CliOptions { for (const arg of args) { const presetMatch = arg.match(/^-(\d)$/); if (presetMatch) { - presetLevel = Number.parseInt(presetMatch[1]!, 10); + const digit = presetMatch[1]; + presetLevel = digit !== undefined ? Number.parseInt(digit, 10) : 6; } else { filteredArgs.push(arg); } @@ -707,14 +708,19 @@ async function listTarFile(filename: string, options: CliOptions): Promise p.split('/')); const common: string[] = []; - const first = parts[0]!; + const first = parts[0]; + if (first === undefined) return process.cwd(); for (let i = 0; i < first.length; i++) { const segment = first[i]; - if (parts.every((p) => p[i] === segment)) { - common.push(segment!); + if (segment !== undefined && parts.every((p) => p[i] === segment)) { + common.push(segment); } else { break; } @@ -733,7 +739,17 @@ function resolveTarOutput( ): string | null { let outputFile = options.output; if (!outputFile) { - const base = pathModule.basename(files[0]!).replace(/\/$/, ''); + // Invariant violation must fail-fast: an empty `files` array would + // otherwise silently produce an output named `.tar.xz` (just the + // suffix). Throw with a descriptive message so callers learn the + // contract instead of inheriting a degenerate output name. + const firstFile = files[0]; + if (firstFile === undefined) { + throw new Error( + 'resolveTarOutput requires at least one input file when no output path is provided' + ); + } + const base = pathModule.basename(firstFile).replace(/\/$/, ''); outputFile = `${base}.tar.xz`; } @@ -752,16 +768,37 @@ function resolveArchiveCwd( resolvedFiles: string[], pathModule: typeof import('node:path') ): string { - if (resolvedFiles.length === 1 && statSync(resolvedFiles[0]!).isDirectory()) { - return resolvedFiles[0]!; + const firstFile = resolvedFiles[0]; + if (resolvedFiles.length === 1 && firstFile !== undefined && statSync(firstFile).isDirectory()) { + return firstFile; } const parents = resolvedFiles.map((f) => (statSync(f).isDirectory() ? f : pathModule.dirname(f))); - return parents.length === 1 ? parents[0]! : findCommonParent(parents); + if (parents.length === 1) { + const parent = parents[0]; + return parent !== undefined ? parent : findCommonParent(resolvedFiles); + } + return findCommonParent(parents); } /** * Collect all files to include in a tar archive, relative to cwd. */ + +/** + * Build the relative archive path for a directory entry. + */ +function buildEntryPath( + entry: { parentPath: string; name: string }, + dirAbsPath: string, + dirRelative: string +): string { + const entryPath = + entry.parentPath === dirAbsPath + ? entry.name + : `${entry.parentPath.slice(dirAbsPath.length + 1)}/${entry.name}`; + return dirRelative ? `${dirRelative}/${entryPath}` : entryPath; +} + async function collectArchiveFiles( resolvedFiles: string[], cwd: string, @@ -775,11 +812,7 @@ async function collectArchiveFiles( const dirRelative = pathModule.relative(cwd, file); for (const entry of entries) { if (entry.isFile()) { - const entryPath = - entry.parentPath === file - ? entry.name - : `${entry.parentPath.slice(file.length + 1)}/${entry.name}`; - filesToArchive.push(dirRelative ? `${dirRelative}/${entryPath}` : entryPath); + filesToArchive.push(buildEntryPath(entry, file, dirRelative)); } } } else { @@ -991,8 +1024,16 @@ async function main(): Promise { process.exit(exitCode); } - // Check for tar-create mode: -T with files that aren't .tar.xz archives - const mode = determineMode(options, options.files[0]!); + // Check for tar-create mode: -T with files that aren't .tar.xz archives. + // The stdin check above already exits when files.length === 0, so reaching + // here means files[0] is defined. Fail-fast on a future invariant breach + // (e.g. argument-parsing changes) instead of silently dispatching with an + // empty filename. + const firstFile = options.files[0]; + if (firstFile === undefined) { + throw new Error('Internal error: expected at least one input file after stdin handling'); + } + const mode = determineMode(options, firstFile); if (mode === 'tar-create') { const exitCode = await createTarFile(options.files, options); process.exit(exitCode); diff --git a/packages/tar-xz/src/browser/extract.ts b/packages/tar-xz/src/browser/extract.ts index 3661277..3726e12 100644 --- a/packages/tar-xz/src/browser/extract.ts +++ b/packages/tar-xz/src/browser/extract.ts @@ -25,6 +25,35 @@ import { /** * Parse TAR data into entries */ + +/** + * Handle a PAX_HEADER block: parse its attributes and advance the offset. + * Returns the updated offset and the parsed PAX attributes. + */ +function parsePaxHeaderBlock( + data: Uint8Array, + offset: number, + size: number +): { offset: number; paxAttrs: PaxAttributes } { + const paxData = data.subarray(offset, offset + size); + const newOffset = offset + size + calculatePadding(size); + return { offset: newOffset, paxAttrs: parsePaxData(paxData) }; +} + +/** + * Extract entry content bytes and advance the offset past the content + padding. + * Returns the updated offset and the content Uint8Array. + */ +function extractEntryContent( + data: Uint8Array, + offset: number, + size: number +): { offset: number; contentData: Uint8Array } { + const contentData = size > 0 ? data.subarray(offset, offset + size) : new Uint8Array(0); + const newOffset = offset + size + calculatePadding(size); + return { offset: newOffset, contentData }; +} + function parseTar(data: Uint8Array): Array { const entries: Array = []; let offset = 0; @@ -54,10 +83,7 @@ function parseTar(data: Uint8Array): Array { // Handle PAX headers if (entry.type === TarEntryType.PAX_HEADER) { - const paxSize = entry.size; - const paxData = data.subarray(offset, offset + paxSize); - offset += paxSize + calculatePadding(paxSize); - paxAttrs = parsePaxData(paxData); + ({ offset, paxAttrs } = parsePaxHeaderBlock(data, offset, entry.size)); continue; } @@ -73,10 +99,8 @@ function parseTar(data: Uint8Array): Array { } // Extract content - const contentData = - entry.size > 0 ? data.subarray(offset, offset + entry.size) : new Uint8Array(0); - - offset += entry.size + calculatePadding(entry.size); + let contentData: Uint8Array; + ({ offset, contentData } = extractEntryContent(data, offset, entry.size)); entries.push({ ...entry, data: contentData }); } diff --git a/packages/tar-xz/src/node/extract.ts b/packages/tar-xz/src/node/extract.ts index 0ca7926..9b56d07 100644 --- a/packages/tar-xz/src/node/extract.ts +++ b/packages/tar-xz/src/node/extract.ts @@ -102,6 +102,152 @@ function makeTarEntryWithData( }; } +/** + * Pull the next event from `parser`, respecting any pending lookahead. + * + * Extracted from `extract()` to reduce cognitive complexity. Reads from + * `lookaheadRef.value` first; clears it on consumption. + */ +async function nextParseEvent( + parser: AsyncGenerator, + lookaheadRef: { value: ParseEvent | null } +): Promise> { + if (lookaheadRef.value !== null) { + const ev = lookaheadRef.value; + lookaheadRef.value = null; + return { value: ev, done: false }; + } + return parser.next(); +} + +/** + * Drain all remaining 'chunk' events for the current entry from `parser`. + * + * The terminating 'entry' or 'end' event is stored in `lookaheadRef.value`. + * Extracted from `extract()` to reduce cognitive complexity; behavior is + * byte-identical. + */ +async function drainEntryChunks( + parser: AsyncGenerator, + lookaheadRef: { value: ParseEvent | null } +): Promise { + while (true) { + const result = await parser.next(); + if (result.done) return; + if (result.value.kind !== 'chunk') { + lookaheadRef.value = result.value; + return; + } + } +} + +/** + * Drain remaining chunks for an entry that the consumer did not fully read, + * swallowing non-fatal errors per the D-2 policy. + * + * TAR_PARSER_INVARIANT errors always re-throw (D-5) β€” they represent a corrupt + * archive or internal invariant violation, not a recoverable I/O error. + * All other errors are swallowed: they arise from decoding/IO on data the + * consumer intentionally skipped, so propagating them would be surprising. + * + * Extracted from `extract()` to reduce cognitive complexity; behavior is + * byte-identical to the inline drain-with-swallow block. + */ +async function drainSkippedEntry( + parser: AsyncGenerator, + lookaheadRef: { value: ParseEvent | null } +): Promise { + try { + await drainEntryChunks(parser, lookaheadRef); + } catch (err) { + // Decode/IO error during skipped data β€” swallow per D-2. + // TAR_PARSER_INVARIANT always re-throws per D-5. + if ((err as { code?: string }).code === 'TAR_PARSER_INVARIANT') { + throw err; + } + // Swallow other errors from skipped data per D-2. + } +} + +/** + * Build a per-entry data-pull factory that reads 'chunk' events from `parser`. + * + * Returns a `make()` function (same contract as the inline `makeDataGen` it + * replaces) that, when called, returns a single-use `AsyncGenerator` + * backed by the outer `parseTar` generator. When chunks are exhausted, the + * terminating 'entry' or 'end' event is stored in `lookaheadRef.value`. + * A `dataGenInFlight` closure flag guards against concurrent iteration of the + * same entry's data (D-5 / TAR_PARSER_INVARIANT contract). + * + * Extracted from `extract()` to reduce cognitive complexity; behavior is + * byte-identical. The outer generator is suspended at `yield entryWithData` + * while the consumer iterates the returned generator β€” natural backpressure. + */ +function createEntryDataPull( + parser: AsyncGenerator, + lookaheadRef: { value: ParseEvent | null } +): () => AsyncGenerator { + let dataGenInFlight = false; + return function makeDataGen(): AsyncGenerator { + if (dataGenInFlight) { + // D-5 invariant violation: triggered if the same entry's data generator + // is created twice (`makeDataGen()` called more than once for one + // entry). The current `makeTarEntryWithData` design calls `dataPull()` + // exactly once per entry and never exposes `makeDataGen` to consumers, + // so this branch is effectively unreachable in production. The + // `code: 'TAR_PARSER_INVARIANT'` attribute matches the convention used + // by other invariant errors in this module (e.g. stray-chunk in extract, + // size-mismatch in bytes()) and keeps downstream filters consistent. + const err = new Error('concurrent entry.data iteration is not supported') as Error & { + code?: string; + }; + err.code = 'TAR_PARSER_INVARIANT'; + throw err; + } + dataGenInFlight = true; + return (async function* () { + try { + while (true) { + const r = await parser.next(); + if (r.done) return; + if (r.value.kind === 'chunk') { + yield r.value.data; + } else { + // 'entry' or 'end' β€” store for outer loop. + lookaheadRef.value = r.value; + return; + } + } + } finally { + dataGenInFlight = false; + } + })(); + }; +} + +/** + * Apply the user-supplied `filter` predicate, if any, to decide if an entry + * should be skipped. Returns true when filter is provided AND the predicate + * returns ANY falsy value (false / 0 / '' / null / undefined). This preserves + * the historical `filter && !filter(entry)` semantics β€” a stricter `=== false` + * check would treat non-false falsies as "include", which diverges from both + * the documented contract and the browser-side implementation. + * + * Extracted from `extract()` to keep the outer loop under biome's cognitive + * complexity threshold. + */ +function isExcludedByFilter( + entry: TarEntry, + filter: ((e: TarEntry) => boolean) | undefined +): boolean { + // `typeof === 'function'` over `!== undefined` is mandatory for null-safe + // semantics. The original `filter && !filter(entry)` short-circuited on ANY + // falsy filter (false / 0 / '' / null / undefined). A `null !== undefined` + // check would be true β†’ call filter(null) β†’ runtime crash. The `typeof` + // gate matches the browser-side `extract()` and the documented contract. + return typeof filter === 'function' && !filter(entry); +} + /** * Extract a tar.xz archive. * @@ -119,7 +265,6 @@ function makeTarEntryWithData( * } * ``` */ -// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: streaming generator with strip/filter/drain logic β€” complexity is intrinsic export async function* extract( input: TarInputNode, options: ExtractOptions = {} @@ -132,34 +277,13 @@ export async function* extract( // Lookahead: an event pulled from parseTar that hasn't been processed yet. // This allows the data-generator to consume chunks and then "return" the // terminating event (entry/end) for the outer loop to process. - let lookahead: ParseEvent | null = null; - - /** Pull next event from parser, respecting any pending lookahead. */ - async function nextEvent(): Promise> { - if (lookahead !== null) { - const ev = lookahead; - lookahead = null; - return { value: ev, done: false }; - } - return parser.next(); - } - - /** Drain all remaining 'chunk' events for the current entry from parseTar. - * The terminating 'entry' or 'end' event is stored in `lookahead`. */ - async function drainChunks(): Promise { - while (true) { - const result = await parser.next(); - if (result.done) return; - if (result.value.kind !== 'chunk') { - lookahead = result.value; - return; - } - } - } + // Wrapped in a ref-object so top-level helpers can read/write it without + // capturing `extract`'s local scope (which would add nesting complexity). + const lookaheadRef: { value: ParseEvent | null } = { value: null }; try { while (true) { - const result = await nextEvent(); + const result = await nextParseEvent(parser, lookaheadRef); if (result.done) break; const ev = result.value; @@ -175,44 +299,21 @@ export async function* extract( const rawEntry = ev.entry; const strippedName = stripPath(rawEntry.name, strip); if (!strippedName) { - await drainChunks(); + await drainEntryChunks(parser, lookaheadRef); continue; } const strippedEntry = { ...rawEntry, name: strippedName }; - if (filter && !filter(strippedEntry)) { - await drainChunks(); + if (isExcludedByFilter(strippedEntry, filter)) { + await drainEntryChunks(parser, lookaheadRef); continue; } // Build a data generator that pulls 'chunk' events from the parseTar stream. - // When chunks are exhausted it stores the next 'entry'/'end' in `lookahead`. + // When chunks are exhausted it stores the next 'entry'/'end' in lookaheadRef. // The outer generator is suspended at `yield entryWithData` while the consumer // iterates this β€” natural backpressure. - let dataGenInFlight = false; - function makeDataGen(): AsyncGenerator { - if (dataGenInFlight) { - throw new Error('concurrent entry.data iteration is not supported'); - } - dataGenInFlight = true; - return (async function* () { - try { - while (true) { - const r = await parser.next(); - if (r.done) return; - if (r.value.kind === 'chunk') { - yield r.value.data; - } else { - // 'entry' or 'end' β€” store for outer loop. - lookahead = r.value; - return; - } - } - } finally { - dataGenInFlight = false; - } - })(); - } + const makeDataGen = createEntryDataPull(parser, lookaheadRef); const entryWithData = makeTarEntryWithData(strippedEntry, makeDataGen); yield entryWithData; @@ -220,19 +321,9 @@ export async function* extract( // After the consumer advances past this entry, drain any remaining chunks // that the consumer did not read (S-08 auto-drain, Case A per Β§12.4). // If the data generator was fully consumed, lookahead is already set. - // If not, drain now. - if (lookahead === null) { - // Consumer did not fully iterate entry.data β€” drain remaining chunks. - try { - await drainChunks(); - } catch (err) { - // Decode/IO error during skipped data β€” swallow per D-2. - // TAR_PARSER_INVARIANT always re-throws per D-5. - if ((err as { code?: string }).code === 'TAR_PARSER_INVARIANT') { - throw err; - } - // Swallow other errors from skipped data per D-2. - } + // If not, drain now (swallowing non-fatal errors per D-2, re-throwing D-5). + if (lookaheadRef.value === null) { + await drainSkippedEntry(parser, lookaheadRef); } } } finally { diff --git a/packages/tar-xz/src/node/file.ts b/packages/tar-xz/src/node/file.ts index db90c5a..15b3ed7 100644 --- a/packages/tar-xz/src/node/file.ts +++ b/packages/tar-xz/src/node/file.ts @@ -13,7 +13,7 @@ import { link, lstat, mkdir, open, symlink, unlink } from 'node:fs/promises'; import { dirname, isAbsolute, relative, resolve, sep } from 'node:path'; import { Readable } from 'node:stream'; import { pipeline } from 'node:stream/promises'; -import type { CreateOptions, ExtractOptions, TarEntry } from '../types.js'; +import type { CreateOptions, ExtractOptions, TarEntry, TarEntryWithData } from '../types.js'; import { TarEntryType } from '../types.js'; import { create } from './create.js'; import { extract } from './extract.js'; @@ -46,6 +46,21 @@ function ensureSafeName(s: string | undefined, label: string): void { } } +/** + * V6b: Validate `entry.linkname` for SYMLINK and HARDLINK entries. + * + * `TarEntry.linkname` is always present as a string (parser sets `''` for + * empty link fields). Only SYMLINK and HARDLINK entries are required to have + * non-empty linknames β€” other entry types may legitimately carry an empty + * linkname. Delegates to `ensureSafeName` for the actual rejection logic + * (empty-string + NUL-byte + dot-segment guards). + */ +function ensureSafeLinkname(entry: TarEntry): void { + if (entry.type === TarEntryType.SYMLINK || entry.type === TarEntryType.HARDLINK) { + ensureSafeName(entry.linkname, 'linkname'); + } +} + /** * S3 (TOCTOU guard): Check whether any ancestor directory of `filePath` (up to * but not including `root`) is a symlink. If so, a malicious archive could first @@ -97,7 +112,7 @@ async function ensureSafeTarget( // F-001: safe traversal check β€” only reject when the relative path IS '..' or // starts with '../' (POSIX) / '..\' (Windows). Dotfiles like '..gitignore' are fine. const rel = relative(cwd, target); - if (rel === '..' || rel.startsWith('..' + sep) || isAbsolute(rel)) { + if (rel === '..' || rel.startsWith(`..${sep}`) || isAbsolute(rel)) { throw new Error(`Refusing to extract entry outside cwd: ${entryName}`); } // F-002: TOCTOU guard β€” reject if any ancestor directory is a symlink. @@ -121,6 +136,277 @@ async function ensureSafeTarget( } } +/** + * Write a SYMLINK entry to `target`. + * + * Applies `strip` to linkname if requested (V6c / V14), removes an existing + * symlink if present (allow re-extract), then creates the new symlink. + * Skips silently when strip removes the entire linkname (caller should + * continue to next entry regardless β€” the skip-or-create decision is always + * followed by `continue` in `extractFile`). + * + * Extracted from `extractFile` to reduce cognitive complexity; behavior is + * byte-identical to the original inline SYMLINK branch. + */ +async function extractSymlinkEntry( + target: string, + entry: TarEntryWithData, + strip: number +): Promise { + // S3 (TOCTOU): symlinks pointing outside cwd could be used to redirect + // subsequent file writes (e.g. archive creates linkβ†’/etc, then writes link/file). + // `entry.linkname` has already been syntactically validated upstream by + // `ensureSafeLinkname()` (non-empty, no NUL byte, not a dot-segment placeholder). + // What we do NOT enforce here is that the symlink target stays within `cwd` β€” + // symlinks can legitimately point to relative paths inside the archive. The + // TOCTOU risk comes from follow-up entries, which are guarded by + // `ensureSafeTarget()` on each extraction target. + + // V6c / V14: apply strip to SYMLINK linkname, consistent with HARDLINK treatment. + let strippedLinkname = entry.linkname; + if (strip && strippedLinkname) { + const parts = strippedLinkname.split('/').filter(Boolean); + strippedLinkname = parts.slice(strip).join('/'); + if (!strippedLinkname) return; // stripped away β€” skip entry + } + + await mkdir(dirname(target), { recursive: true }); + // Remove existing symlink if present (allow re-extract). Narrow catch to ENOENT only. + try { + await unlink(target); + } catch (err) { + if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err; + } + await symlink(strippedLinkname, target); +} + +/** + * Write a HARDLINK entry to `target`. + * + * Applies `strip` to linkname, validates that the link source does not escape + * `cwd`, rejects symlink sources (R5-1), checks for symlink ancestors (TOCTOU), + * then creates the hardlink. + * Skips silently when strip removes the entire linkname (caller should + * continue to next entry regardless β€” the skip-or-create decision is always + * followed by `continue` in `extractFile`). + * + * Extracted from `extractFile` to reduce cognitive complexity; behavior is + * byte-identical to the original inline HARDLINK branch. + */ +async function extractHardlinkEntry( + target: string, + entry: TarEntryWithData, + cwd: string, + strip: number +): Promise { + // R4-2: apply the same strip logic to linkname as to entry.name, so that + // hardlink targets are consistent with stripped extraction paths. + let strippedLinkname = entry.linkname; + if (strip && strippedLinkname) { + const parts = strippedLinkname.split('/').filter(Boolean); + strippedLinkname = parts.slice(strip).join('/'); + if (!strippedLinkname) return; // link target stripped away β€” skip entry + } + // S2: validate linkname β€” it must not escape cwd (absolute paths or ".." segments). + const linkSource = resolve(cwd, strippedLinkname); + const linkRel = relative(cwd, linkSource); + if (linkRel === '..' || linkRel.startsWith(`..${sep}`) || isAbsolute(linkRel)) { + throw new Error(`Refusing hardlink outside cwd: ${entry.linkname}`); + } + // R5-1: reject if linkSource itself is a symlink β€” the kernel would follow it + // and create a hardlink to whatever the symlink points to (possibly outside cwd). + // ENOENT is fine (linkname may point to a not-yet-extracted file); rethrow others. + let linkSrcStat: Awaited> | null = null; + try { + linkSrcStat = await lstat(linkSource); + } catch (err) { + if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err; + } + if (linkSrcStat?.isSymbolicLink()) { + throw new Error( + `Refusing hardlink: source '${entry.linkname}' is a symlink (would resolve outside cwd)` + ); + } + // R5-1: reject if any ancestor of linkSource is a symlink (TOCTOU risk). + if (await hasSymlinkAncestor(linkSource, cwd)) { + throw new Error( + `Refusing hardlink: source '${entry.linkname}' has a symlink ancestor (TOCTOU risk)` + ); + } + await mkdir(dirname(target), { recursive: true }); + // Remove existing file if present (allow re-extract). Narrow catch to ENOENT only. + try { + await unlink(target); + } catch (err) { + if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err; + } + await link(linkSource, target); +} + +/** + * POSIX implementation of FILE entry write (used by `writeFileEntry`). + * + * Opens with `O_WRONLY | O_CREAT | O_TRUNC | O_NOFOLLOW`. `O_NOFOLLOW` + * prevents opening a symlink at the leaf β€” if `ensureSafeTarget` somehow + * missed one, the OS rejects it here (V2 / V3). + * `chmod` and `utimes` are fd-based and strict (errors propagate). + * `handle.close()` is always called in `finally`. + */ +async function writeFileEntryPosix( + target: string, + entry: TarEntryWithData, + fileMode: number +): Promise { + let handle: Awaited>; + try { + handle = await open( + target, + fsConstants.O_WRONLY | fsConstants.O_CREAT | fsConstants.O_TRUNC | fsConstants.O_NOFOLLOW, + fileMode + ); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'ELOOP') { + throw new Error(`Refusing '${entry.name}': target path is an existing symlink (O_NOFOLLOW)`); + } + throw err; + } + try { + // Collect all chunks from the async generator and write via fd. + // Using direct handle.write() calls avoids stream lifecycle issues + // with autoClose:false + pipeline in some Node versions. + for await (const chunk of entry.data) { + await handle.write(chunk as Uint8Array); + } + // fd-based chmod and utimes β€” do NOT follow symlinks (and there can't be one: + // O_NOFOLLOW would have errored on open). + await handle.chmod(fileMode); + if (entry.mtime > 0) { + const mt = new Date(entry.mtime * 1000); + await handle.utimes(mt, mt); + } + } finally { + await handle.close(); + } +} + +/** + * Open `target` with 'wx' (O_CREAT | O_EXCL β€” atomic exclusive create) for + * the Win32 FILE entry write path. + * + * Strategy: + * 1. Attempt `open(target, 'wx', fileMode)`. + * 2. On `EEXIST` (target exists β€” legitimate overwrite): unlink then retry. + * - Ignore `ENOENT` on unlink (target disappeared between attempts β€” OK). + * 3. If the retry also fails with `EEXIST`, a symlink/junction was injected + * between our unlink and our retry-open. Throw fail-closed security error. + * + * This implements the W1 β†’ W2 window closure from WIN32-TOCTOU-2026-04-29 Β§3. + * The returned `FileHandle` is exclusively created β€” no by-path swap can occur + * on writes via the handle. + */ +async function openFileExclusive( + target: string, + entryName: string, + fileMode: number +): Promise>> { + try { + return await open(target, 'wx', fileMode); + } catch (firstErr) { + if ((firstErr as NodeJS.ErrnoException).code !== 'EEXIST') throw firstErr; + // Target exists β€” legitimate overwrite: unlink then retry. + // If the target disappears between the failed open() and unlink(), ignore + // ENOENT and still retry the atomic exclusive create. + try { + await unlink(target); + } catch (unlinkErr) { + if ((unlinkErr as NodeJS.ErrnoException).code !== 'ENOENT') throw unlinkErr; + } + try { + return await open(target, 'wx', fileMode); + } catch (retryErr) { + if ((retryErr as NodeJS.ErrnoException).code === 'EEXIST') { + // A symlink (or junction) was injected between our unlink and our open. + // Fail-closed: do NOT write through the symlink. + throw new Error( + `Security error: target still exists on retry for '${entryName}' β€” ` + + `possible symlink/junction injection or concurrent creation at the target path between unlink and open` + ); + } + throw retryErr; + } + } +} + +/** + * Windows implementation of FILE entry write (used by `writeFileEntry`). + * + * Opens exclusively via `openFileExclusive` ('wx' + unlink+retry fail-closed), + * then writes all chunks via fd, then does best-effort fd-based chmod/utimes. + * `handle.close()` is always called in `finally`. + * + * Threat model (W1-W4 per WIN32-TOCTOU-2026-04-29 Β§3): + * W1: lstat check β†’ open() ~ms CLOSED by 'wx' atomic create + * W2: open() β†’ last byte per-chunk streaming window β†’ CLOSED by 'wx' fd + * W3: last byte β†’ chmod ~ms CLOSED: fd-based handle.chmod() + * W4: chmod β†’ utimes ~ms CLOSED: fd-based handle.utimes() + * See SECURITY.mdΒ§"Windows symlink-swap TOCTOU" for the full reparse-tag table. + */ +async function writeFileEntryWin32( + target: string, + entry: TarEntryWithData, + fileMode: number +): Promise { + const handle = await openFileExclusive(target, entry.name, fileMode); + try { + // Write all chunks via fd β€” by-path swap after open() cannot redirect writes. + for await (const chunk of entry.data) { + await handle.write(chunk as Uint8Array); + } + // fd-based chmod and utimes to avoid any by-path follow after write. + // On Windows these metadata updates are best-effort: some filesystems can + // reject them (for example with EPERM) even when the file contents were + // written successfully. + try { + await handle.chmod(fileMode); + } catch { + // Best-effort on Windows. + } + if (entry.mtime > 0) { + const mt = new Date(entry.mtime * 1000); + try { + await handle.utimes(mt, mt); + } catch { + // Best-effort on Windows. + } + } + } finally { + await handle.close(); + } +} + +/** + * Write a FILE entry to `target` using fd-based I/O. + * + * Dispatches to `writeFileEntryPosix` (O_NOFOLLOW) or `writeFileEntryWin32` + * ('wx' atomic-create + unlink+retry fail-closed) based on `process.platform`. + * See the per-platform helpers for the full TOCTOU threat model documentation. + * + * @param target - Validated absolute target path (caller has already run `ensureSafeTarget`) + * @param entry - The tar entry (used for `entry.name`, `entry.data`, `entry.mtime`) + * @param fileMode - Mode already masked with `SAFE_MODE_MASK` (setuid/setgid/sticky stripped) + */ +async function writeFileEntry( + target: string, + entry: TarEntryWithData, + fileMode: number +): Promise { + if (process.platform !== 'win32') { + await writeFileEntryPosix(target, entry, fileMode); + } else { + await writeFileEntryWin32(target, entry, fileMode); + } +} + /** * Create a tar.xz archive on disk from a list of source files. * @@ -183,17 +469,12 @@ export async function extractFile( ): Promise { const cwd = resolve(options.cwd ?? process.cwd()); const archiveStream = createReadStream(archivePath); + const strip = options.strip ?? 0; for await (const entry of extract(archiveStream, options)) { // V6a / V6b: reject empty or NUL-containing names/linknames early, before any path math. ensureSafeName(entry.name, 'name'); - // For SYMLINK and HARDLINK, also validate linkname (but only when it must be non-empty). - if ( - (entry.type === TarEntryType.SYMLINK || entry.type === TarEntryType.HARDLINK) && - entry.linkname !== undefined - ) { - ensureSafeName(entry.linkname, 'linkname'); - } + ensureSafeLinkname(entry); const target = resolve(cwd, entry.name); @@ -212,74 +493,12 @@ export async function extractFile( } if (entry.type === TarEntryType.SYMLINK) { - // S3 (TOCTOU): symlinks pointing outside cwd could be used to redirect - // subsequent file writes (e.g. archive creates linkβ†’/etc, then writes link/file). - // We do NOT validate entry.linkname here because symlinks can legitimately - // point to relative paths inside the archive; the TOCTOU risk comes from - // follow-up entries β€” those are guarded by ensureSafeTarget() on each entry. - - // V6c / V14: apply strip to SYMLINK linkname, consistent with HARDLINK treatment. - let strippedLinkname = entry.linkname; - if (options.strip && strippedLinkname) { - const parts = strippedLinkname.split('/').filter(Boolean); - strippedLinkname = parts.slice(options.strip).join('/'); - if (!strippedLinkname) continue; - } - - await mkdir(dirname(target), { recursive: true }); - // Remove existing symlink if present (allow re-extract). Narrow catch to ENOENT only. - try { - await unlink(target); - } catch (err) { - if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err; - } - await symlink(strippedLinkname, target); + await extractSymlinkEntry(target, entry, strip); continue; } if (entry.type === TarEntryType.HARDLINK) { - // R4-2: apply the same strip logic to linkname as to entry.name, so that - // hardlink targets are consistent with stripped extraction paths. - let strippedLinkname = entry.linkname; - if (options.strip && strippedLinkname) { - const parts = strippedLinkname.split('/').filter(Boolean); - strippedLinkname = parts.slice(options.strip).join('/'); - if (!strippedLinkname) continue; // link target stripped away β€” skip entry - } - // S2: validate linkname β€” it must not escape cwd (absolute paths or ".." segments). - const linkSource = resolve(cwd, strippedLinkname); - const linkRel = relative(cwd, linkSource); - if (linkRel === '..' || linkRel.startsWith('..' + sep) || isAbsolute(linkRel)) { - throw new Error(`Refusing hardlink outside cwd: ${entry.linkname}`); - } - // R5-1: reject if linkSource itself is a symlink β€” the kernel would follow it - // and create a hardlink to whatever the symlink points to (possibly outside cwd). - // ENOENT is fine (linkname may point to a not-yet-extracted file); rethrow others. - let linkSrcStat: Awaited> | null = null; - try { - linkSrcStat = await lstat(linkSource); - } catch (err) { - if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err; - } - if (linkSrcStat?.isSymbolicLink()) { - throw new Error( - `Refusing hardlink: source '${entry.linkname}' is a symlink (would resolve outside cwd)` - ); - } - // R5-1: reject if any ancestor of linkSource is a symlink (TOCTOU risk). - if (await hasSymlinkAncestor(linkSource, cwd)) { - throw new Error( - `Refusing hardlink: source '${entry.linkname}' has a symlink ancestor (TOCTOU risk)` - ); - } - await mkdir(dirname(target), { recursive: true }); - // Remove existing file if present (allow re-extract). Narrow catch to ENOENT only. - try { - await unlink(target); - } catch (err) { - if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err; - } - await link(linkSource, target); + await extractHardlinkEntry(target, entry, cwd, strip); continue; } @@ -290,117 +509,9 @@ export async function extractFile( const fileMode = (entry.mode ?? 0o644) & SAFE_MODE_MASK; // V2 / V3: use file-descriptor-based extraction to eliminate the TOCTOU window - // between write and chmod/utimes. O_NOFOLLOW ensures we never open a symlink β€” - // if the leaf check (Fix 1) somehow missed one, the OS rejects it here. - if (process.platform !== 'win32') { - let handle: Awaited>; - try { - handle = await open( - target, - fsConstants.O_WRONLY | - fsConstants.O_CREAT | - fsConstants.O_TRUNC | - fsConstants.O_NOFOLLOW, - fileMode - ); - } catch (err) { - if ((err as NodeJS.ErrnoException).code === 'ELOOP') { - throw new Error( - `Refusing '${entry.name}': target path is an existing symlink (O_NOFOLLOW)` - ); - } - throw err; - } - try { - // Collect all chunks from the async generator and write via fd. - // Using direct handle.write() calls avoids stream lifecycle issues - // with autoClose:false + pipeline in some Node versions. - for await (const chunk of entry.data) { - await handle.write(chunk as Uint8Array); - } - // fd-based chmod and utimes β€” do NOT follow symlinks (and there can't be one: - // O_NOFOLLOW would have errored on open). - await handle.chmod(fileMode); - if (entry.mtime > 0) { - const mt = new Date(entry.mtime * 1000); - await handle.utimes(mt, mt); - } - } finally { - await handle.close(); - } - } else { - // Windows: O_NOFOLLOW is not available. - // - // Threat model (W1-W4 per WIN32-TOCTOU-2026-04-29 Β§3): - // W1: lstat check β†’ open() ~ms (atomic-create succeeds or EEXIST) - // W2: open() β†’ last byte per-chunk streaming window β†’ CLOSED by 'wx' fd - // W3: last byte β†’ chmod ~ms CLOSED: fd-based handle.chmod() - // W4: chmod β†’ utimes ~ms CLOSED: fd-based handle.utimes() - // - // Strategy: open with 'wx' (O_CREAT | O_EXCL β€” atomic exclusive create). - // β€’ If target does not exist β†’ open succeeds, proceed normally. - // β€’ If target exists (EEXIST) β†’ legitimate overwrite: unlink then retry 'wx'. - // - If retry also fails with EEXIST, an attacker won the race between our - // unlink and our retry-open (injected a symlink). Throw security error. - // β€’ Write, chmod, utimes all via fd (FileHandle) β€” immune to by-path swap. - // - // Residual race: the 'wx' open() syscall itself (atomic, sub-microsecond). - // A junction (IO_REPARSE_TAG_MOUNT_POINT) at the target path makes 'wx' fail - // with EEXIST β†’ unlink β†’ retry β†’ EEXIST (attacker re-injects) β†’ security error. - // See SECURITY.mdΒ§"Windows symlink-swap TOCTOU" for the full reparse-tag table. - let handle: Awaited>; - try { - handle = await open(target, 'wx', fileMode); - } catch (firstErr) { - if ((firstErr as NodeJS.ErrnoException).code !== 'EEXIST') throw firstErr; - // Target exists β€” legitimate overwrite: unlink then retry. - // If the target disappears between the failed open() and unlink(), ignore - // ENOENT and still retry the atomic exclusive create. - try { - await unlink(target); - } catch (unlinkErr) { - if ((unlinkErr as NodeJS.ErrnoException).code !== 'ENOENT') throw unlinkErr; - } - try { - handle = await open(target, 'wx', fileMode); - } catch (retryErr) { - if ((retryErr as NodeJS.ErrnoException).code === 'EEXIST') { - // A symlink (or junction) was injected between our unlink and our open. - // Fail-closed: do NOT write through the symlink. - throw new Error( - `Security error: target still exists on retry for '${entry.name}' β€” ` + - `possible symlink/junction injection or concurrent creation at the target path between unlink and open` - ); - } - throw retryErr; - } - } - try { - // Write all chunks via fd β€” by-path swap after open() cannot redirect writes. - for await (const chunk of entry.data) { - await handle.write(chunk as Uint8Array); - } - // fd-based chmod and utimes to avoid any by-path follow after write. - // On Windows these metadata updates are best-effort: some filesystems can - // reject them (for example with EPERM) even when the file contents were - // written successfully. - try { - await handle.chmod(fileMode); - } catch { - // Best-effort on Windows. - } - if (entry.mtime > 0) { - const mt = new Date(entry.mtime * 1000); - try { - await handle.utimes(mt, mt); - } catch { - // Best-effort on Windows. - } - } - } finally { - await handle.close(); - } - } + // between write and chmod/utimes. See `writeFileEntry` for the platform-specific + // POSIX (O_NOFOLLOW) and Windows ('wx' atomic-create + unlink+retry) strategies. + await writeFileEntry(target, entry, fileMode); } // Other entry types (CHARDEV, BLOCKDEV, FIFO, CONTIGUOUS) are skipped: // they require elevated privileges and have no portable cross-platform behavior. diff --git a/packages/tar-xz/src/tar/checksum.ts b/packages/tar-xz/src/tar/checksum.ts index b08bf9d..3e71812 100644 --- a/packages/tar-xz/src/tar/checksum.ts +++ b/packages/tar-xz/src/tar/checksum.ts @@ -29,7 +29,7 @@ export function calculateChecksum(header: Uint8Array): number { if (i >= CHECKSUM_OFFSET && i < CHECKSUM_OFFSET + CHECKSUM_LENGTH) { sum += 0x20; } else { - sum += header[i]!; + sum += header[i] ?? 0; } } @@ -85,11 +85,11 @@ export function parseOctal(header: Uint8Array, offset: number, length: number): for (let i = 0; i < length; i++) { const byte = header[offset + i]; - // Stop at null or space - if (byte === 0 || byte === 0x20) { + // Stop at null, space, or out-of-bounds (undefined) + if (byte === undefined || byte === 0 || byte === 0x20) { break; } - str += String.fromCharCode(byte!); + str += String.fromCharCode(byte); } if (str.length === 0) { diff --git a/packages/tar-xz/src/tar/format.ts b/packages/tar-xz/src/tar/format.ts index 54767c3..68b2e09 100644 --- a/packages/tar-xz/src/tar/format.ts +++ b/packages/tar-xz/src/tar/format.ts @@ -97,7 +97,9 @@ function writeString(header: Uint8Array, offset: number, length: number, value: const writeLen = Math.min(bytes.length, length); for (let i = 0; i < writeLen; i++) { - header[offset + i] = bytes[i]!; + const b = bytes[i]; + if (b === undefined) break; + header[offset + i] = b; } // Null-terminate if there's room @@ -122,8 +124,8 @@ function writeOctal(header: Uint8Array, offset: number, length: number, value: n * Check if a header block is empty (end of archive marker) */ export function isEmptyBlock(block: Uint8Array): boolean { - for (let i = 0; i < block.length; i++) { - if (block[i] !== 0) { + for (const byte of block) { + if (byte !== 0) { return false; } } @@ -165,7 +167,8 @@ export function parseHeader(header: Uint8Array): TarEntry | null { } // Parse type flag - const typeFlagChar = String.fromCharCode(header[OFFSETS.typeflag]!); + const typeFlag = header[OFFSETS.typeflag] ?? 0; + const typeFlagChar = String.fromCharCode(typeFlag); // Handle legacy type (null or empty = regular file) const type: TarEntryTypeValue = @@ -221,6 +224,25 @@ export interface CreateHeaderOptions { * @param options - Header options * @returns 512-byte header block */ + +/** + * Split a long TAR name into a prefix + name pair using the USTAR 155/100 fields. + * Throws if the name cannot be split to fit. + */ +function splitLongName(fullName: string): { prefix: string; name: string } { + const maxPrefix = LENGTHS.prefix; + const splitIdx = fullName.lastIndexOf('/', maxPrefix); + + if (splitIdx > 0 && fullName.length - splitIdx - 1 <= LENGTHS.name) { + return { + prefix: fullName.substring(0, splitIdx), + name: fullName.substring(splitIdx + 1), + }; + } + + throw new Error(`File name too long for TAR format: ${fullName} (use PAX for names > 255 chars)`); +} + export function createHeader(options: CreateHeaderOptions): Uint8Array { const header = new Uint8Array(BLOCK_SIZE); @@ -233,18 +255,7 @@ export function createHeader(options: CreateHeaderOptions): Uint8Array { // Handle long names using prefix field let prefix = ''; if (name.length > LENGTHS.name) { - // Find a good split point (at a path separator) - const maxPrefix = LENGTHS.prefix; - const splitIdx = name.lastIndexOf('/', maxPrefix); - - if (splitIdx > 0 && name.length - splitIdx - 1 <= LENGTHS.name) { - prefix = name.substring(0, splitIdx); - name = name.substring(splitIdx + 1); - } else { - throw new Error( - `File name too long for TAR format: ${options.name} (use PAX for names > 255 chars)` - ); - } + ({ prefix, name } = splitLongName(name)); } // Default mode based on type diff --git a/packages/tar-xz/test/coverage.spec.ts b/packages/tar-xz/test/coverage.spec.ts index 05a5fbb..897cff5 100644 --- a/packages/tar-xz/test/coverage.spec.ts +++ b/packages/tar-xz/test/coverage.spec.ts @@ -33,53 +33,72 @@ import { TarEntryType } from '../src/types.js'; // --------------------------------------------------------------------------- /** Build raw TAR buffer from entry descriptors */ -function buildTar( - entries: Array<{ + +/** Recalculate and write the USTAR checksum into bytes 148-155 of a 512-byte header. */ +function recalculateChecksum(header: Uint8Array): void { + let checksum = 0; + for (let i = 0; i < 512; i++) { + checksum += i >= 148 && i < 156 ? 0x20 : (header[i] ?? 0); + } + const checksumStr = `${checksum.toString(8).padStart(6, '0')}\x00 `; + for (let i = 0; i < 8; i++) header[148 + i] = checksumStr.charCodeAt(i); +} + +/** Append TAR blocks for a single entry descriptor into `blocks`. */ +function buildEntryBlocks( + entry: { name: string; content?: Buffer; type?: string; linkname?: string; usePax?: boolean; - }> -): Buffer { - const blocks: Buffer[] = []; - - for (const entry of entries) { - const content = entry.content ?? Buffer.alloc(0); - const type = (entry.type ?? TarEntryType.FILE) as string; - const isDir = type === TarEntryType.DIRECTORY; - const isLink = type === TarEntryType.SYMLINK || type === TarEntryType.HARDLINK; - const size = isDir || isLink ? 0 : content.length; - - let headerName = entry.name; - - if (entry.usePax || headerName.length > 255) { - const paxBlocks = createPaxHeaderBlocks(headerName, { - path: headerName, - size, - linkpath: entry.linkname, - }); - for (const block of paxBlocks) { - blocks.push(Buffer.from(block)); - } - headerName = headerName.slice(-99); - } - - const header = createHeader({ - name: headerName, + }, + blocks: Buffer[] +): void { + const content = entry.content ?? Buffer.alloc(0); + const type = (entry.type ?? TarEntryType.FILE) as string; + const isDir = type === TarEntryType.DIRECTORY; + const isLink = type === TarEntryType.SYMLINK || type === TarEntryType.HARDLINK; + const size = isDir || isLink ? 0 : content.length; + + let headerName = entry.name; + + if (entry.usePax || headerName.length > 255) { + const paxBlocks = createPaxHeaderBlocks(headerName, { + path: headerName, size, - type: type as '0', - linkname: entry.linkname, + linkpath: entry.linkname, }); - blocks.push(Buffer.from(header)); + for (const block of paxBlocks) blocks.push(Buffer.from(block)); + headerName = headerName.slice(-99); + } - if (size > 0) { - blocks.push(content); - const pad = calculatePadding(size); - if (pad > 0) blocks.push(Buffer.alloc(pad)); - } + const header = createHeader({ + name: headerName, + size, + type: type as '0', + linkname: entry.linkname, + }); + blocks.push(Buffer.from(header)); + + if (size > 0) { + blocks.push(content); + const pad = calculatePadding(size); + if (pad > 0) blocks.push(Buffer.alloc(pad)); } +} +function buildTar( + entries: Array<{ + name: string; + content?: Buffer; + type?: string; + linkname?: string; + usePax?: boolean; + }> +): Buffer { + const blocks: Buffer[] = []; + for (const entry of entries) buildEntryBlocks(entry, blocks); blocks.push(Buffer.from(createEndOfArchive())); return Buffer.concat(blocks); } @@ -911,14 +930,7 @@ describe('Coverage: Node API', () => { const header = createHeader({ name: 'safe.txt', size: content.length }); // Inject NUL at position 4 in the name field (offset 0) header[4] = 0x00; - // Recalculate checksum - let checksum = 0; - for (let i = 0; i < 512; i++) { - checksum += i >= 148 && i < 156 ? 0x20 : header[i]!; - } - // Write checksum in octal to bytes 148-155 - const checksumStr = checksum.toString(8).padStart(6, '0') + '\x00 '; - for (let i = 0; i < 8; i++) header[148 + i] = checksumStr.charCodeAt(i); + recalculateChecksum(header); const archive = path.join(tempDir, 'nul-name.tar.xz'); await saveAsXz( @@ -959,13 +971,7 @@ describe('Coverage: Node API', () => { }); // Inject NUL at offset 157 (linkname field starts at 157 in USTAR) header[158] = 0x00; - // Recalculate checksum - let checksum = 0; - for (let i = 0; i < 512; i++) { - checksum += i >= 148 && i < 156 ? 0x20 : header[i]!; - } - const checksumStr = checksum.toString(8).padStart(6, '0') + '\x00 '; - for (let i = 0; i < 8; i++) header[148 + i] = checksumStr.charCodeAt(i); + recalculateChecksum(header); const archive = path.join(tempDir, 'nul-linkname.tar.xz'); await saveAsXz( @@ -1037,15 +1043,9 @@ describe('Coverage: Node API', () => { // Manually patch the mode in the header to 0o4755 (setuid + rwxr-xr-x) const tarBuf = Buffer.from(tar); // Mode field is at offset 100, length 8 in USTAR - const modeStr = (0o4755).toString(8).padStart(7, '0') + '\x00'; + const modeStr = `${(0o4755).toString(8).padStart(7, '0')}\x00`; for (let i = 0; i < 8; i++) tarBuf[100 + i] = modeStr.charCodeAt(i); - // Recalculate checksum (field at offset 148, length 8) - let checksum = 0; - for (let i = 0; i < 512; i++) { - checksum += i >= 148 && i < 156 ? 0x20 : tarBuf[i]!; - } - const checksumStr = checksum.toString(8).padStart(6, '0') + '\x00 '; - for (let i = 0; i < 8; i++) tarBuf[148 + i] = checksumStr.charCodeAt(i); + recalculateChecksum(tarBuf); const archive = path.join(tempDir, 'setuid.tar.xz'); await saveAsXz(tarBuf, archive); diff --git a/packages/tar-xz/test/memory-shape.spec.ts b/packages/tar-xz/test/memory-shape.spec.ts index 52f2ab8..f30b420 100644 --- a/packages/tar-xz/test/memory-shape.spec.ts +++ b/packages/tar-xz/test/memory-shape.spec.ts @@ -94,7 +94,7 @@ describe('Memory shape gate (requires --expose-gc; skips otherwise)', () => { // preset: 6 matches the 16 MB slack rationale (XZ preset-6 dictionary β‰ˆ 8 MB) const archive = await buildArchive([{ name: 'big.bin', size: ENTRY_SIZE }], 6); - gc!(); + gc?.(); const sampler = makeSampler(); for await (const entry of extract(Readable.from([archive]))) { @@ -105,7 +105,7 @@ describe('Memory shape gate (requires --expose-gc; skips otherwise)', () => { sampler.sample(); } - gc!(); + gc?.(); sampler.sample(); const delta = sampler.peak() - sampler.baseline; @@ -142,7 +142,7 @@ describe('Memory shape gate (requires --expose-gc; skips otherwise)', () => { })) ); - gc!(); + gc?.(); const sampler = makeSampler(); let count = 0; @@ -153,7 +153,7 @@ describe('Memory shape gate (requires --expose-gc; skips otherwise)', () => { expect(entry.size).toBe(ENTRY_SIZE); } - gc!(); + gc?.(); sampler.sample(); expect(count).toBe(ENTRY_COUNT); @@ -190,7 +190,7 @@ describe('Memory shape gate (requires --expose-gc; skips otherwise)', () => { })) ); - gc!(); + gc?.(); const sampler = makeSampler(); let totalBytes = 0; @@ -203,7 +203,7 @@ describe('Memory shape gate (requires --expose-gc; skips otherwise)', () => { sampler.sample(); } - gc!(); + gc?.(); sampler.sample(); expect(totalBytes).toBe(ENTRY_COUNT * LARGEST_ENTRY); diff --git a/packages/tar-xz/test/node-api.spec.ts b/packages/tar-xz/test/node-api.spec.ts index 062a6af..2ede672 100644 --- a/packages/tar-xz/test/node-api.spec.ts +++ b/packages/tar-xz/test/node-api.spec.ts @@ -378,7 +378,7 @@ describe('Node.js file API (v6)', () => { expect(entries).toHaveLength(1); expect(entries[0]?.name).toBe('hello.txt'); - expect(Buffer.from(entries[0]!.content).toString('utf-8')).toBe('hello world'); + expect(Buffer.from(entries[0]?.content).toString('utf-8')).toBe('hello world'); }); }); }); diff --git a/packages/tar-xz/test/security.spec.ts b/packages/tar-xz/test/security.spec.ts index eed552c..1e9b04a 100644 --- a/packages/tar-xz/test/security.spec.ts +++ b/packages/tar-xz/test/security.spec.ts @@ -28,12 +28,7 @@ import { xzSync } from 'node-liblzma'; import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { extractFile } from '../src/node/file.js'; import { extract } from '../src/node/index.js'; -import { - BLOCK_SIZE, - calculatePadding, - createEndOfArchive, - createHeader, -} from '../src/tar/format.js'; +import { calculatePadding, createEndOfArchive, createHeader } from '../src/tar/format.js'; import { createPaxHeaderBlocks } from '../src/tar/pax.js'; import { TarEntryType } from '../src/types.js'; @@ -43,55 +38,75 @@ import { TarEntryType } from '../src/types.js'; // --------------------------------------------------------------------------- /** Build a raw TAR buffer from entry descriptors. */ -function buildTar( - entries: Array<{ + +/** Recalculate and write the USTAR checksum into bytes 148-155 of a 512-byte header. */ +function recalculateChecksum(header: Uint8Array): void { + let checksum = 0; + for (let i = 0; i < 512; i++) { + checksum += i >= 148 && i < 156 ? 0x20 : (header[i] ?? 0); + } + const checksumStr = `${checksum.toString(8).padStart(6, '0')}\x00 `; + for (let i = 0; i < 8; i++) header[148 + i] = checksumStr.charCodeAt(i); +} + +/** Append TAR blocks for a single entry descriptor into `blocks`. */ +function buildEntryBlocks( + entry: { name: string; content?: Buffer; type?: string; linkname?: string; usePax?: boolean; mode?: number; - }> -): Buffer { - const blocks: Buffer[] = []; - - for (const entry of entries) { - const content = entry.content ?? Buffer.alloc(0); - const type = (entry.type ?? TarEntryType.FILE) as string; - const isDir = type === TarEntryType.DIRECTORY; - const isLink = type === TarEntryType.SYMLINK || type === TarEntryType.HARDLINK; - const size = isDir || isLink ? 0 : content.length; - - let headerName = entry.name; - - if (entry.usePax || headerName.length > 255) { - const paxBlocks = createPaxHeaderBlocks(headerName, { - path: headerName, - size, - linkpath: entry.linkname, - }); - for (const block of paxBlocks) { - blocks.push(Buffer.from(block)); - } - headerName = headerName.slice(-99); - } - - const header = createHeader({ - name: headerName, + }, + blocks: Buffer[] +): void { + const content = entry.content ?? Buffer.alloc(0); + const type = (entry.type ?? TarEntryType.FILE) as string; + const isDir = type === TarEntryType.DIRECTORY; + const isLink = type === TarEntryType.SYMLINK || type === TarEntryType.HARDLINK; + const size = isDir || isLink ? 0 : content.length; + + let headerName = entry.name; + + if (entry.usePax || headerName.length > 255) { + const paxBlocks = createPaxHeaderBlocks(headerName, { + path: headerName, size, - type: type as '0', - linkname: entry.linkname, - mode: entry.mode, + linkpath: entry.linkname, }); - blocks.push(Buffer.from(header)); + for (const block of paxBlocks) blocks.push(Buffer.from(block)); + headerName = headerName.slice(-99); + } + + const header = createHeader({ + name: headerName, + size, + type: type as '0', + linkname: entry.linkname, + mode: entry.mode, + }); + blocks.push(Buffer.from(header)); - if (size > 0) { - blocks.push(content); - const pad = calculatePadding(size); - if (pad > 0) blocks.push(Buffer.alloc(pad)); - } + if (size > 0) { + blocks.push(content); + const pad = calculatePadding(size); + if (pad > 0) blocks.push(Buffer.alloc(pad)); } +} +function buildTar( + entries: Array<{ + name: string; + content?: Buffer; + type?: string; + linkname?: string; + usePax?: boolean; + mode?: number; + }> +): Buffer { + const blocks: Buffer[] = []; + for (const entry of entries) buildEntryBlocks(entry, blocks); blocks.push(Buffer.from(createEndOfArchive())); return Buffer.concat(blocks); } @@ -455,13 +470,7 @@ describe('Security regression gate', () => { }); // Inject NUL at position 158 β€” linkname field starts at 157 header[158] = 0x00; - // Recalculate checksum - let checksum = 0; - for (let i = 0; i < 512; i++) { - checksum += i >= 148 && i < 156 ? 0x20 : header[i]!; - } - const checksumStr = checksum.toString(8).padStart(6, '0') + '\x00 '; - for (let i = 0; i < 8; i++) header[148 + i] = checksumStr.charCodeAt(i); + recalculateChecksum(header); const archive = path.join(tempDir, 'nul-linkname.tar.xz'); await saveAsXz( @@ -491,13 +500,7 @@ describe('Security regression gate', () => { const content = Buffer.from('evil'); const header = createHeader({ name: 'safe.txt', size: content.length }); header[4] = 0x00; - // Recalculate checksum - let checksum = 0; - for (let i = 0; i < 512; i++) { - checksum += i >= 148 && i < 156 ? 0x20 : header[i]!; - } - const checksumStr = checksum.toString(8).padStart(6, '0') + '\x00 '; - for (let i = 0; i < 8; i++) header[148 + i] = checksumStr.charCodeAt(i); + recalculateChecksum(header); const archive = path.join(tempDir, 'nul-name.tar.xz'); await saveAsXz( @@ -538,15 +541,9 @@ describe('Security regression gate', () => { const tar = buildTar([{ name: 'x', content: Buffer.from('data') }]); const tarBuf = Buffer.from(tar); // Patch mode field at offset 100 to 0o4755 (setuid + rwxr-xr-x) - const modeStr = (0o4755).toString(8).padStart(7, '0') + '\x00'; + const modeStr = `${(0o4755).toString(8).padStart(7, '0')}\x00`; for (let i = 0; i < 8; i++) tarBuf[100 + i] = modeStr.charCodeAt(i); - // Recalculate checksum - let checksum = 0; - for (let i = 0; i < 512; i++) { - checksum += i >= 148 && i < 156 ? 0x20 : tarBuf[i]!; - } - const checksumStr = checksum.toString(8).padStart(6, '0') + '\x00 '; - for (let i = 0; i < 8; i++) tarBuf[148 + i] = checksumStr.charCodeAt(i); + recalculateChecksum(tarBuf); const archive = path.join(tempDir, 'setuid.tar.xz'); await saveAsXz(tarBuf, archive); @@ -687,8 +684,8 @@ describe('Security regression gate', () => { // the guard fires first β€” both are correct rejection outcomes) expect(caught).not.toBeNull(); const isExpectedError = - caught!.message.includes('Unexpected end') || - caught!.message.toLowerCase().includes('pax') || + caught?.message.includes('Unexpected end') || + caught?.message.toLowerCase().includes('pax') || (caught as Error & { code?: string }).code === 'TAR_PARSER_INVARIANT'; expect(isExpectedError).toBe(true); @@ -716,7 +713,7 @@ describe('Security regression gate', () => { } expect(caught).not.toBeNull(); - expect(caught!.message).toMatch(/PAX|header exceeds/i); + expect(caught?.message).toMatch(/PAX|header exceeds/i); expect((caught as Error & { code?: string }).code).toBe('TAR_PARSER_INVARIANT'); }); }); diff --git a/packages/tar-xz/test/tar-parser-stream.spec.ts b/packages/tar-xz/test/tar-parser-stream.spec.ts index 6815f88..04ceea5 100644 --- a/packages/tar-xz/test/tar-parser-stream.spec.ts +++ b/packages/tar-xz/test/tar-parser-stream.spec.ts @@ -14,8 +14,73 @@ import { TarEntryType } from '../src/types.js'; // Helpers // --------------------------------------------------------------------------- -// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: test helper covering PAX/regular/global entries /** Build a raw TAR buffer with end-of-archive blocks. */ + +/** Append a PAX_GLOBAL block for the given attributes into `blocks`. */ +function buildGlobalPaxBlock(attrs: Record, blocks: Buffer[]): void { + const paxData = Object.entries(attrs) + .map(([k, v]) => { + const line = ` ${k}=${v}\n`; + const len = String(line.length + 1).length + line.length; + return `${len}${line}`; + }) + .join(''); + const paxBuf = Buffer.from(paxData); + const paxPad = calculatePadding(paxBuf.length); + const gHeader = createHeader({ + name: '././@PaxHeader', + size: paxBuf.length, + type: 'g' as '0', + }); + blocks.push(Buffer.from(gHeader)); + blocks.push(paxBuf); + if (paxPad > 0) blocks.push(Buffer.alloc(paxPad)); +} + +/** Append TAR blocks for a single regular or PAX entry into `blocks`. */ +function buildEntryBlocks( + entry: { + name: string; + content?: Buffer; + type?: string; + linkname?: string; + usePax?: boolean; + }, + blocks: Buffer[] +): void { + const content = entry.content ?? Buffer.alloc(0); + const type = (entry.type ?? TarEntryType.FILE) as string; + const isDir = type === TarEntryType.DIRECTORY; + const isLink = type === TarEntryType.SYMLINK || type === TarEntryType.HARDLINK; + const size = isDir || isLink ? 0 : content.length; + + let headerName = entry.name; + + if (entry.usePax || headerName.length > 99) { + const paxBlocks = createPaxHeaderBlocks(headerName, { + path: headerName, + size, + linkpath: entry.linkname, + }); + for (const block of paxBlocks) blocks.push(Buffer.from(block)); + headerName = headerName.slice(-99); + } + + const header = createHeader({ + name: headerName, + size, + type: type as '0', + linkname: entry.linkname, + }); + blocks.push(Buffer.from(header)); + + if (size > 0) { + blocks.push(content); + const pad = calculatePadding(size); + if (pad > 0) blocks.push(Buffer.alloc(pad)); + } +} + function buildTar( entries: Array<{ name: string; @@ -28,66 +93,13 @@ function buildTar( }> ): Buffer { const blocks: Buffer[] = []; - for (const entry of entries) { - const content = entry.content ?? Buffer.alloc(0); - const type = (entry.type ?? TarEntryType.FILE) as string; - const isDir = type === TarEntryType.DIRECTORY; - const isLink = type === TarEntryType.SYMLINK || type === TarEntryType.HARDLINK; - const size = isDir || isLink ? 0 : content.length; - if (entry.globalPax && entry.globalAttrs) { - // Build a PAX_GLOBAL block manually. - const attrs = entry.globalAttrs; - const paxData = Object.entries(attrs) - .map(([k, v]) => { - const line = ` ${k}=${v}\n`; - const len = String(line.length + 1).length + line.length; - return `${len}${line}`; - }) - .join(''); - const paxBuf = Buffer.from(paxData); - const paxPad = calculatePadding(paxBuf.length); - const gHeader = createHeader({ - name: '././@PaxHeader', - size: paxBuf.length, - type: 'g' as '0', - }); - blocks.push(Buffer.from(gHeader)); - blocks.push(paxBuf); - if (paxPad > 0) blocks.push(Buffer.alloc(paxPad)); - continue; - } - - let headerName = entry.name; - - if (entry.usePax || headerName.length > 99) { - const paxBlocks = createPaxHeaderBlocks(headerName, { - path: headerName, - size, - linkpath: entry.linkname, - }); - for (const block of paxBlocks) { - blocks.push(Buffer.from(block)); - } - headerName = headerName.slice(-99); - } - - const header = createHeader({ - name: headerName, - size, - type: type as '0', - linkname: entry.linkname, - }); - blocks.push(Buffer.from(header)); - - if (size > 0) { - blocks.push(content); - const pad = calculatePadding(size); - if (pad > 0) blocks.push(Buffer.alloc(pad)); + buildGlobalPaxBlock(entry.globalAttrs, blocks); + } else { + buildEntryBlocks(entry, blocks); } } - blocks.push(Buffer.from(createEndOfArchive())); return Buffer.concat(blocks); } diff --git a/packages/tar-xz/test/xz-helpers.spec.ts b/packages/tar-xz/test/xz-helpers.spec.ts index eaf8b30..28cc4fb 100644 --- a/packages/tar-xz/test/xz-helpers.spec.ts +++ b/packages/tar-xz/test/xz-helpers.spec.ts @@ -191,9 +191,9 @@ describe('streamXz', () => { try { // Only consume first chunk β€” break immediately - let count = 0; + let _count = 0; for await (const _chunk of streamXz(tarXz)) { - count++; + _count++; break; // consumer breaks early } // Allow a microtask turn for any unhandled rejection to surface diff --git a/src/errors.ts b/src/errors.ts index 0f79ad5..fe9cb50 100644 --- a/src/errors.ts +++ b/src/errors.ts @@ -173,5 +173,7 @@ function getErrorMessage(errno: number): string { if (errno < 0 || errno >= messages.length) { return `Unknown LZMA error code: ${errno}`; } - return messages[errno]!; + const msg = messages[errno]; + if (msg === undefined) return `Unknown LZMA error code: ${errno}`; + return msg; } diff --git a/src/lzma.ts b/src/lzma.ts index 63096dd..ee7a4cc 100644 --- a/src/lzma.ts +++ b/src/lzma.ts @@ -61,6 +61,7 @@ export { }; // Re-export pool for concurrency control +// biome-ignore lint/suspicious/noImportCycles: intentional ESM cycle resolved at runtime; LZMAPool re-exports from lzma.ts to keep public surface flat export { LZMAPool, type PoolMetrics } from './pool.js'; // F-009: Type for internal Node.js stream state (no public API equivalent for _writableState.ended) diff --git a/src/pool.ts b/src/pool.ts index cae6e73..0372c8e 100644 --- a/src/pool.ts +++ b/src/pool.ts @@ -17,6 +17,7 @@ */ import { EventEmitter } from 'node:events'; +// biome-ignore lint/suspicious/noImportCycles: intentional ESM cycle resolved at runtime; LZMAPool re-exports from lzma.ts to keep public surface flat import { type LZMAOptions, unxzAsync, xzAsync } from './lzma.js'; /** @@ -163,7 +164,14 @@ export class LZMAPool extends EventEmitter { return; } - const item = this.queue.shift()!; + const item = this.queue.shift(); + if (item === undefined) { + // The `this.queue.length === 0` early-return guard above means shift() + // must return a value here. Returning silently would let queued tasks + // stall on a future invariant breach (re-entrancy, refactor, etc.). + // Throw so the bug surfaces at the breach instead of as a hung pool. + throw new Error('Invariant violation: queue was non-empty but shift() returned undefined'); + } this.metrics.active++; this.metrics.queued = this.queue.length; diff --git a/test/wasm/decompress-memlimit.test.ts b/test/wasm/decompress-memlimit.test.ts index b11f513..1e072f6 100644 --- a/test/wasm/decompress-memlimit.test.ts +++ b/test/wasm/decompress-memlimit.test.ts @@ -27,7 +27,7 @@ import { loadWasmModule, unloadWasmModule } from './wasm-helpers.utils.js'; * lzma_stream_buffer_decode regardless of the exact stream content. */ async function makeFixture(): Promise<{ original: Uint8Array; compressed: Uint8Array }> { - const original = new TextEncoder().encode('memlimit fixture: ' + 'x'.repeat(512)); + const original = new TextEncoder().encode(`memlimit fixture: ${'x'.repeat(512)}`); const compressed = await xzAsync(original, { preset: 6 }); return { original, compressed }; } @@ -138,7 +138,8 @@ describe('WASM unxzAsync β€” memlimit option', () => { expect(err).toBeNull(); expect(result).toBeDefined(); // Verify decompressed bytes equal the original, not merely that something was returned. - expect(Array.from(result!)).toEqual(Array.from(original)); + if (result === undefined) throw new Error('result is undefined β€” test setup failed'); + expect(Array.from(result)).toEqual(Array.from(original)); resolve(); } catch (e) { reject(e);