11import { Buffer , isUtf8 } from 'buffer'
2+ import type { Readable } from 'stream'
23import { createLogger } from '@sim/logger'
34import { getErrorMessage } from '@sim/utils/errors'
45import { generateShortId } from '@sim/utils/id'
@@ -197,13 +198,68 @@ const MAX_DECOMPRESS_TOTAL_BYTES = 200 * 1024 * 1024
197198const S_IFMT = 0o170000
198199const S_IFLNK = 0o120000
199200
200- /** Read a zip entry's declared uncompressed size without materializing it (zip-bomb pre-check). */
201+ /**
202+ * Read a zip entry's declared uncompressed size without materializing it. This
203+ * value comes straight from the (attacker-controlled) ZIP metadata, so it is only
204+ * usable as a cheap fast-reject for honestly-declared archives — never as the
205+ * authoritative cap. {@link inflateEntryWithinCaps} enforces the real limit on the
206+ * inflated byte stream.
207+ */
201208const readEntryUncompressedSize = ( entry : JSZip . JSZipObject ) : number | undefined => {
202209 const data = ( entry as JSZip . JSZipObject & { _data ?: { uncompressedSize ?: number } } ) . _data
203210 const size = data ?. uncompressedSize
204211 return typeof size === 'number' && Number . isFinite ( size ) ? size : undefined
205212}
206213
214+ type InflateResult = { ok : true ; buffer : Buffer } | { ok : false ; reason : 'entry' | 'total' }
215+
216+ /**
217+ * Inflate a single zip entry through a streaming counting sink, tearing the
218+ * stream down the moment cumulative output would exceed the per-entry cap or the
219+ * remaining total budget. The declared uncompressed size in the ZIP header is
220+ * attacker-controlled and is NOT trusted here: a forged-small or absent size
221+ * cannot cause the full (potentially gigabyte-scale) entry to be materialized in
222+ * memory, because enforcement happens on the actual inflated bytes as they
223+ * arrive. Peak memory is bounded by the cap plus one DEFLATE chunk.
224+ */
225+ const inflateEntryWithinCaps = (
226+ entry : JSZip . JSZipObject ,
227+ remainingTotalBudget : number
228+ ) : Promise < InflateResult > =>
229+ new Promise ( ( resolve , reject ) => {
230+ const chunks : Buffer [ ] = [ ]
231+ let size = 0
232+ let settled = false
233+ const stream = entry . nodeStream ( ) as Readable
234+
235+ const settle = ( result : InflateResult ) => {
236+ if ( settled ) return
237+ settled = true
238+ stream . destroy ( )
239+ resolve ( result )
240+ }
241+
242+ stream . on ( 'data' , ( chunk : Buffer ) => {
243+ size += chunk . length
244+ if ( size > MAX_DECOMPRESS_ENTRY_BYTES ) {
245+ settle ( { ok : false , reason : 'entry' } )
246+ return
247+ }
248+ if ( size > remainingTotalBudget ) {
249+ settle ( { ok : false , reason : 'total' } )
250+ return
251+ }
252+ chunks . push ( chunk )
253+ } )
254+ stream . on ( 'end' , ( ) => settle ( { ok : true , buffer : Buffer . concat ( chunks , size ) } ) )
255+ stream . on ( 'error' , ( error ) => {
256+ if ( settled ) return
257+ settled = true
258+ stream . destroy ( )
259+ reject ( error )
260+ } )
261+ } )
262+
207263/** True when a zip entry's unix mode marks it as a symlink (never extracted). */
208264const isSymlinkEntry = ( entry : JSZip . JSZipObject ) : boolean => {
209265 const mode = ( entry as JSZip . JSZipObject & { unixPermissions ?: number | null } ) . unixPermissions
@@ -770,8 +826,6 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
770826 safeEntries . push ( { entry, segments } )
771827 }
772828
773- // Reject standard zip bombs up front using the declared uncompressed sizes,
774- // before materializing any entry into memory.
775829 let declaredTotal = 0
776830 for ( const { entry } of safeEntries ) {
777831 const declaredSize = readEntryUncompressedSize ( entry )
@@ -781,19 +835,20 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
781835 if ( declaredTotal > MAX_DECOMPRESS_TOTAL_BYTES ) return totalTooLargeResponse ( )
782836 }
783837
784- // Read and validate every safe entry before writing anything, so a cap
785- // breach never leaves partially-extracted files behind in the workspace.
786838 const pending : Array < { segments : string [ ] ; buffer : Buffer } > = [ ]
787839 let totalBytes = 0
788840 for ( const { entry, segments } of safeEntries ) {
789- const buffer = await entry . async ( 'nodebuffer' )
790- // Enforce the per-entry cap on the materialized size too, covering
791- // entries that omit a declared uncompressed size.
792- if ( buffer . length > MAX_DECOMPRESS_ENTRY_BYTES ) return entryTooLargeResponse ( entry . name )
793- totalBytes += buffer . length
794- if ( totalBytes > MAX_DECOMPRESS_TOTAL_BYTES ) return totalTooLargeResponse ( )
795-
796- pending . push ( { segments, buffer } )
841+ const result = await inflateEntryWithinCaps (
842+ entry ,
843+ MAX_DECOMPRESS_TOTAL_BYTES - totalBytes
844+ )
845+ if ( ! result . ok ) {
846+ return result . reason === 'entry'
847+ ? entryTooLargeResponse ( entry . name )
848+ : totalTooLargeResponse ( )
849+ }
850+ totalBytes += result . buffer . length
851+ pending . push ( { segments, buffer : result . buffer } )
797852 }
798853
799854 if ( pending . length === 0 ) {
0 commit comments