11import { Buffer , isUtf8 } from 'buffer'
2+ import type { Readable } from 'stream'
23import { createLogger } from '@sim/logger'
34import { getErrorMessage } from '@sim/utils/errors'
45import { generateShortId } from '@sim/utils/id'
@@ -197,13 +198,67 @@ const MAX_DECOMPRESS_TOTAL_BYTES = 200 * 1024 * 1024
197198const S_IFMT = 0o170000
198199const S_IFLNK = 0o120000
199200
200- /** Read a zip entry's declared uncompressed size without materializing it (zip-bomb pre-check). */
201+ /**
202+ * Read a zip entry's declared uncompressed size without materializing it. This
203+ * value comes straight from the (attacker-controlled) ZIP metadata, so it is only
204+ * usable as a cheap fast-reject for honestly-declared archives — never as the
205+ * authoritative cap. {@link inflateEntryWithinCaps} enforces the real limit on the
206+ * inflated byte stream.
207+ */
201208const readEntryUncompressedSize = ( entry : JSZip . JSZipObject ) : number | undefined => {
202209 const data = ( entry as JSZip . JSZipObject & { _data ?: { uncompressedSize ?: number } } ) . _data
203210 const size = data ?. uncompressedSize
204211 return typeof size === 'number' && Number . isFinite ( size ) ? size : undefined
205212}
206213
214+ type InflateResult = { ok : true ; buffer : Buffer } | { ok : false ; reason : 'entry' | 'total' }
215+
216+ /**
217+ * Inflate a single zip entry through a streaming counting sink, tearing the
218+ * stream down the moment cumulative output would exceed the per-entry cap or the
219+ * remaining total budget. The declared uncompressed size in the ZIP header is
220+ * attacker-controlled and is NOT trusted here: a forged-small or absent size
221+ * cannot cause the full (potentially gigabyte-scale) entry to be materialized in
222+ * memory, because enforcement happens on the actual inflated bytes as they
223+ * arrive. Peak memory is bounded by the cap plus one DEFLATE chunk.
224+ */
225+ const inflateEntryWithinCaps = (
226+ entry : JSZip . JSZipObject ,
227+ remainingTotalBudget : number
228+ ) : Promise < InflateResult > =>
229+ new Promise ( ( resolve , reject ) => {
230+ const chunks : Buffer [ ] = [ ]
231+ let size = 0
232+ let settled = false
233+ const stream = entry . nodeStream ( ) as Readable
234+
235+ const settle = ( result : InflateResult ) => {
236+ if ( settled ) return
237+ settled = true
238+ stream . destroy ( )
239+ resolve ( result )
240+ }
241+
242+ stream . on ( 'data' , ( chunk : Buffer ) => {
243+ size += chunk . length
244+ if ( size > MAX_DECOMPRESS_ENTRY_BYTES ) {
245+ settle ( { ok : false , reason : 'entry' } )
246+ return
247+ }
248+ if ( size > remainingTotalBudget ) {
249+ settle ( { ok : false , reason : 'total' } )
250+ return
251+ }
252+ chunks . push ( chunk )
253+ } )
254+ stream . on ( 'end' , ( ) => settle ( { ok : true , buffer : Buffer . concat ( chunks , size ) } ) )
255+ stream . on ( 'error' , ( error ) => {
256+ if ( settled ) return
257+ settled = true
258+ reject ( error )
259+ } )
260+ } )
261+
207262/** True when a zip entry's unix mode marks it as a symlink (never extracted). */
208263const isSymlinkEntry = ( entry : JSZip . JSZipObject ) : boolean => {
209264 const mode = ( entry as JSZip . JSZipObject & { unixPermissions ?: number | null } ) . unixPermissions
@@ -770,8 +825,6 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
770825 safeEntries . push ( { entry, segments } )
771826 }
772827
773- // Reject standard zip bombs up front using the declared uncompressed sizes,
774- // before materializing any entry into memory.
775828 let declaredTotal = 0
776829 for ( const { entry } of safeEntries ) {
777830 const declaredSize = readEntryUncompressedSize ( entry )
@@ -781,19 +834,20 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
781834 if ( declaredTotal > MAX_DECOMPRESS_TOTAL_BYTES ) return totalTooLargeResponse ( )
782835 }
783836
784- // Read and validate every safe entry before writing anything, so a cap
785- // breach never leaves partially-extracted files behind in the workspace.
786837 const pending : Array < { segments : string [ ] ; buffer : Buffer } > = [ ]
787838 let totalBytes = 0
788839 for ( const { entry, segments } of safeEntries ) {
789- const buffer = await entry . async ( 'nodebuffer' )
790- // Enforce the per-entry cap on the materialized size too, covering
791- // entries that omit a declared uncompressed size.
792- if ( buffer . length > MAX_DECOMPRESS_ENTRY_BYTES ) return entryTooLargeResponse ( entry . name )
793- totalBytes += buffer . length
794- if ( totalBytes > MAX_DECOMPRESS_TOTAL_BYTES ) return totalTooLargeResponse ( )
795-
796- pending . push ( { segments, buffer } )
840+ const result = await inflateEntryWithinCaps (
841+ entry ,
842+ MAX_DECOMPRESS_TOTAL_BYTES - totalBytes
843+ )
844+ if ( ! result . ok ) {
845+ return result . reason === 'entry'
846+ ? entryTooLargeResponse ( entry . name )
847+ : totalTooLargeResponse ( )
848+ }
849+ totalBytes += result . buffer . length
850+ pending . push ( { segments, buffer : result . buffer } )
797851 }
798852
799853 if ( pending . length === 0 ) {
0 commit comments