66 * @module utils/headerExtractor
77 */
88
9- /** Max bytes to read from file for header extraction. */
10- const MAX_HEADER_READ = 4 * 1024 * 1024 ;
9+ import { MAX_FILE_SIZE } from './helpers.js' ;
10+
/**
 * Join several byte chunks end-to-end into one freshly allocated Uint8Array.
 * @param {Uint8Array[]} chunks - Chunks to join, in output order
 * @returns {Uint8Array} New array holding every chunk's bytes back to back
 */
function concatChunks(chunks) {
  // Measure everything first so the output buffer is allocated exactly once.
  let combinedLength = 0;
  for (const piece of chunks) {
    combinedLength += piece.length;
  }

  const combined = new Uint8Array(combinedLength);

  // Copy each chunk at the position where the previous one ended.
  let cursor = 0;
  for (let i = 0; i < chunks.length; i += 1) {
    const piece = chunks[i];
    combined.set(piece, cursor);
    cursor += piece.length;
  }

  return combined;
}
1126
1227/**
1328 * Read a slice of a File as an ArrayBuffer.
@@ -46,14 +61,7 @@ async function decompressGzipBlock(block) {
4661 chunks . push ( value ) ;
4762 }
4863
49- const totalLength = chunks . reduce ( ( sum , c ) => sum + c . length , 0 ) ;
50- const result = new Uint8Array ( totalLength ) ;
51- let offset = 0 ;
52- for ( const chunk of chunks ) {
53- result . set ( chunk , offset ) ;
54- offset += chunk . length ;
55- }
56- return result ;
64+ return concatChunks ( chunks ) ;
5765 } catch ( err ) {
5866 reader . cancel ( ) . catch ( ( ) => { } ) ;
5967 writer . abort ( err ) . catch ( ( ) => { } ) ;
@@ -62,13 +70,16 @@ async function decompressGzipBlock(block) {
6270}
6371
6472/**
65- * Decompress consecutive BGZF blocks from raw bytes.
73+ * Decompress consecutive BGZF blocks, stopping as soon as we have
74+ * enough uncompressed data for the BAM header.
6675 * @param {Uint8Array } bytes - Raw BGZF data
76+ * @param {number } [neededBytes=0] - Stop after accumulating this many uncompressed bytes (0 = all)
6777 * @returns {Promise<Uint8Array> } Concatenated decompressed data
6878 */
69- async function decompressBgzfBlocks ( bytes ) {
79+ async function decompressBgzfBlocks ( bytes , neededBytes = 0 ) {
7080 const chunks = [ ] ;
7181 let offset = 0 ;
82+ let accumulated = 0 ;
7283
7384 while ( offset < bytes . length ) {
7485 // Check for gzip magic
@@ -105,21 +116,18 @@ async function decompressBgzfBlocks(bytes) {
105116 const decompressed = await decompressGzipBlock ( block ) ;
106117 if ( decompressed . length === 0 ) break ; // EOF block
107118 chunks . push ( decompressed ) ;
119+ accumulated += decompressed . length ;
108120 } catch {
109121 break ;
110122 }
111123
112124 offset += bsize ;
113- }
114125
115- const totalLength = chunks . reduce ( ( sum , c ) => sum + c . length , 0 ) ;
116- const result = new Uint8Array ( totalLength ) ;
117- let writeOffset = 0 ;
118- for ( const chunk of chunks ) {
119- result . set ( chunk , writeOffset ) ;
120- writeOffset += chunk . length ;
126+ // Stop early once we have enough data for the header
127+ if ( neededBytes > 0 && accumulated >= neededBytes ) break ;
121128 }
122- return result ;
129+
130+ return concatChunks ( chunks ) ;
123131}
124132
125133/**
@@ -129,12 +137,12 @@ async function decompressBgzfBlocks(bytes) {
129137 * @throws {Error } If the file is not a valid BAM or extraction fails
130138 */
131139export async function extractBamHeader ( file ) {
132- const readSize = Math . min ( file . size , MAX_HEADER_READ ) ;
140+ const readSize = Math . min ( file . size , MAX_FILE_SIZE ) ;
133141 const buffer = await readFileSlice ( file , 0 , readSize ) ;
134142 const bytes = new Uint8Array ( buffer ) ;
135143
136- const uncompressed = await decompressBgzfBlocks ( bytes ) ;
137- const view = new DataView ( uncompressed . buffer ) ;
144+ // First pass: decompress enough to read the l_text field (8 bytes minimum)
145+ let uncompressed = await decompressBgzfBlocks ( bytes , 8 ) ;
138146
139147 // Verify BAM magic: "BAM\1"
140148 if ( uncompressed . length < 8 ||
@@ -143,9 +151,19 @@ export async function extractBamHeader(file) {
143151 throw new Error ( 'Not a valid BAM file (bad magic bytes)' ) ;
144152 }
145153
154+ const view = new DataView ( uncompressed . buffer ) ;
146155 const headerLength = view . getInt32 ( 4 , true ) ;
147- if ( headerLength < 0 || headerLength > uncompressed . length - 8 ) {
148- throw new Error ( 'BAM header length exceeds available data' ) ;
156+ if ( headerLength < 0 ) {
157+ throw new Error ( 'BAM header length is negative' ) ;
158+ }
159+
160+ // If we don't have enough data yet, decompress more blocks
161+ const needed = 8 + headerLength ;
162+ if ( uncompressed . length < needed ) {
163+ uncompressed = await decompressBgzfBlocks ( bytes , needed ) ;
164+ if ( uncompressed . length < needed ) {
165+ throw new Error ( 'BAM header length exceeds available data' ) ;
166+ }
149167 }
150168
151169 const decoder = new TextDecoder ( 'ascii' ) ;
@@ -162,12 +180,3 @@ export async function extractBamHeader(file) {
export function isBamFile(filename) {
  // Lower-case first so the extension check ignores case (".BAM", ".Bam", ...).
  const normalizedName = filename.toLowerCase();
  const hasBamExtension = normalizedName.endsWith('.bam');
  return hasBamExtension;
}
165-
166- /**
167- * Check if a filename indicates a CRAM file.
168- * @param {string } filename
169- * @returns {boolean }
170- */
171- export function isCramFile ( filename ) {
172- return filename . toLowerCase ( ) . endsWith ( '.cram' ) ;
173- }
0 commit comments