Skip to content

Commit a2cd2a5

Browse files
committed
refactor: simplify headerExtractor, fix review findings
- Extract concatChunks helper to deduplicate chunk concatenation
- Import MAX_FILE_SIZE from helpers.js instead of redefining locally
- Remove unused isCramFile export (not wired up yet)
- Use tabManager.setCurrentFile() instead of direct property mutation
- Early-stop BGZF decompression once enough bytes for header are available
- Propagate underlying parse errors instead of suppressing them
1 parent 9fd36eb commit a2cd2a5

3 files changed

Lines changed: 50 additions & 42 deletions

File tree

src/web/format_detection.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -402,19 +402,19 @@ pub fn parse_binary_file(
402402
match format {
403403
FileFormat::Bam => {
404404
let cursor = std::io::Cursor::new(file_content);
405-
crate::parsing::sam::parse_bam_from_reader(cursor).map_err(|_e| {
405+
crate::parsing::sam::parse_bam_from_reader(cursor).map_err(|e| {
406406
ParseError::ParseFailed {
407407
format,
408-
message: "BAM file parsing failed".to_string(),
408+
message: format!("BAM file parsing failed: {e}"),
409409
}
410410
})
411411
}
412412
FileFormat::Cram => {
413413
let cursor = std::io::Cursor::new(file_content);
414-
crate::parsing::sam::parse_cram_from_reader(cursor).map_err(|_e| {
414+
crate::parsing::sam::parse_cram_from_reader(cursor).map_err(|e| {
415415
ParseError::ParseFailed {
416416
format,
417-
message: "CRAM file parsing failed".to_string(),
417+
message: format!("CRAM file parsing failed: {e}"),
418418
}
419419
})
420420
}
@@ -432,9 +432,9 @@ pub fn parse_binary_file(
432432
temp_file.write_all(file_content).map_err(ParseError::Io)?;
433433

434434
let result = crate::parsing::fasta::parse_fasta_file(temp_file.path());
435-
result.map_err(|_e| ParseError::ParseFailed {
435+
result.map_err(|e| ParseError::ParseFailed {
436436
format,
437-
message: "FASTA file parsing failed".to_string(),
437+
message: format!("FASTA file parsing failed: {e}"),
438438
})
439439
}
440440
_ => Err(ParseError::ParseFailed {

src/web/static/js/main.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,7 @@ async function handleFileUpload(input, format) {
241241
const headerFile = new File(
242242
[headerBlob], file.name + '.header.sam', { type: 'text/plain' }
243243
);
244-
tabManager.currentFile = headerFile;
245-
tabManager.currentFormat = 'text';
244+
tabManager.setCurrentFile(headerFile, 'text');
246245

247246
// Show the extracted header in the binary preview area
248247
document.getElementById('binary-preview').style.display = 'block';

src/web/static/js/utils/headerExtractor.js

Lines changed: 43 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,23 @@
66
* @module utils/headerExtractor
77
*/
88

9-
/** Max bytes to read from file for header extraction. */
10-
const MAX_HEADER_READ = 4 * 1024 * 1024;
9+
import { MAX_FILE_SIZE } from './helpers.js';
10+
11+
/**
12+
* Concatenate an array of Uint8Array chunks into a single Uint8Array.
13+
* @param {Uint8Array[]} chunks
14+
* @returns {Uint8Array}
15+
*/
16+
function concatChunks(chunks) {
17+
const totalLength = chunks.reduce((sum, c) => sum + c.length, 0);
18+
const result = new Uint8Array(totalLength);
19+
let offset = 0;
20+
for (const chunk of chunks) {
21+
result.set(chunk, offset);
22+
offset += chunk.length;
23+
}
24+
return result;
25+
}
1126

1227
/**
1328
* Read a slice of a File as an ArrayBuffer.
@@ -46,14 +61,7 @@ async function decompressGzipBlock(block) {
4661
chunks.push(value);
4762
}
4863

49-
const totalLength = chunks.reduce((sum, c) => sum + c.length, 0);
50-
const result = new Uint8Array(totalLength);
51-
let offset = 0;
52-
for (const chunk of chunks) {
53-
result.set(chunk, offset);
54-
offset += chunk.length;
55-
}
56-
return result;
64+
return concatChunks(chunks);
5765
} catch (err) {
5866
reader.cancel().catch(() => {});
5967
writer.abort(err).catch(() => {});
@@ -62,13 +70,16 @@ async function decompressGzipBlock(block) {
6270
}
6371

6472
/**
65-
* Decompress consecutive BGZF blocks from raw bytes.
73+
* Decompress consecutive BGZF blocks, stopping as soon as we have
74+
* enough uncompressed data for the BAM header.
6675
* @param {Uint8Array} bytes - Raw BGZF data
76+
* @param {number} [neededBytes=0] - Stop after accumulating this many uncompressed bytes (0 = all)
6777
* @returns {Promise<Uint8Array>} Concatenated decompressed data
6878
*/
69-
async function decompressBgzfBlocks(bytes) {
79+
async function decompressBgzfBlocks(bytes, neededBytes = 0) {
7080
const chunks = [];
7181
let offset = 0;
82+
let accumulated = 0;
7283

7384
while (offset < bytes.length) {
7485
// Check for gzip magic
@@ -105,21 +116,18 @@ async function decompressBgzfBlocks(bytes) {
105116
const decompressed = await decompressGzipBlock(block);
106117
if (decompressed.length === 0) break; // EOF block
107118
chunks.push(decompressed);
119+
accumulated += decompressed.length;
108120
} catch {
109121
break;
110122
}
111123

112124
offset += bsize;
113-
}
114125

115-
const totalLength = chunks.reduce((sum, c) => sum + c.length, 0);
116-
const result = new Uint8Array(totalLength);
117-
let writeOffset = 0;
118-
for (const chunk of chunks) {
119-
result.set(chunk, writeOffset);
120-
writeOffset += chunk.length;
126+
// Stop early once we have enough data for the header
127+
if (neededBytes > 0 && accumulated >= neededBytes) break;
121128
}
122-
return result;
129+
130+
return concatChunks(chunks);
123131
}
124132

125133
/**
@@ -129,12 +137,12 @@ async function decompressBgzfBlocks(bytes) {
129137
* @throws {Error} If the file is not a valid BAM or extraction fails
130138
*/
131139
export async function extractBamHeader(file) {
132-
const readSize = Math.min(file.size, MAX_HEADER_READ);
140+
const readSize = Math.min(file.size, MAX_FILE_SIZE);
133141
const buffer = await readFileSlice(file, 0, readSize);
134142
const bytes = new Uint8Array(buffer);
135143

136-
const uncompressed = await decompressBgzfBlocks(bytes);
137-
const view = new DataView(uncompressed.buffer);
144+
// First pass: decompress enough to read the l_text field (8 bytes minimum)
145+
let uncompressed = await decompressBgzfBlocks(bytes, 8);
138146

139147
// Verify BAM magic: "BAM\1"
140148
if (uncompressed.length < 8 ||
@@ -143,9 +151,19 @@ export async function extractBamHeader(file) {
143151
throw new Error('Not a valid BAM file (bad magic bytes)');
144152
}
145153

154+
const view = new DataView(uncompressed.buffer);
146155
const headerLength = view.getInt32(4, true);
147-
if (headerLength < 0 || headerLength > uncompressed.length - 8) {
148-
throw new Error('BAM header length exceeds available data');
156+
if (headerLength < 0) {
157+
throw new Error('BAM header length is negative');
158+
}
159+
160+
// If we don't have enough data yet, decompress more blocks
161+
const needed = 8 + headerLength;
162+
if (uncompressed.length < needed) {
163+
uncompressed = await decompressBgzfBlocks(bytes, needed);
164+
if (uncompressed.length < needed) {
165+
throw new Error('BAM header length exceeds available data');
166+
}
149167
}
150168

151169
const decoder = new TextDecoder('ascii');
@@ -162,12 +180,3 @@ export async function extractBamHeader(file) {
162180
export function isBamFile(filename) {
163181
return filename.toLowerCase().endsWith('.bam');
164182
}
165-
166-
/**
167-
* Check if a filename indicates a CRAM file.
168-
* @param {string} filename
169-
* @returns {boolean}
170-
*/
171-
export function isCramFile(filename) {
172-
return filename.toLowerCase().endsWith('.cram');
173-
}

0 commit comments

Comments
 (0)