Skip to content

Commit cb8d7a3

Browse files
committed
perf: 256 MB HTTP chunks for streaming GGUF indexing
Fewer HTTP round-trips: 18 GB shard = ~72 requests instead of ~1152. 256 MB fits comfortably in RAM alongside the dequantized tensor. https://claude.ai/code/session_01Y69Vnw751w75iVSBRws7o7
1 parent 5b98eee commit cb8d7a3

2 files changed

Lines changed: 4 additions & 4 deletions

File tree

src/hpc/gguf_indexer.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -591,7 +591,7 @@ mod tests {
591591
};
592592
eprintln!(" URL resolved, size: {:.2} GB", size as f64 / 1e9);
593593

594-
let mut reader = HttpRangeReader::with_chunk_size(url, size, 16 * 1024 * 1024); // 16 MB chunks
594+
let mut reader = HttpRangeReader::with_chunk_size(url, size, 256 * 1024 * 1024); // 256 MB chunks
595595

596596
let out_path = "/tmp/llama4_scout.bgz7";
597597
let out = std::fs::File::create(out_path).expect("create output");
@@ -651,7 +651,7 @@ mod tests {
651651
eprintln!(" URL: {}", url);
652652

653653
// 256 MB chunks for fewer HTTP round-trips
654-
let mut reader = HttpRangeReader::with_chunk_size(url, size, 16 * 1024 * 1024);
654+
let mut reader = HttpRangeReader::with_chunk_size(url, size, 256 * 1024 * 1024);
655655

656656
let out_path = "/tmp/llama4_scout_shard5.bgz7";
657657
let out = std::fs::File::create(out_path).expect("create output");

src/hpc/http_reader.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ pub struct HttpRangeReader {
3131
}
3232

3333
impl HttpRangeReader {
34-
/// Default chunk: 8 MB (good balance for GGUF tensor reads).
35-
const DEFAULT_CHUNK: usize = 8 * 1024 * 1024;
34+
/// Default chunk: 256 MB (fewer HTTP round-trips, fits in RAM easily).
35+
const DEFAULT_CHUNK: usize = 256 * 1024 * 1024;
3636

3737
/// Create a new HTTP range reader.
3838
///

0 commit comments

Comments
 (0)