perf: 256 MB HTTP chunks for streaming GGUF indexing

claude · claude · commit cb8d7a3b626c · 2026-03-30T00:24:58.000Z
Fewer HTTP round-trips: 18 GB shard = ~72 requests instead of ~1152. 256 MB fits comfortably in RAM alongside the dequantized tensor. https://claude.ai/code/session_01Y69Vnw751w75iVSBRws7o7
diff --git a/src/hpc/gguf_indexer.rs b/src/hpc/gguf_indexer.rs
@@ -591,7 +591,7 @@ mod tests {
         };
         eprintln!("  URL resolved, size: {:.2} GB", size as f64 / 1e9);
 
-        let mut reader = HttpRangeReader::with_chunk_size(url, size, 16 * 1024 * 1024); // 16 MB chunks
+        let mut reader = HttpRangeReader::with_chunk_size(url, size, 256 * 1024 * 1024); // 256 MB chunks
 
         let out_path = "/tmp/llama4_scout.bgz7";
         let out = std::fs::File::create(out_path).expect("create output");
@@ -651,7 +651,7 @@ mod tests {
         eprintln!("  URL: {}", url);
 
-        // 16 MB chunks for fewer HTTP round-trips
-        let mut reader = HttpRangeReader::with_chunk_size(url, size, 16 * 1024 * 1024);
+        // 256 MB chunks for fewer HTTP round-trips
+        let mut reader = HttpRangeReader::with_chunk_size(url, size, 256 * 1024 * 1024);
 
         let out_path = "/tmp/llama4_scout_shard5.bgz7";
         let out = std::fs::File::create(out_path).expect("create output");
diff --git a/src/hpc/http_reader.rs b/src/hpc/http_reader.rs
@@ -31,8 +31,8 @@ pub struct HttpRangeReader {
 }
 
 impl HttpRangeReader {
-    /// Default chunk: 8 MB (good balance for GGUF tensor reads).
-    const DEFAULT_CHUNK: usize = 8 * 1024 * 1024;
+    /// Default chunk: 256 MB (fewer HTTP round-trips, fits in RAM easily).
+    const DEFAULT_CHUNK: usize = 256 * 1024 * 1024;
 
     /// Create a new HTTP range reader.
     ///

Original file line number	Diff line number	Diff line change
`@@ -31,8 +31,8 @@ pub struct HttpRangeReader {`
`31`	`31`	`}`
`32`	`32`
`33`	`33`	`impl HttpRangeReader {`
`34`		`- /// Default chunk: 8 MB (good balance for GGUF tensor reads).`
`35`		`- const DEFAULT_CHUNK: usize = 8 * 1024 * 1024;`
	`34`	`+ /// Default chunk: 256 MB (fewer HTTP round-trips, fits in RAM easily).`
	`35`	`+ const DEFAULT_CHUNK: usize = 256 * 1024 * 1024;`
`36`	`36`
`37`	`37`	`/// Create a new HTTP range reader.`
`38`	`38`	`///`