@@ -635,4 +635,65 @@ mod tests {
635635
636636 assert ! ( stats. tensors_indexed > 0 ) ;
637637 }
638+
/// Streams shard 5 of the Llama 4 Scout BF16 GGUF from HuggingFace through
/// `stream_index_gguf`, writes the result to a file under `/tmp`, and prints
/// a summary of the returned statistics to stderr.
///
/// Ignored by default: it needs network access and reads a multi-gigabyte
/// remote file via HTTP range requests.
#[test]
#[ignore = "streams BF16 shard 5 (18.2 GB) from HuggingFace"]
fn test_stream_index_llama4_bf16_shard5() {
    use super::super::http_reader::HttpRangeReader;
    // `Write` is imported anonymously: only its `flush` method is needed.
    use std::io::{BufWriter, Write as _};

    let repo = "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF";
    let filename = "BF16/Llama-4-Scout-17B-16E-Instruct-BF16-00005-of-00005.gguf";
    let size: u64 = 18_220_000_000; // ~18.2 GB from metadata

    let url = format!("https://huggingface.co/{}/resolve/main/{}", repo, filename);
    eprintln!("Streaming shard 5: {:.2} GB", size as f64 / 1e9);
    eprintln!(" URL: {}", url);

    // 16 MB chunks for fewer HTTP round-trips
    let mut reader = HttpRangeReader::with_chunk_size(url, size, 16 * 1024 * 1024);

    let out_path = "/tmp/llama4_scout_shard5.bgz7";
    let out = std::fs::File::create(out_path).expect("create output");
    let mut writer = BufWriter::new(out);

    let stats = stream_index_gguf(
        &mut reader,
        &mut writer,
        // Per-tensor progress callback: name, layer type, original and compressed sizes.
        Some(&|name, layer_type, orig, comp| {
            let ratio = if comp > 0 { orig as f64 / comp as f64 } else { 0.0 };
            eprintln!(" {:60} {:12?} {:>12} → {:>8} ({:.0}×)",
                name, layer_type, orig, comp, ratio);
        }),
    ).expect("stream_index_gguf");

    // Flush explicitly: dropping a `BufWriter` discards any error from the
    // final write, which would leave a truncated file and a misleading size.
    writer.flush().expect("flush output");
    drop(writer);
    let out_size = std::fs::metadata(out_path).map(|m| m.len()).unwrap_or(0);

    eprintln!();
    eprintln!("=== Llama 4 Scout BF16 Shard 5 → bgz17 ===");
    eprintln!(" Source: {:.2} GB (BF16, streamed from HF)", size as f64 / 1e9);
    eprintln!(" Output: {:.2} MB", out_size as f64 / 1e6);
    eprintln!(" Downloaded: {:.2} GB", reader.bytes_downloaded() as f64 / 1e9);
    eprintln!(" Tensors: {} indexed, {} skipped",
        stats.tensors_indexed, stats.tensors_skipped);
    eprintln!(" Original (f32): {:.2} GB", stats.original_bytes as f64 / 1e9);
    eprintln!(" Compressed: {:.2} MB", stats.compressed_bytes as f64 / 1e6);
    eprintln!(" Ratio: {:.1}×", stats.overall_ratio());
    eprintln!(" Peak tensor: {:.2} MB", stats.peak_tensor_bytes as f64 / 1e6);

    // Labels for the per-type breakdown; the order is assumed to match the
    // indexing of `stats.by_type` (TODO confirm against its definition).
    let type_names = ["Attention", "FeedForward", "Conv2D", "Norm", "Embedding", "Skip"];
    for (i, name) in type_names.iter().enumerate() {
        let (count, orig, comp) = stats.by_type[i];
        if count > 0 {
            let ratio = if comp > 0 { orig as f64 / comp as f64 } else { 0.0 };
            eprintln!(" {:<12} {:>3} tensors: {:>10.2} GB → {:>8.2} MB ({:.1}×)",
                name, count, orig as f64 / 1e9, comp as f64 / 1e6, ratio);
        }
    }

    assert!(stats.tensors_indexed > 0);
    // Sanity check only: some original bytes were accounted for. If BF16 is
    // widened to f32 this should exceed the shard size, but `size` above is
    // approximate, so only non-zero is asserted.
    assert!(stats.original_bytes > 0);
}
638699}
0 commit comments