Skip to content

Commit 7ab5461

Browse files
committed
refactor: extract stream_index_gguf_bf16_with_header for format-agnostic indexing
Splits stream_index_gguf_bf16 into:
- stream_index_gguf_bf16(): parses the GGUF header, then delegates to stream_index_gguf_bf16_with_header()
- stream_index_gguf_bf16_with_header(): the core loop; works with any pre-parsed header (GGUF or safetensors)

No behavior change for existing callers.
1 parent 75541e6 commit 7ab5461

1 file changed

Lines changed: 21 additions & 6 deletions

File tree

src/hpc/gguf_indexer.rs

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -519,19 +519,34 @@ pub fn stream_index_gguf_bf16<R: Read + Seek, W: Write>(
519519
octave_stride: usize,
520520
callback: Option<&dyn Fn(&str, &LayerType, usize, usize)>,
521521
) -> Result<IndexStats, String> {
522-
let gguf_header = gguf::read_gguf_header(reader)?;
522+
let header = gguf::read_gguf_header(reader)?;
523+
stream_index_gguf_bf16_with_header(reader, writer, &header, octave_stride, callback)
524+
}
525+
526+
/// Core BF16-direct indexer — works with any pre-parsed header (GGUF or safetensors).
527+
///
528+
/// The header must have:
529+
/// - `tensor_data_offset`: absolute byte offset where tensor data starts
530+
/// - `tensors`: `Vec<TensorInfo>` with name, dimensions, dtype, offset (relative to data start)
531+
pub fn stream_index_gguf_bf16_with_header<R: Read + Seek, W: Write>(
532+
reader: &mut R,
533+
writer: &mut W,
534+
header: &gguf::GgufFile,
535+
octave_stride: usize,
536+
callback: Option<&dyn Fn(&str, &LayerType, usize, usize)>,
537+
) -> Result<IndexStats, String> {
523538
let mut stats = IndexStats::default();
524-
stats.tensors_total = gguf_header.tensors.len();
539+
stats.tensors_total = header.tensors.len();
525540

526541
writer.write_all(b"BGZ7").map_err(|e| e.to_string())?;
527-
writer.write_all(&(gguf_header.tensors.len() as u32).to_le_bytes()).map_err(|e| e.to_string())?;
542+
writer.write_all(&(header.tensors.len() as u32).to_le_bytes()).map_err(|e| e.to_string())?;
528543

529544
// Reusable buffer — capped at 128 MB (64M u16 elements).
530545
// Tensors larger than this are read in row batches.
531546
const MAX_BUF_ELEMS: usize = 64 * 1024 * 1024; // 128 MB of u16
532547
let mut bf16_buf: Vec<u16> = Vec::new();
533548

534-
for tensor in &gguf_header.tensors {
549+
for tensor in &header.tensors {
535550
let layer_type = classify_tensor(&tensor.name, &tensor.dimensions);
536551

537552
if matches!(layer_type, LayerType::Skip | LayerType::Norm) {
@@ -559,7 +574,7 @@ pub fn stream_index_gguf_bf16<R: Read + Seek, W: Write>(
559574
}
560575

561576
// Seek to tensor start
562-
let abs_offset = gguf_header.tensor_data_offset + tensor.offset;
577+
let abs_offset = header.tensor_data_offset + tensor.offset;
563578
reader.seek(std::io::SeekFrom::Start(abs_offset)).map_err(|e| e.to_string())?;
564579

565580
let mut rows: Vec<Base17> = Vec::with_capacity(n_rows);
@@ -636,7 +651,7 @@ pub fn stream_index_gguf_bf16<R: Read + Seek, W: Write>(
636651
}
637652
} else {
638653
// FALLBACK: non-BF16 — use original f32 path
639-
let data = gguf::read_tensor_f32(reader, &gguf_header, tensor)?;
654+
let data = gguf::read_tensor_f32(reader, &header, tensor)?;
640655
let tensor_bytes = data.len() as u64 * 4;
641656
if tensor_bytes > stats.peak_tensor_bytes {
642657
stats.peak_tensor_bytes = tensor_bytes;

0 commit comments

Comments
 (0)