Skip to content

Commit 6744150

Browse files
committed
feat(hpc): add zeck module (ZeckF64 encoding + batch/top_k) and hamming_top_k_raw
New hpc::zeck module ported from lance-graph's ZeckF64 progressive edge encoding:

- zeckf64() / zeckf64_from_distances() — encode SPO triple distances
- zeckf64_distance() / scent_distance() / progressive_distance()
- zeckf64_batch() / zeckf64_top_k() — batch distance + O(n) partial sort
- zeckf64_scent_batch() / zeckf64_scent_top_k() — scent-only fast path
- zeckf64_encode_batch() — encode query against flat database
- is_legal_scent() — boolean lattice validation
- 11 tests passing

Also added hamming_top_k_raw() to hpc::bitwise for raw slice top-k search.

https://claude.ai/code/session_01CdqyUTUfjKZuk8YGJzv6LB
1 parent 8741ea8 commit 6744150

3 files changed

Lines changed: 444 additions & 0 deletions

File tree

src/hpc/bitwise.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,31 @@ pub fn hamming_batch_raw(query: &[u8], database: &[u8], num_rows: usize, row_byt
230230
dispatch_hamming_batch(query, database, num_rows, row_bytes)
231231
}
232232

233+
/// Top-k nearest neighbors by Hamming distance on raw slices.
234+
///
235+
/// Returns (indices, distances) of the k closest rows in the database.
236+
/// Uses `select_nth_unstable` for O(n) partial sort instead of O(n log n).
237+
pub fn hamming_top_k_raw(
238+
query: &[u8],
239+
database: &[u8],
240+
num_rows: usize,
241+
row_bytes: usize,
242+
k: usize,
243+
) -> (Vec<usize>, Vec<u64>) {
244+
let distances = dispatch_hamming_batch(query, database, num_rows, row_bytes);
245+
let k = k.min(num_rows);
246+
if k == 0 {
247+
return (Vec::new(), Vec::new());
248+
}
249+
let mut indexed: Vec<(usize, u64)> = distances.into_iter().enumerate().collect();
250+
indexed.select_nth_unstable_by_key(k.saturating_sub(1), |&(_, d)| d);
251+
indexed.truncate(k);
252+
indexed.sort_unstable_by_key(|&(_, d)| d);
253+
let indices = indexed.iter().map(|&(i, _)| i).collect();
254+
let dists = indexed.iter().map(|&(_, d)| d).collect();
255+
(indices, dists)
256+
}
257+
233258
fn dispatch_hamming(a: &[u8], b: &[u8]) -> u64 {
234259
#[cfg(target_arch = "x86_64")]
235260
{

src/hpc/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ pub mod layered_distance;
126126
#[allow(missing_docs)]
127127
pub mod parallel_search;
128128

129+
// ZeckF64 progressive edge encoding + batch/top-k
130+
#[allow(missing_docs)]
131+
pub mod zeck;
132+
129133
#[cfg(test)]
130134
mod e2e_tests {
131135
//! End-to-end pipeline test: Fingerprint → Node → Seal → Cascade → CLAM → Causality → BNN

0 commit comments

Comments
 (0)