Skip to content

Commit 65c8a98

Browse files
LuQQiu and claude authored
feat: add ANN proto codecs and extract table_identifier module (#6503)
Add protobuf encode/decode for `ANNIvfSubIndexExec`

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent b3698e6 commit 65c8a98

11 files changed

Lines changed: 692 additions & 105 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

protos/ann.proto

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright The Lance Authors
3+
4+
syntax = "proto3";
5+
6+
package lance.pb;
7+
8+
import "table_identifier.proto";
9+
import "table.proto";
10+
import "index.proto";
11+
12+
// Serialized vector query parameters.
//
// Fields declared `optional` carry explicit presence, so "not set" can be
// told apart from a zero value. Plain scalar fields (`column`, `k`,
// `use_index`) have no presence: when omitted they decode as the proto3
// default ("", 0, false).
13+
message VectorQueryProto {
14+
// Query vector as Arrow IPC bytes (supports Float16, Float32, Float64, UInt8, etc.)
15+
bytes query_vector_arrow_ipc = 1;
16+
// Column the query targets — presumably the vector column name; confirm
// against the encoder.
string column = 2;
17+
// NOTE(review): presumably the number of nearest neighbours to return.
// No presence tracking: an omitted value decodes as 0.
uint32 k = 3;
18+
// NOTE(review): lower_bound / upper_bound look like an optional distance
// range for results — confirm inclusivity and units at the source type.
optional float lower_bound = 4;
19+
optional float upper_bound = 5;
20+
// NOTE(review): presumably the min / max number of IVF partitions to probe;
// absent means "not specified" — confirm what default the decoder applies.
optional uint32 minimum_nprobes = 6;
21+
optional uint32 maximum_nprobes = 7;
22+
// NOTE(review): presumably the HNSW search-time `ef` parameter — confirm.
optional uint32 ef = 8;
23+
// NOTE(review): presumably the over-fetch multiplier used for refinement —
// confirm.
optional uint32 refine_factor = 9;
24+
// Distance metric type. Absent means None (use the index's default metric).
25+
optional lance.index.pb.VectorMetricType metric_type = 10;
26+
// Plain (non-`optional`) bool: an omitted field decodes as `false`.
// NOTE(review): if the intended default is "use the index", confirm that
// every encoder sets this field explicitly.
bool use_index = 11;
27+
// NOTE(review): meaning is not evident from this file; document it at the
// type this message mirrors.
optional float dist_q_c = 12;
28+
}
29+
30+
// Serializable form of ANNIvfSubIndexExec — the IVF sub-index search node.
31+
//
32+
// Note: ANNIvfSubIndexExec.prefilter_source (child ExecutionPlan) is NOT
33+
// serialized here. DataFusion's PhysicalExtensionCodec handles child plans
34+
// automatically via children() / with_new_children(). The codec receives
35+
// deserialized children in the `inputs` parameter of try_decode and
36+
// reconstructs the PreFilterSource from them.
//
// All three fields are message-typed, so an unset `query` or `table` is
// simply absent on the wire. NOTE(review): confirm the decoder rejects a
// missing `query` / `table` rather than substituting defaults.
37+
message ANNIvfSubIndexExecProto {
38+
// The vector query this node executes (see VectorQueryProto).
VectorQueryProto query = 1;
39+
// Identifies the table to search. The type lives in package
// `lance.datafusion` — presumably declared in the imported
// table_identifier.proto.
lance.datafusion.TableIdentifier table = 2;
40+
// Zero or more index metadata entries (`repeated`).
// NOTE(review): presumably the index segments this node searches — confirm.
repeated lance.table.IndexMetadata indices = 3;
41+
}

python/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rust/lance/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ semver.workspace = true
8585
tokio-stream = { workspace = true }
8686
tokio-util = { workspace = true }
8787

88+
[build-dependencies]
89+
prost-build.workspace = true
90+
protobuf-src = { version = "2.1", optional = true }
91+
8892
[target.'cfg(target_os = "linux")'.dev-dependencies]
8993
pprof.workspace = true
9094
# Need this so we can prevent dynamic linking in binaries (see cli feature)
@@ -129,6 +133,7 @@ dynamodb = ["lance-table/dynamodb", "dep:aws-sdk-dynamodb"]
129133
dynamodb_tests = ["dynamodb"]
130134
substrait = ["lance-datafusion/substrait"]
131135
protoc = [
136+
"dep:protobuf-src",
132137
"lance-encoding/protoc",
133138
"lance-file/protoc",
134139
"lance-index/protoc",

rust/lance/build.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright The Lance Authors
3+
4+
use std::io::Result;
5+
6+
/// Build-script entry point: compiles `./protos/ann.proto` to Rust with
/// `prost-build`.
///
/// # Errors
///
/// Returns the `std::io::Error` produced by `compile_protos` if running
/// `protoc` or writing the generated code fails.
fn main() -> Result<()> {
7+
// Tell Cargo to re-run this script when anything under `protos` changes.
println!("cargo:rerun-if-changed=protos");
8+
9+
#[cfg(feature = "protoc")]
10+
// Use vendored protobuf compiler if requested.
// SAFETY: NOTE(review): `set_var` is only sound while no other thread reads
// or writes the process environment. Nothing in `main` spawns a thread
// before this call, so this assumes the build script is still
// single-threaded here — confirm.
11+
unsafe {
12+
std::env::set_var("PROTOC", protobuf_src::protoc());
13+
}
14+
15+
let mut prost_build = prost_build::Config::new();
// Proto packages that other crates already generate are mapped onto those
// crates' Rust modules, so their types are referenced instead of being
// generated a second time here.
16+
prost_build.extern_path(".lance.table", "::lance_table::format::pb");
17+
prost_build.extern_path(".lance.index.pb", "::lance_index::pb");
18+
prost_build.extern_path(".lance.datafusion", "::lance_datafusion::pb");
// ann.proto uses proto3 `optional` fields; this flag lets protoc releases
// that still gate that feature accept them.
19+
prost_build.protoc_arg("--experimental_allow_proto3_optional");
// Ask prost-build to emit type-name information for the generated messages.
20+
prost_build.enable_type_names();
// Compile ann.proto, resolving its `import` statements relative to
// `./protos`; `?` propagates any failure to the caller.
21+
prost_build.compile_protos(&["./protos/ann.proto"], &["./protos"])?;
22+
23+
Ok(())
24+
}

rust/lance/src/io/exec.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
//!
66
//! WARNING: Internal API with no stability guarantees.
77
8+
#[cfg(feature = "substrait")]
9+
pub mod ann_proto;
810
mod filter;
911
pub mod filtered_read;
1012
#[cfg(feature = "substrait")]
@@ -17,6 +19,8 @@ mod pushdown_scan;
1719
mod rowids;
1820
pub mod scalar_index;
1921
mod scan;
22+
#[cfg(feature = "substrait")]
23+
pub mod table_identifier;
2024
mod take;
2125
#[cfg(test)]
2226
pub mod testing;

0 commit comments

Comments
 (0)