diff --git a/ann_benchmarks/algorithms/arrwdb/Dockerfile b/ann_benchmarks/algorithms/arrwdb/Dockerfile new file mode 100644 index 000000000..acade7c97 --- /dev/null +++ b/ann_benchmarks/algorithms/arrwdb/Dockerfile @@ -0,0 +1,16 @@ +FROM ann-benchmarks + +# Install Rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y +ENV PATH="/root/.cargo/bin:${PATH}" + +# Install maturin +RUN pip install maturin numpy + +# Clone and build arrwDB's Rust HNSW index +RUN git clone https://github.com/bledden/arrwDB.git /opt/arrwdb +RUN cd /opt/arrwdb/rust/indexes && \ + RUSTFLAGS='-C target-cpu=native' maturin build --release && \ + pip install target/wheels/*.whl + +RUN python -c 'from rust_hnsw import RustFastHNSWIndex; print("arrwDB OK")' diff --git a/ann_benchmarks/algorithms/arrwdb/__init__.py b/ann_benchmarks/algorithms/arrwdb/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ann_benchmarks/algorithms/arrwdb/config.yml b/ann_benchmarks/algorithms/arrwdb/config.yml new file mode 100644 index 000000000..965534a48 --- /dev/null +++ b/ann_benchmarks/algorithms/arrwdb/config.yml @@ -0,0 +1,25 @@ +float: + any: + - base_args: ['@metric'] + constructor: ArrwDB + disabled: false + docker_tag: ann-benchmarks-arrwdb + module: ann_benchmarks.algorithms.arrwdb + name: arrwdb + run_groups: + M-16: + arg_groups: [{M: 16, efConstruction: 400}] + args: {} + query_args: [[10, 20, 50, 100, 150, 200, 400, 800]] + M-24: + arg_groups: [{M: 24, efConstruction: 400}] + args: {} + query_args: [[10, 20, 50, 100, 150, 200, 400, 800]] + M-32: + arg_groups: [{M: 32, efConstruction: 400}] + args: {} + query_args: [[10, 20, 50, 100, 150, 200, 400, 800]] + M-48: + arg_groups: [{M: 48, efConstruction: 400}] + args: {} + query_args: [[10, 20, 50, 100, 150, 200, 400, 800]] diff --git a/ann_benchmarks/algorithms/arrwdb/module.py b/ann_benchmarks/algorithms/arrwdb/module.py new file mode 100644 index 000000000..8123e6f74 --- /dev/null +++ 
"""ann-benchmarks wrapper around arrwDB's Rust HNSW index (``rust_hnsw``)."""

import numpy as np

from ..base.module import BaseANN


class ArrwDB(BaseANN):
    """arrwDB — Rust HNSW with AVX-512/AVX2/FMA intrinsics.

    Build-time parameters (``M``, ``efConstruction``) are read from
    ``method_param``; the search-time ``ef_search`` value is supplied later
    through :meth:`set_query_arguments`.
    """

    # Benchmark metric name -> metric name handed to RustFastHNSWIndex.
    _METRICS = {"angular": "cosine", "euclidean": "l2"}

    def __init__(self, metric, method_param):
        """Store the index configuration; the index itself is built in fit().

        Args:
            metric: Benchmark metric name, ``"angular"`` or ``"euclidean"``.
            method_param: Mapping that may contain ``"M"`` (default 32) and
                ``"efConstruction"`` (default 400).

        Raises:
            KeyError: If ``metric`` is not one of the supported names.
        """
        self._metric = self._METRICS[metric]
        self._m = method_param.get("M", 32)
        self._ef_construction = method_param.get("efConstruction", 400)
        self._ef_search = 50
        self._index = None
        # Give the instance a name right away so it can be reported even if
        # set_query_arguments() is never called.
        self.name = self._format_name()

    def _format_name(self):
        """Return the display name for the current parameter set."""
        return "arrwdb (M=%d, efC=%d, efS=%d)" % (
            self._m,
            self._ef_construction,
            self._ef_search,
        )

    def fit(self, X):
        """Build the index from the rows of the 2-D array-like ``X``.

        Each row is registered under the string form of its row number, which
        query() converts back to an integer.
        """
        # Imported here rather than at module level, so importing this module
        # does not by itself require the compiled extension.
        from rust_hnsw import RustFastHNSWIndex

        # Convert before reading the shape so list-like input works as well.
        data = np.ascontiguousarray(X, dtype=np.float32)
        n, dim = data.shape

        self._index = RustFastHNSWIndex(
            dimension=dim,
            m=self._m,
            ef_construction=self._ef_construction,
            ef_search=self._ef_search,
            metric=self._metric,
        )

        ids = [str(i) for i in range(n)]
        # build_bulk receives the vectors as one flat float32 buffer.
        self._index.build_bulk(ids, data.ravel())

    def set_query_arguments(self, ef_search):
        """Set the ``ef_search`` value used by later queries and update the name."""
        self._ef_search = ef_search
        if self._index is not None:
            self._index.set_ef_search(ef_search)
        self.name = self._format_name()

    def query(self, v, n):
        """Return the integer ids of the ``n`` results found for vector ``v``.

        fit() must have been called first.
        """
        results = self._index.search(
            np.ascontiguousarray(v, dtype=np.float32),
            k=n,
            ef_override=self._ef_search,
        )
        # Each result is an (id, value) pair; only the id is returned. The
        # second element is presumably the distance — confirm against rust_hnsw.
        return [int(vid) for vid, _dist in results]

    def freeIndex(self):
        """Drop the reference to the built index."""
        # Rebinding is enough to release the reference; a preceding `del`
        # would only add a failure mode when the attribute is missing.
        self._index = None