Skip to content

Commit 7d49d63

Browse files
authored
Merge branch 'main' into cluster-cost
2 parents 57eae14 + 0e94a8c commit 7d49d63

22 files changed

Lines changed: 1430 additions & 799 deletions

File tree

.github/CODEOWNERS

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,49 @@
11
#c code owners
2-
c/ @rapidsai/cuvs-c-codeowners
3-
examples/c/ @rapidsai/cuvs-c-codeowners
2+
c/ @NVIDIA/cuvs-c-codeowners
3+
examples/c/ @NVIDIA/cuvs-c-codeowners
44

55
#cpp code owners
6-
cpp/ @rapidsai/cuvs-cpp-codeowners
7-
examples/cpp/ @rapidsai/cuvs-cpp-codeowners
6+
cpp/ @NVIDIA/cuvs-cpp-codeowners
7+
examples/cpp/ @NVIDIA/cuvs-cpp-codeowners
88

99

1010
#java code owners
11-
java/ @rapidsai/cuvs-java-codeowners
12-
examples/java/ @rapidsai/cuvs-java-codeowners
11+
java/ @NVIDIA/cuvs-java-codeowners
12+
examples/java/ @NVIDIA/cuvs-java-codeowners
1313

1414
#python code owners
15-
python/ @rapidsai/cuvs-python-codeowners
15+
python/ @NVIDIA/cuvs-python-codeowners
1616

1717
#rust code owners
18-
rust/ @rapidsai/cuvs-rust-codeowners
19-
examples/rust/ @rapidsai/cuvs-rust-codeowners
18+
rust/ @NVIDIA/cuvs-rust-codeowners
19+
examples/rust/ @NVIDIA/cuvs-rust-codeowners
2020

2121
#docs code owners
22-
docs/ @rapidsai/cuvs-docs-codeowners
23-
fern/ @rapidsai/cuvs-docs-codeowners
22+
docs/ @NVIDIA/cuvs-docs-codeowners
23+
fern/ @NVIDIA/cuvs-docs-codeowners
2424

2525
#cmake code owners
26-
CMakeLists.txt @rapidsai/cuvs-cmake-codeowners
27-
**/cmake/ @rapidsai/cuvs-cmake-codeowners
28-
*.cmake @rapidsai/cuvs-cmake-codeowners
29-
cpp/scripts/run-cmake-format.sh @rapidsai/cuvs-cmake-codeowners
26+
CMakeLists.txt @NVIDIA/cuvs-cmake-codeowners
27+
**/cmake/ @NVIDIA/cuvs-cmake-codeowners
28+
*.cmake @NVIDIA/cuvs-cmake-codeowners
29+
cpp/scripts/run-cmake-format.sh @NVIDIA/cuvs-cmake-codeowners
3030

3131
#CI code owners
32-
/.github/ @rapidsai/ci-codeowners
33-
/ci/ @rapidsai/ci-codeowners
34-
/.shellcheckrc @rapidsai/ci-codeowners
35-
/.coderabbit.yaml @rapidsai/ci-codeowners
32+
/.github/ @NVIDIA/adi-ci-codeowners
33+
/ci/ @NVIDIA/adi-ci-codeowners
34+
/.shellcheckrc @NVIDIA/adi-ci-codeowners
35+
/.coderabbit.yaml @NVIDIA/adi-ci-codeowners
3636

3737
#packaging code owners
38-
/.pre-commit-config.yaml @rapidsai/packaging-codeowners
39-
/.devcontainer/ @rapidsai/packaging-codeowners
40-
/conda/ @rapidsai/packaging-codeowners
41-
dependencies.yaml @rapidsai/packaging-codeowners
42-
/build.sh @rapidsai/packaging-codeowners
43-
pyproject.toml @rapidsai/packaging-codeowners
44-
python/setup.py @rapidsai/packaging-codeowners
45-
build.sh @rapidsai/packaging-codeowners
46-
**/build.sh @rapidsai/packaging-codeowners
38+
/.pre-commit-config.yaml @NVIDIA/adi-packaging-codeowners
39+
/.devcontainer/ @NVIDIA/adi-packaging-codeowners
40+
/conda/ @NVIDIA/adi-packaging-codeowners
41+
dependencies.yaml @NVIDIA/adi-packaging-codeowners
42+
/build.sh @NVIDIA/adi-packaging-codeowners
43+
pyproject.toml @NVIDIA/adi-packaging-codeowners
44+
python/setup.py @NVIDIA/adi-packaging-codeowners
45+
build.sh @NVIDIA/adi-packaging-codeowners
46+
**/build.sh @NVIDIA/adi-packaging-codeowners
4747

4848
# Ops code owners
49-
/SECURITY.md @rapidsai/ops-codeowners
49+
/SECURITY.md @NVIDIA/adi-ops-codeowners

.github/CODE_OF_CONDUCT.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
This project has adopted the [Contributor Covenant Code of Conduct](https://docs.rapids.ai/resources/conduct/).

.github/workflows/pr.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,7 @@ jobs:
452452
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_conda
453453
with:
454454
build_type: pull-request
455+
run_codecov: false
455456
script: ci/test_python.sh
456457
rocky8-clib-standalone-build-matrix:
457458
needs: checks

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION.
1+
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33

44
repos:
@@ -117,7 +117,7 @@ repos:
117117
^cpp/cmake/patches/cutlass/build-export[.]patch$|
118118
^rust/cuvs-sys/src/bindings[.]rs$
119119
- repo: https://github.com/rapidsai/pre-commit-hooks
120-
rev: v1.3.3
120+
rev: v1.6.0
121121
hooks:
122122
- id: verify-copyright
123123
name: verify-copyright-cuvs
@@ -181,7 +181,7 @@ repos:
181181
)
182182
- id: verify-alpha-spec
183183
- id: verify-codeowners
184-
args: [--fix, --project-prefix=cuvs]
184+
args: [--fix, --org=NVIDIA, --project-prefix=cuvs]
185185
- id: verify-pyproject-license
186186
# ignore the top-level pyproject.toml, which doesn't
187187
# have or need a [project] table

rust/cuvs/Cargo.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@ doc-only = ["cuvs-sys/doc-only"]
1414

1515
[dependencies]
1616
cuvs-sys = { workspace = true }
17-
ndarray = "0.15"
17+
thiserror = "2"
18+
tinyvec = { version = "1", features = ["alloc"] }
1819

1920
[dev-dependencies]
20-
ndarray-rand = "0.14"
21+
ndarray = "0.17"
22+
ndarray-rand = "0.16"
2123

2224
[package.metadata.docs.rs]
2325
features = ["doc-only"]

rust/cuvs/examples/cagra.rs

Lines changed: 189 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,222 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

6+
//! CAGRA example with a user-provided GPU tensor.
7+
//!
8+
//! This demonstrates how to feed your own device memory into cuVS by
9+
//! implementing the public [`AsDlTensor`]/[`AsDlTensorMut`] traits. The
10+
//! [`CudaTensor`] type manages device memory directly through the CUDA runtime
11+
//! (`cudaMalloc`/`cudaFree`) and copies to/from host arrays with `cudaMemcpyAsync`
12+
//! on the cuVS stream, reusing the resources handle's `get_cuda_stream`/
13+
//! `sync_stream` for stream access and synchronization.
14+
//!
15+
//! A real application would likely rely on a helper crate such as `cudarc`
16+
//! and its `CudaSlice`.
17+
18+
use std::ffi::c_void;
19+
use std::marker::PhantomData;
20+
use std::os::raw::c_int;
21+
22+
use cuvs::Resources;
623
use cuvs::cagra::{Index, IndexParams, SearchParams};
7-
use cuvs::{ManagedTensor, Resources, Result};
24+
use cuvs::dlpack::{
25+
AsDlTensor, AsDlTensorMut, DLDevice, DLDeviceType, DLPackError, DLTensorView, DLTensorViewMut,
26+
DType,
27+
};
828

929
use ndarray::s;
1030
use ndarray_rand::RandomExt;
1131
use ndarray_rand::rand_distr::Uniform;
1232

13-
/// Example showing how to index and search data with CAGRA
14-
fn cagra_example() -> Result<()> {
33+
type ExampleResult<T> = std::result::Result<T, Box<dyn std::error::Error>>;
34+
35+
// ---------------------------------------------------------------------------
36+
// Minimal CUDA runtime FFI
37+
// ---------------------------------------------------------------------------
38+
39+
#[allow(non_camel_case_types)]
40+
type cudaError_t = c_int;
41+
const CUDA_SUCCESS: cudaError_t = 0;
42+
const CUDA_MEMCPY_HOST_TO_DEVICE: c_int = 1;
43+
const CUDA_MEMCPY_DEVICE_TO_HOST: c_int = 2;
44+
45+
#[link(name = "cudart")]
46+
unsafe extern "C" {
47+
fn cudaMalloc(ptr: *mut *mut c_void, size: usize) -> cudaError_t;
48+
fn cudaFree(ptr: *mut c_void) -> cudaError_t;
49+
fn cudaMemcpyAsync(
50+
dst: *mut c_void,
51+
src: *const c_void,
52+
count: usize,
53+
kind: c_int,
54+
stream: cuvs_sys::cudaStream_t,
55+
) -> cudaError_t;
56+
}
57+
58+
fn check_cuda(status: cudaError_t) -> ExampleResult<()> {
59+
if status == CUDA_SUCCESS {
60+
Ok(())
61+
} else {
62+
Err(format!("CUDA runtime error: {status}").into())
63+
}
64+
}
65+
66+
// ---------------------------------------------------------------------------
67+
// A custom device tensor backed by the CUDA runtime
68+
// ---------------------------------------------------------------------------
69+
70+
struct CudaTensor<T: DType> {
71+
data: *mut c_void,
72+
shape: Vec<i64>,
73+
bytes: usize,
74+
_marker: PhantomData<T>,
75+
}
76+
77+
impl<T: DType> CudaTensor<T> {
78+
/// Allocate an uninitialized device buffer (used for search outputs).
79+
fn alloc(shape: &[usize]) -> ExampleResult<Self> {
80+
let bytes = shape.iter().product::<usize>() * std::mem::size_of::<T>();
81+
let mut data: *mut c_void = std::ptr::null_mut();
82+
check_cuda(unsafe { cudaMalloc(&mut data, bytes) })?;
83+
Ok(Self {
84+
data,
85+
shape: shape.iter().map(|&d| d as i64).collect(),
86+
bytes,
87+
_marker: PhantomData,
88+
})
89+
}
90+
91+
/// Copy a contiguous host array onto the device.
92+
fn from_host<D>(res: &Resources, host: &ndarray::ArrayRef<T, D>) -> ExampleResult<Self>
93+
where
94+
D: ndarray::Dimension,
95+
{
96+
if !host.is_standard_layout() {
97+
return Err("host array must be contiguous (row-major)".into());
98+
}
99+
let tensor = Self::alloc(host.shape())?;
100+
101+
let stream = res.get_cuda_stream()?;
102+
check_cuda(unsafe {
103+
cudaMemcpyAsync(
104+
tensor.data,
105+
host.as_ptr() as *const c_void,
106+
tensor.bytes,
107+
CUDA_MEMCPY_HOST_TO_DEVICE,
108+
stream,
109+
)
110+
})?;
111+
res.sync_stream()?;
112+
113+
Ok(tensor)
114+
}
115+
116+
/// Copy the device buffer back into a contiguous host array.
117+
fn to_host<D>(&self, res: &Resources, host: &mut ndarray::ArrayRef<T, D>) -> ExampleResult<()>
118+
where
119+
D: ndarray::Dimension,
120+
{
121+
if !host.is_standard_layout() {
122+
return Err("host array must be contiguous (row-major)".into());
123+
}
124+
125+
let stream = res.get_cuda_stream()?;
126+
check_cuda(unsafe {
127+
cudaMemcpyAsync(
128+
host.as_mut_ptr() as *mut c_void,
129+
self.data,
130+
self.bytes,
131+
CUDA_MEMCPY_DEVICE_TO_HOST,
132+
stream,
133+
)
134+
})?;
135+
res.sync_stream()?;
136+
137+
Ok(())
138+
}
139+
}
140+
141+
impl<T: DType> Drop for CudaTensor<T> {
142+
fn drop(&mut self) {
143+
if !self.data.is_null() {
144+
unsafe { cudaFree(self.data) };
145+
}
146+
}
147+
}
148+
149+
impl<T: DType> AsDlTensor for CudaTensor<T> {
150+
fn as_dl_tensor(&self) -> std::result::Result<DLTensorView<'_>, DLPackError> {
151+
unsafe {
152+
DLTensorView::from_raw_parts(
153+
self.data,
154+
DLDevice { device_type: DLDeviceType::kDLCUDA, device_id: 0 },
155+
&self.shape,
156+
None,
157+
T::dl_dtype(),
158+
)
159+
}
160+
}
161+
}
162+
163+
impl<T: DType> AsDlTensorMut for CudaTensor<T> {
164+
fn as_dl_tensor_mut(&mut self) -> std::result::Result<DLTensorViewMut<'_>, DLPackError> {
165+
unsafe {
166+
DLTensorViewMut::from_raw_parts(
167+
self.data,
168+
DLDevice { device_type: DLDeviceType::kDLCUDA, device_id: 0 },
169+
&self.shape,
170+
None,
171+
T::dl_dtype(),
172+
)
173+
}
174+
}
175+
}
176+
177+
/// Example showing how to index and search data with CAGRA.
178+
fn cagra_example() -> ExampleResult<()> {
15179
let res = Resources::new()?;
16180

17-
// Create a new random dataset to index
181+
// Create a new random dataset to index and copy it to the device.
18182
let n_datapoints = 65536;
19183
let n_features = 512;
20-
let dataset =
21-
ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));
184+
let dataset_host = ndarray::Array::<f32, _>::random(
185+
(n_datapoints, n_features),
186+
Uniform::new(0., 1.0).unwrap(),
187+
);
188+
let dataset = CudaTensor::from_host(&res, &dataset_host)?;
22189

23-
// build the cagra index
190+
// Build the CAGRA index.
24191
let build_params = IndexParams::new()?;
25192
let index = Index::build(&res, &build_params, &dataset)?;
26-
println!("Indexed {}x{} datapoints into cagra index", n_datapoints, n_features);
193+
println!("Indexed {n_datapoints}x{n_features} datapoints into cagra index");
27194

28-
// use the first 4 points from the dataset as queries : will test that we get them back
29-
// as their own nearest neighbor
195+
// Use the first 4 points as queries; each should be its own nearest neighbor.
30196
let n_queries = 4;
31-
let queries = dataset.slice(s![0..n_queries, ..]);
32-
33197
let k = 10;
198+
let queries_host = dataset_host.slice(s![0..n_queries, ..]).to_owned();
199+
let queries = CudaTensor::from_host(&res, &queries_host)?;
34200

35-
// CAGRA search API requires queries and outputs to be on device memory
36-
// copy query data over, and allocate new device memory for the distances/ neighbors
37-
// outputs
38-
let queries = ManagedTensor::from(&queries).to_device(&res)?;
39-
let mut neighbors_host = ndarray::Array::<u32, _>::zeros((n_queries, k));
40-
let neighbors = ManagedTensor::from(&neighbors_host).to_device(&res)?;
41-
42-
let mut distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
43-
let distances = ManagedTensor::from(&distances_host).to_device(&res)?;
201+
let mut neighbors = CudaTensor::<u32>::alloc(&[n_queries, k])?;
202+
let mut distances = CudaTensor::<f32>::alloc(&[n_queries, k])?;
44203

45204
let search_params = SearchParams::new()?;
205+
index.search(&res, &search_params, &queries, &mut neighbors, &mut distances)?;
46206

47-
index.search(&res, &search_params, &queries, &neighbors, &distances)?;
48-
49-
// Copy back to host memory
50-
distances.to_host(&res, &mut distances_host)?;
207+
// Copy the results back to the host.
208+
let mut neighbors_host = ndarray::Array::<u32, _>::zeros((n_queries, k));
209+
let mut distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
51210
neighbors.to_host(&res, &mut neighbors_host)?;
211+
distances.to_host(&res, &mut distances_host)?;
52212

53-
// nearest neighbors should be themselves, since queries are from the
54-
// dataset
55-
println!("Neighbors {:?}", neighbors_host);
56-
println!("Distances {:?}", distances_host);
213+
println!("Neighbors {neighbors_host:?}");
214+
println!("Distances {distances_host:?}");
57215
Ok(())
58216
}
59217

60218
fn main() {
61219
if let Err(e) = cagra_example() {
62-
println!("Failed to run CAGRA: {:?}", e);
220+
println!("Failed to run CAGRA: {e:?}");
63221
}
64222
}

0 commit comments

Comments
 (0)