Skip to content

Commit 8a69ad8

Browse files
authored
Support constructing FMBScene with Python data (#34)
Part of MET-36 ## Summary of Changes While working on the forward kernel, I noticed that currently we don't have a way to create a `FMBScene` with data from Python side. This PR fills in this missing piece by defining a new constructor for `FMBScene` that takes in references to `std::vector`s and copy the values. Sadly, this copy is needed because `FMBScene` is designed to manage the memory for itself and we want to avoid double-free issue. In addition, since Python list is always on host (CPU) side, if we want the scene to live in GPU memory, then we have to move the data anyways. ### Aside: Using RAII Containers in `FMBScene` While writing the constructor & the unit test, I started to get a bit frustrated by having to write similar code twice to specialize for host/device memory management, so I replaced the underlying containers in `FMBScene` with `thrust::device/host_vector` :). Another nice side effect of this is that Thrust's device pointer & device reference types handle the moving of data for us automatically when we attempt to read it from host (see the definition of `FMBScene.__getitem__` in `bindints.cu` for example). ## Test Plans I added a unit test to verify that we can create `FMBScene` on both device & host and read the values stored in it. ```bash pixi run test ```
1 parent ccc0329 commit 8a69ad8

5 files changed

Lines changed: 99 additions & 83 deletions

File tree

genmetaballs/src/cuda/bindings.cu

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,22 @@ template <MemoryLocation location>
253253
void bind_fmb_scene(nb::module_& m, const char* name) {
254254
nb::class_<FMBScene<location>>(m, name)
255255
.def(nb::init<size_t>(), nb::arg("size"))
256+
.def(nb::init<const std::vector<FMB>&, const std::vector<float>&>(), nb::arg("fmbs"),
257+
nb::arg("log_weights"),
258+
"Construct FMBScene from a list of FMBs and corresponding log weights")
256259
.def_prop_ro("size", &FMBScene<location>::size)
257260
.def("__len__", &FMBScene<location>::size)
258-
.def("__getitem__", &FMBScene<location>::get_fmb, nb::arg("idx"),
259-
"Get the (FMB, log_weight) tuple at index i")
261+
.def(
262+
"__getitem__",
263+
// Convert cuda::std::tuple to std::tuple for nanobind
264+
[](const FMBScene<location>& scene, size_t idx) {
265+
const auto& [fmb, log_weight] = scene[idx];
266+
// for device data, the types would be thrust::device_reference, which cannot be
267+
// returned directly to Python. The static cast forces a copy (to host) to be made.
268+
return std::make_tuple(static_cast<const FMB&>(fmb),
269+
static_cast<const float&>(log_weight));
270+
},
271+
"Get the (FMB, log_weight) tuple at index i")
260272
.def("__repr__", [=](const FMBScene<location>& scene) {
261273
return nb::str("{}(size={})").format(name, scene.size());
262274
});

genmetaballs/src/cuda/core/fmb.cu

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15,25 +15,3 @@ CUDA_CALLABLE float FMB::quadratic_form(const Vec3D vec) const {
1515
const auto shifted_vec = vec - get_mean();
1616
return dot(shifted_vec, cov_inv_apply(shifted_vec));
1717
}
18-
19-
template <>
20-
__host__ FMBScene<MemoryLocation::HOST>::FMBScene(size_t size)
21-
: fmbs_{new FMB[size]}, log_weights_{new float[size]}, size_{size} {}
22-
23-
template <>
24-
__host__ FMBScene<MemoryLocation::DEVICE>::FMBScene(size_t size) : size_{size} {
25-
CUDA_CHECK(cudaMalloc(&fmbs_, size * sizeof(FMB)));
26-
CUDA_CHECK(cudaMalloc(&log_weights_, size * sizeof(float)));
27-
}
28-
29-
template <>
30-
__host__ FMBScene<MemoryLocation::HOST>::~FMBScene() {
31-
delete[] fmbs_;
32-
delete[] log_weights_;
33-
}
34-
35-
template <>
36-
__host__ FMBScene<MemoryLocation::DEVICE>::~FMBScene() {
37-
CUDA_CHECK(cudaFree(fmbs_));
38-
CUDA_CHECK(cudaFree(log_weights_));
39-
}

genmetaballs/src/cuda/core/fmb.cuh

Lines changed: 37 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@
22

33
#include <cuda/std/span>
44
#include <cuda/std/tuple>
5+
#include <cuda_runtime.h>
56
#include <stdexcept>
7+
#include <thrust/device_vector.h>
8+
#include <thrust/host_vector.h>
9+
#include <thrust/iterator/zip_iterator.h>
10+
#include <vector>
611

712
#include "geometry.cuh"
813
#include "utils.cuh"
@@ -46,74 +51,49 @@ public:
4651
template <MemoryLocation location>
4752
class FMBScene {
4853
private:
49-
FMB* fmbs_;
50-
float* log_weights_;
54+
// Host memory -> thrust::host_vector
55+
// Device memory -> thrust::device_vector
56+
template <typename T>
57+
using vector_t = std::conditional_t<location == MemoryLocation::HOST, thrust::host_vector<T>,
58+
thrust::device_vector<T>>;
59+
60+
vector_t<FMB> fmbs_;
61+
vector_t<float> log_weights_;
5162
size_t size_;
5263

5364
public:
54-
__host__ FMBScene(size_t size);
55-
56-
__host__ ~FMBScene();
57-
58-
CUDA_CALLABLE cuda::std::tuple<FMB&, float&> operator[](const uint32_t i) {
59-
return cuda::std::tie(fmbs_[i], log_weights_[i]);
65+
__host__ FMBScene(size_t size) : size_{size}, fmbs_(size), log_weights_(size) {};
66+
67+
// Copy constructor from std::vector
68+
// This enables easy construction from Python side
69+
__host__ FMBScene<location>(const std::vector<FMB>& fmbs, const std::vector<float>& log_weights)
70+
: size_{fmbs.size()}, fmbs_(fmbs.begin(), fmbs.end()),
71+
log_weights_(log_weights.begin(), log_weights.end()) {
72+
if (fmbs.size() != log_weights.size()) {
73+
throw std::invalid_argument(
74+
"FMBScene constructor: fmbs and log_weights must have the same size");
75+
}
6076
}
6177

62-
CUDA_CALLABLE cuda::std::tuple<const FMB&, const float&> operator[](const uint32_t i) const {
63-
return cuda::std::tie(fmbs_[i], log_weights_[i]);
78+
CUDA_CALLABLE auto operator[](const uint32_t i) {
79+
return cuda::std::make_tuple(fmbs_[i], log_weights_[i]);
6480
}
6581

66-
class Iterator {
67-
private:
68-
FMB* fmb_ptr_;
69-
float* log_weight_ptr_;
70-
71-
public:
72-
CUDA_CALLABLE Iterator(FMB* const fmb_ptr, float* const log_weight_ptr)
73-
: fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {}
74-
CUDA_CALLABLE cuda::std::tuple<FMB&, float&> operator*() {
75-
return cuda::std::tie(*fmb_ptr_, *log_weight_ptr_);
76-
}
77-
CUDA_CALLABLE bool operator!=(const Iterator& other) const {
78-
return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_;
79-
}
80-
CUDA_CALLABLE Iterator& operator++() {
81-
fmb_ptr_++, log_weight_ptr_++;
82-
return *this;
83-
}
84-
};
85-
86-
class ConstIterator {
87-
private:
88-
const FMB* fmb_ptr_;
89-
const float* log_weight_ptr_;
90-
91-
public:
92-
CUDA_CALLABLE ConstIterator(const FMB* const fmb_ptr, const float* const log_weight_ptr)
93-
: fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {}
94-
CUDA_CALLABLE cuda::std::tuple<const FMB&, const float&> operator*() const {
95-
return cuda::std::tie(*fmb_ptr_, *log_weight_ptr_);
96-
}
97-
CUDA_CALLABLE bool operator!=(const ConstIterator& other) const {
98-
return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_;
99-
}
100-
CUDA_CALLABLE ConstIterator& operator++() {
101-
fmb_ptr_++, log_weight_ptr_++;
102-
return *this;
103-
}
104-
};
82+
CUDA_CALLABLE auto operator[](const uint32_t i) const {
83+
return cuda::std::make_tuple(fmbs_[i], log_weights_[i]);
84+
}
10585

106-
CUDA_CALLABLE Iterator begin() {
107-
return Iterator(fmbs_, log_weights_);
86+
CUDA_CALLABLE auto begin() {
87+
return thrust::make_zip_iterator(fmbs_.begin(), log_weights_.begin());
10888
}
109-
CUDA_CALLABLE Iterator end() {
110-
return Iterator(fmbs_ + size_, log_weights_ + size_);
89+
CUDA_CALLABLE auto end() {
90+
return thrust::make_zip_iterator(fmbs_.end(), log_weights_.end());
11191
}
112-
CUDA_CALLABLE ConstIterator begin() const {
113-
return ConstIterator(fmbs_, log_weights_);
92+
CUDA_CALLABLE auto begin() const {
93+
return thrust::make_zip_iterator(fmbs_.begin(), log_weights_.begin());
11494
}
115-
CUDA_CALLABLE ConstIterator end() const {
116-
return ConstIterator(fmbs_ + size_, log_weights_ + size_);
95+
CUDA_CALLABLE auto end() const {
96+
return thrust::make_zip_iterator(fmbs_.end(), log_weights_.end());
11797
}
11898
CUDA_CALLABLE const FMB& get_fmb(uint32_t idx) const {
11999
return fmbs_[idx];

genmetaballs/src/genmetaballs/core/__init__.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
TwoParameterConfidence,
1111
ZeroParameterConfidence,
1212
)
13-
from genmetaballs._genmetaballs_bindings.fmb import CPUFMBScene, GPUFMBScene
13+
from genmetaballs._genmetaballs_bindings.fmb import FMB, CPUFMBScene, GPUFMBScene
1414
from genmetaballs._genmetaballs_bindings.image import CPUImage, GPUImage
1515
from genmetaballs._genmetaballs_bindings.utils import CPUFloatArray2D, GPUFloatArray2D, sigmoid
1616

@@ -63,6 +63,19 @@ def make_fmb_scene(size: int, device: DeviceType) -> CPUFMBScene | GPUFMBScene:
6363
raise ValueError(f"Unsupported device type: {device}")
6464

6565

66+
# TODO: create a wrapper class for FMBScene and turn the factory functions into
67+
# class methods
68+
def make_fmb_scene_from_values(
69+
fmbs: list[fmb.FMB], log_weights: list[float], device: DeviceType
70+
) -> CPUFMBScene | GPUFMBScene:
71+
if device == "cpu":
72+
return CPUFMBScene(fmbs, log_weights)
73+
elif device == "gpu":
74+
return GPUFMBScene(fmbs, log_weights)
75+
else:
76+
raise ValueError(f"Unsupported device type: {device}")
77+
78+
6679
__all__ = [
6780
"array2d_float",
6881
"ZeroParameterConfidence",
@@ -74,7 +87,10 @@ def make_fmb_scene(size: int, device: DeviceType) -> CPUFMBScene | GPUFMBScene:
7487
"intersector",
7588
"sigmoid",
7689
"FourParameterBlender",
90+
"FMB",
91+
"Intrinsics",
7792
"ThreeParameterBlender",
7893
"make_image",
7994
"make_fmb_scene",
95+
"make_fmb_scene_from_values",
8096
]

tests/python_tests/test_fmb.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from scipy.spatial.distance import mahalanobis
44
from scipy.spatial.transform import Rotation as Rot
55

6-
from genmetaballs.core import fmb, geometry, make_fmb_scene
6+
from genmetaballs.core import fmb, geometry, make_fmb_scene, make_fmb_scene_from_values
77

88
FMB = fmb.FMB
99
Pose, Vec3D, Rotation = geometry.Pose, geometry.Vec3D, geometry.Rotation
@@ -48,3 +48,33 @@ def test_fmb_scene_creation():
4848
gpu_scene = make_fmb_scene(20, device="gpu")
4949
assert isinstance(gpu_scene, fmb.GPUFMBScene)
5050
assert len(gpu_scene) == 20
51+
52+
53+
@pytest.mark.parametrize("device", ["cpu", "gpu"])
54+
def test_fmb_scene_creation_from_lists(rng, device):
55+
fmbs = []
56+
log_weights = []
57+
gt_translations = []
58+
gt_extents = []
59+
num_balls = 15
60+
for _ in range(num_balls):
61+
quat = rng.uniform(size=4).astype(np.float32)
62+
tran, extent = rng.uniform(size=(2, 3)).astype(np.float32)
63+
pose = Pose.from_components(Rotation.from_quat(*quat), Vec3D(*tran))
64+
fmbs.append(FMB(pose, *extent))
65+
log_weights.append(rng.uniform())
66+
gt_translations.append(tran)
67+
gt_extents.append(extent)
68+
69+
scene = make_fmb_scene_from_values(fmbs, log_weights, device=device)
70+
71+
assert len(scene) == num_balls
72+
# Verify that we can retrieve each FMB and log weight correctly
73+
for i in range(num_balls):
74+
fmb_i, log_weight = scene[i]
75+
translation = fmb_i.pose.tran
76+
assert np.allclose([translation.x, translation.y, translation.z], gt_translations[i])
77+
78+
fmb_extent = fmb_i.extent
79+
assert np.allclose(fmb_extent, gt_extents[i])
80+
assert np.isclose(log_weight, log_weights[i])

0 commit comments

Comments
 (0)