Skip to content

Commit 0e296ad

Browse files
committed
Extract Vector::wrap_storage and use it at five call sites
The two-line incantation for wrapping a storage array in a [`Vector`] extension — `ExtDType::<Vector>::try_new(EmptyMetadata, storage.dtype().clone())` followed by `ExtensionArray::new(ext_dtype, storage).into_array()` — appeared verbatim in:

- `compress::wrap_padded_as_vector` (private, used twice)
- `sorf_transform::vtable` (empty-array branch + `inverse_rotate_typed`)
- `vector_search::build_constant_query_vector`

Promote it to `Vector::wrap_storage(storage)`, an associated function on the [`Vector`] vtable struct that is the natural home for the operation. Each call site drops to a single line and the `ExtDType`/`EmptyMetadata` imports go away where they were only pulled in for this pattern. The old private helper is deleted.

Signed-off-by: Claude <noreply@anthropic.com>
1 parent dd5b665 commit 0e296ad

5 files changed

Lines changed: 35 additions & 25 deletions

File tree

vortex-tensor/public-api.lock

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,10 @@ pub fn vortex_tensor::vector::AnyVector::try_match<'a>(ext_dtype: &'a vortex_arr
528528

529529
pub struct vortex_tensor::vector::Vector
530530

531+
impl vortex_tensor::vector::Vector
532+
533+
pub fn vortex_tensor::vector::Vector::wrap_storage(storage: vortex_array::array::erased::ArrayRef) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
534+
531535
impl core::clone::Clone for vortex_tensor::vector::Vector
532536

533537
pub fn vortex_tensor::vector::Vector::clone(&self) -> vortex_tensor::vector::Vector

vortex-tensor/src/encodings/turboquant/compress.rs

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,13 @@ use vortex_array::ArrayView;
1515
use vortex_array::ExecutionCtx;
1616
use vortex_array::IntoArray;
1717
use vortex_array::arrays::Extension;
18-
use vortex_array::arrays::ExtensionArray;
1918
use vortex_array::arrays::FixedSizeListArray;
2019
use vortex_array::arrays::PrimitiveArray;
2120
use vortex_array::arrays::dict::DictArray;
2221
use vortex_array::arrays::extension::ExtensionArrayExt;
2322
use vortex_array::arrays::fixed_size_list::FixedSizeListArrayExt;
2423
use vortex_array::arrays::scalar_fn::ScalarFnArrayExt;
2524
use vortex_array::dtype::Nullability;
26-
use vortex_array::dtype::extension::ExtDType;
27-
use vortex_array::extension::EmptyMetadata;
2825
use vortex_array::validity::Validity;
2926
use vortex_buffer::Buffer;
3027
use vortex_buffer::BufferMut;
@@ -242,7 +239,7 @@ pub unsafe fn turboquant_encode_unchecked(
242239
Validity::NonNullable,
243240
0,
244241
)?;
245-
let empty_padded_vector = wrap_padded_as_vector(empty_fsl.into_array())?;
242+
let empty_padded_vector = Vector::wrap_storage(empty_fsl.into_array())?;
246243

247244
let sorf_options = SorfOptions {
248245
seed,
@@ -258,7 +255,7 @@ pub unsafe fn turboquant_encode_unchecked(
258255
let core = turboquant_quantize_core(&fsl, seed, config.bit_width, config.num_rounds, ctx)?;
259256
let quantized_fsl =
260257
build_quantized_fsl(num_rows, core.all_indices, &core.centroids, core.padded_dim)?;
261-
let padded_vector = wrap_padded_as_vector(quantized_fsl)?;
258+
let padded_vector = Vector::wrap_storage(quantized_fsl)?;
262259

263260
let sorf_options = SorfOptions {
264261
seed,
@@ -269,13 +266,6 @@ pub unsafe fn turboquant_encode_unchecked(
269266
Ok(SorfTransform::try_new_array(&sorf_options, padded_vector, num_rows)?.into_array())
270267
}
271268

272-
/// Wrap an `FSL<f32, padded_dim>` in a [`Vector`](crate::vector::Vector) extension so it can be
273-
/// passed as the child of [`SorfTransform`], which expects a `Vector<padded_dim>` input.
274-
fn wrap_padded_as_vector(fsl: ArrayRef) -> VortexResult<ArrayRef> {
275-
let ext_dtype = ExtDType::<Vector>::try_new(EmptyMetadata, fsl.dtype().clone())?.erased();
276-
Ok(ExtensionArray::new(ext_dtype, fsl).into_array())
277-
}
278-
279269
/// Apply the full TurboQuant compression pipeline to a [`Vector`](crate::vector::Vector)
280270
/// extension array: normalize the rows via [`normalize_as_l2_denorm`], quantize the normalized
281271
/// child via [`turboquant_encode_unchecked`], and reattach the stored norms as the outer

vortex-tensor/src/scalar_fns/sorf_transform/vtable.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,7 @@ impl ScalarFnVTable for SorfTransform {
143143
validity,
144144
0,
145145
)?;
146-
let ext_dtype =
147-
ExtDType::<Vector>::try_new(EmptyMetadata, fsl.dtype().clone())?.erased();
148-
Ok(ExtensionArray::new(ext_dtype, fsl.into_array()).into_array())
146+
Vector::wrap_storage(fsl.into_array())
149147
});
150148
}
151149

@@ -330,7 +328,5 @@ fn inverse_rotate_typed<T: NativePType + Float + FromPrimitive>(
330328

331329
let elements = PrimitiveArray::new::<T>(output.freeze(), Validity::NonNullable);
332330
let fsl = FixedSizeListArray::try_new(elements.into_array(), dim_u32, validity, num_rows)?;
333-
334-
let ext_dtype = ExtDType::<Vector>::try_new(EmptyMetadata, fsl.dtype().clone())?.erased();
335-
Ok(ExtensionArray::new(ext_dtype, fsl.into_array()).into_array())
331+
Vector::wrap_storage(fsl.into_array())
336332
}

vortex-tensor/src/vector/mod.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,36 @@
33

44
//! Vector extension type for fixed-length float vectors (e.g., embeddings).
55
6+
use vortex_array::ArrayRef;
7+
use vortex_array::IntoArray;
8+
use vortex_array::arrays::ExtensionArray;
9+
use vortex_array::dtype::extension::ExtDType;
10+
use vortex_array::extension::EmptyMetadata;
11+
use vortex_error::VortexResult;
12+
613
/// The Vector extension type.
714
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
815
pub struct Vector;
916

17+
impl Vector {
18+
/// Wrap a `FixedSizeList`-valued `storage` array in a [`Vector`] extension array.
19+
///
20+
/// The storage's dtype is reused verbatim for the extension's storage dtype, so the caller
21+
/// is responsible for having already constructed an FSL with the float element ptype and
22+
/// non-nullable elements that [`Vector::validate_dtype`][`ExtVTable::validate_dtype`] requires.
23+
///
24+
/// [`ExtVTable::validate_dtype`]: vortex_array::dtype::extension::ExtVTable::validate_dtype
25+
///
26+
/// # Errors
27+
///
28+
/// Returns an error if `storage` does not satisfy [`Vector`]'s storage-dtype contract (e.g.
29+
/// it is not a `FixedSizeList` of non-nullable floats).
30+
pub fn wrap_storage(storage: ArrayRef) -> VortexResult<ArrayRef> {
31+
let ext_dtype = ExtDType::<Self>::try_new(EmptyMetadata, storage.dtype().clone())?.erased();
32+
Ok(ExtensionArray::new(ext_dtype, storage).into_array())
33+
}
34+
}
35+
1036
mod matcher;
1137

1238
pub use matcher::AnyVector;

vortex-tensor/src/vector_search.rs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,10 @@ use vortex_array::ArrayRef;
4646
use vortex_array::ExecutionCtx;
4747
use vortex_array::IntoArray;
4848
use vortex_array::arrays::ConstantArray;
49-
use vortex_array::arrays::ExtensionArray;
5049
use vortex_array::builtins::ArrayBuiltins;
5150
use vortex_array::dtype::DType;
5251
use vortex_array::dtype::NativePType;
5352
use vortex_array::dtype::Nullability;
54-
use vortex_array::dtype::extension::ExtDType;
55-
use vortex_array::extension::EmptyMetadata;
5653
use vortex_array::scalar::PValue;
5754
use vortex_array::scalar::Scalar;
5855
use vortex_array::scalar_fn::fns::operators::Operator;
@@ -106,11 +103,8 @@ pub fn build_constant_query_vector<T: NativePType + Into<PValue>>(
106103
.map(|&v| Scalar::primitive(v, Nullability::NonNullable))
107104
.collect();
108105
let storage_scalar = Scalar::fixed_size_list(element_dtype, children, Nullability::NonNullable);
109-
110106
let storage = ConstantArray::new(storage_scalar, num_rows).into_array();
111-
112-
let ext_dtype = ExtDType::<Vector>::try_new(EmptyMetadata, storage.dtype().clone())?.erased();
113-
Ok(ExtensionArray::new(ext_dtype, storage).into_array())
107+
Vector::wrap_storage(storage)
114108
}
115109

116110
/// Build the lazy similarity-search expression tree for a prepared database array and a

0 commit comments

Comments
 (0)