Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions vortex-array/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -21546,6 +21546,8 @@ pub fn vortex_array::Array<vortex_array::arrays::Extension>::new(ext_dtype: vort

pub fn vortex_array::Array<vortex_array::arrays::Extension>::try_new(ext_dtype: vortex_array::dtype::extension::ExtDTypeRef, storage_array: vortex_array::ArrayRef) -> vortex_error::VortexResult<Self>

pub fn vortex_array::Array<vortex_array::arrays::Extension>::try_new_from_vtable<V: vortex_array::dtype::extension::ExtVTable>(vtable: V, metadata: <V as vortex_array::dtype::extension::ExtVTable>::Metadata, storage_array: vortex_array::ArrayRef) -> vortex_error::VortexResult<Self>

impl vortex_array::Array<vortex_array::arrays::Filter>

pub fn vortex_array::Array<vortex_array::arrays::Filter>::new(array: vortex_array::ArrayRef, mask: vortex_mask::Mask) -> Self
Expand Down
15 changes: 15 additions & 0 deletions vortex-array/src/arrays/extension/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ use crate::array::ArrayParts;
use crate::array::TypedArrayRef;
use crate::arrays::Extension;
use crate::dtype::DType;
use crate::dtype::extension::ExtDType;
use crate::dtype::extension::ExtDTypeRef;
use crate::dtype::extension::ExtVTable;

/// The backing storage array for this extension array.
pub(super) const STORAGE_SLOT: usize = 0;
Expand Down Expand Up @@ -163,4 +165,17 @@ impl Array<Extension> {
)
})
}

/// Builds an [`ExtensionArray`](crate::arrays::ExtensionArray) directly from an extension
/// vtable, its metadata, and the storage array that backs the extension values.
///
/// The extension dtype is derived from `vtable`, `metadata`, and the dtype of `storage_array`,
/// then type-erased and handed to [`try_new`](Self::try_new) together with the storage array.
///
/// # Errors
///
/// Propagates any error returned by `ExtDType::try_with_vtable` or by
/// [`try_new`](Self::try_new).
pub fn try_new_from_vtable<V: ExtVTable>(
    vtable: V,
    metadata: V::Metadata,
    storage_array: ArrayRef,
) -> VortexResult<Self> {
    // The storage dtype is cloned because `storage_array` itself is moved into `try_new` below.
    let storage_dtype = storage_array.dtype().clone();
    let typed_dtype = ExtDType::<V>::try_with_vtable(vtable, metadata, storage_dtype)?;
    Self::try_new(typed_dtype.erased(), storage_array)
}
}
20 changes: 11 additions & 9 deletions vortex-array/src/dtype/extension/vtable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ use crate::scalar::ScalarValue;

/// The public API for defining new extension types.
///
/// This is the non-object-safe trait that plugin authors implement to define a new extension
/// type. It specifies the type's identity, metadata, serialization, and validation.
/// This is the non-object-safe trait that plugin authors implement to define a new extension type.
/// It specifies the type's identity, metadata, serialization, and validation.
pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash {
/// Associated type containing the deserialized metadata for this extension type.
type Metadata: 'static + Send + Sync + Clone + Debug + Display + Eq + Hash;
Expand All @@ -39,26 +39,27 @@ pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash {
/// Validate that the given storage type is compatible with this extension type.
fn validate_dtype(ext_dtype: &ExtDType<Self>) -> VortexResult<()>;

/// Can a value of `other` be implicitly widened into this type?
/// e.g. GeographyType might accept Point, LineString, etc.
/// Can a value of `other` be implicitly widened into this type? (e.g. GeographyType might
/// accept Point, LineString, etc.)
///
/// Implementors only need to override one of `can_coerce_from` or `can_coerce_to` both
/// exist so that either side of the coercion can provide the logic.
/// Implementors only need to override one of `can_coerce_from` or `can_coerce_to`. We have both
/// so that either side of the coercion can provide the logic.
fn can_coerce_from(ext_dtype: &ExtDType<Self>, other: &DType) -> bool {
// Bind both parameters so this default body does not leave them unused.
let _ = (ext_dtype, other);
// Default answer: no implicit coercion from `other` unless an implementor overrides this.
false
}

/// Can this type be implicitly widened into `other`?
///
/// Implementors only need to override one of `can_coerce_from` or `can_coerce_to` both
/// exist so that either side of the coercion can provide the logic.
/// Implementors only need to override one of `can_coerce_from` or `can_coerce_to`. We have both
/// so that either side of the coercion can provide the logic.
fn can_coerce_to(ext_dtype: &ExtDType<Self>, other: &DType) -> bool {
// Bind both parameters so this default body does not leave them unused.
let _ = (ext_dtype, other);
// Default answer: no implicit coercion into `other` unless an implementor overrides this.
false
}

/// Given two types in a Uniform context, what is their least supertype?
///
/// Return None if no supertype exists.
fn least_supertype(ext_dtype: &ExtDType<Self>, other: &DType) -> Option<DType> {
let _ = (ext_dtype, other);
Expand All @@ -69,7 +70,8 @@ pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash {

/// Validate the given storage value is compatible with the extension type.
///
/// By default, this calls [`unpack_native()`](ExtVTable::unpack_native) and discards the result.
/// By default, this calls [`unpack_native()`](ExtVTable::unpack_native) and discards the
/// result.
///
/// # Errors
///
Expand Down
14 changes: 5 additions & 9 deletions vortex-tensor/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ pub const vortex_tensor::encodings::turboquant::MIN_DIMENSION: u32

pub fn vortex_tensor::encodings::turboquant::tq_validate_vector_dtype(dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<vortex_tensor::vector::VectorMatcherMetadata>

pub fn vortex_tensor::encodings::turboquant::turboquant_encode(ext: vortex_array::array::view::ArrayView<'_, vortex_array::arrays::extension::vtable::Extension>, config: &vortex_tensor::encodings::turboquant::TurboQuantConfig, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
pub fn vortex_tensor::encodings::turboquant::turboquant_encode(input: vortex_array::array::erased::ArrayRef, config: &vortex_tensor::encodings::turboquant::TurboQuantConfig, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>

pub unsafe fn vortex_tensor::encodings::turboquant::turboquant_encode_unchecked(ext: vortex_array::array::view::ArrayView<'_, vortex_array::arrays::extension::vtable::Extension>, config: &vortex_tensor::encodings::turboquant::TurboQuantConfig, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>

Expand Down Expand Up @@ -142,7 +142,7 @@ impl vortex_tensor::fixed_shape::FixedShapeTensorMatcherMetadata<'_>

pub fn vortex_tensor::fixed_shape::FixedShapeTensorMatcherMetadata<'_>::element_ptype(&self) -> vortex_array::dtype::ptype::PType

pub fn vortex_tensor::fixed_shape::FixedShapeTensorMatcherMetadata<'_>::list_size(&self) -> usize
pub fn vortex_tensor::fixed_shape::FixedShapeTensorMatcherMetadata<'_>::flat_list_size(&self) -> u32

pub fn vortex_tensor::fixed_shape::FixedShapeTensorMatcherMetadata<'_>::metadata(&self) -> &vortex_tensor::fixed_shape::FixedShapeTensorMetadata

Expand Down Expand Up @@ -222,7 +222,7 @@ impl vortex_tensor::matcher::TensorMatch<'_>

pub fn vortex_tensor::matcher::TensorMatch<'_>::element_ptype(self) -> vortex_array::dtype::ptype::PType

pub fn vortex_tensor::matcher::TensorMatch<'_>::list_size(self) -> usize
pub fn vortex_tensor::matcher::TensorMatch<'_>::list_size(self) -> u32

impl<'a> core::clone::Clone for vortex_tensor::matcher::TensorMatch<'a>

Expand Down Expand Up @@ -382,7 +382,7 @@ pub fn vortex_tensor::scalar_fns::l2_denorm::L2Denorm::validity(&self, _options:

pub fn vortex_tensor::scalar_fns::l2_denorm::normalize_as_l2_denorm(input: vortex_array::array::erased::ArrayRef, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::arrays::scalar_fn::vtable::ScalarFnArray>

pub fn vortex_tensor::scalar_fns::l2_denorm::validate_l2_normalized_rows(input: &vortex_array::array::erased::ArrayRef, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<()>
pub fn vortex_tensor::scalar_fns::l2_denorm::validate_l2_normalized_rows_against_norms(normalized: &vortex_array::array::erased::ArrayRef, norms: core::option::Option<&vortex_array::array::erased::ArrayRef>, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<()>

pub mod vortex_tensor::scalar_fns::l2_norm

Expand Down Expand Up @@ -502,7 +502,7 @@ pub fn vortex_tensor::scalar_fns::sorf_transform::SorfTransform::child_name(&sel

pub fn vortex_tensor::scalar_fns::sorf_transform::SorfTransform::execute(&self, options: &Self::Options, args: &dyn vortex_array::scalar_fn::vtable::ExecutionArgs, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>

pub fn vortex_tensor::scalar_fns::sorf_transform::SorfTransform::fmt_sql(&self, _options: &Self::Options, expr: &vortex_array::expr::expression::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
pub fn vortex_tensor::scalar_fns::sorf_transform::SorfTransform::fmt_sql(&self, options: &Self::Options, expr: &vortex_array::expr::expression::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

pub fn vortex_tensor::scalar_fns::sorf_transform::SorfTransform::id(&self) -> vortex_array::scalar_fn::ScalarFnId

Expand Down Expand Up @@ -600,12 +600,8 @@ impl core::marker::StructuralPartialEq for vortex_tensor::vector::VectorMatcherM

pub mod vortex_tensor::vector_search

pub fn vortex_tensor::vector_search::build_constant_query_vector<T: vortex_array::dtype::ptype::NativePType + core::convert::Into<vortex_array::scalar::typed_view::primitive::pvalue::PValue>>(query: &[T], num_rows: usize) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>

pub fn vortex_tensor::vector_search::build_similarity_search_tree<T: vortex_array::dtype::ptype::NativePType + core::convert::Into<vortex_array::scalar::typed_view::primitive::pvalue::PValue>>(data: vortex_array::array::erased::ArrayRef, query: &[T], threshold: T) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>

pub fn vortex_tensor::vector_search::compress_turboquant(data: vortex_array::array::erased::ArrayRef, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>

pub const vortex_tensor::SCALAR_FN_ARRAY_TENSOR_PLUGIN_ENV: &str

pub fn vortex_tensor::initialize(session: &vortex_session::VortexSession)
3 changes: 1 addition & 2 deletions vortex-tensor/src/encodings/l2_denorm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ use crate::scalar_fns::l2_denorm::normalize_as_l2_denorm;
pub struct L2DenormScheme;

impl Scheme for L2DenormScheme {
// TODO(connor): FIX THIS!!!
fn scheme_name(&self) -> &'static str {
"vortex.tensor.UNSTABLE.l2_denorm"
"vortex.tensor.l2_denorm"
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I forgot that this is a completely separate label from the array IDs; it is used only for ID equality checks in the compressor

}

fn matches(&self, canonical: &Canonical) -> bool {
Expand Down
10 changes: 5 additions & 5 deletions vortex-tensor/src/encodings/turboquant/centroids.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

use std::sync::LazyLock;

use vortex_buffer::Buffer;
use vortex_error::VortexResult;
use vortex_error::vortex_ensure;
use vortex_utils::aliases::dash_map::DashMap;
Expand All @@ -27,16 +28,15 @@ const CONVERGENCE_EPSILON: f64 = 1e-12;
/// Number of numerical integration points for computing conditional expectations.
const INTEGRATION_POINTS: usize = 1000;

// TODO(connor): Maybe we should just store an `ArrayRef` here?
/// Global centroid cache keyed by (dimension, bit_width).
static CENTROID_CACHE: LazyLock<DashMap<(u32, u8), Vec<f32>>> = LazyLock::new(DashMap::default);
static CENTROID_CACHE: LazyLock<DashMap<(u32, u8), Buffer<f32>>> = LazyLock::new(DashMap::default);

/// Get or compute cached centroids for the given dimension and bit width.
///
/// Returns `2^bit_width` centroids sorted in ascending order, representing optimal scalar
/// quantization levels for the coordinate distribution after random rotation in
/// `dimension`-dimensional space.
pub fn get_centroids(dimension: u32, bit_width: u8) -> VortexResult<Vec<f32>> {
pub fn get_centroids(dimension: u32, bit_width: u8) -> VortexResult<Buffer<f32>> {
vortex_ensure!(
(1..=MAX_BIT_WIDTH).contains(&bit_width),
"TurboQuant bit_width must be 1-{}, got {bit_width}",
Expand Down Expand Up @@ -92,7 +92,7 @@ impl HalfIntExponent {
/// The probability distribution function is:
/// `f(x) = C_d * (1 - x^2)^((d-3)/2)` on `[-1, 1]`
/// where `C_d` is the normalizing constant.
fn max_lloyd_centroids(dimension: u32, bit_width: u8) -> Vec<f32> {
fn max_lloyd_centroids(dimension: u32, bit_width: u8) -> Buffer<f32> {
debug_assert!((1..=MAX_BIT_WIDTH).contains(&bit_width));
let num_centroids = 1usize << bit_width;

Expand Down Expand Up @@ -288,7 +288,7 @@ mod tests {
#[case(128, 4)]
fn centroids_within_bounds(#[case] dim: u32, #[case] bits: u8) -> VortexResult<()> {
let centroids = get_centroids(dim, bits)?;
for &val in &centroids {
for &val in centroids.iter() {
assert!(
(-1.0..=1.0).contains(&val),
"centroid out of [-1, 1]: {val}",
Expand Down
Loading
Loading