Skip to content

Commit 8b587bb

Browse files
perf: intern identifiers (array, layout, etc). (#7412)
We use IDs for scalar_fns, arrays, and layouts. These IDs are used to match rules, among other things. This PR replaces the Arc<str> ID with an interned string (u32) ID. --------- Signed-off-by: Nicholas Gates <nick@nickgates.com> Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 59c92ab commit 8b587bb

173 files changed

Lines changed: 625 additions & 611 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.lock

Lines changed: 26 additions & 15 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ itertools = "0.14.0"
163163
jetscii = "0.5.3"
164164
jiff = "0.2.0"
165165
kanal = "0.1.1"
166+
lasso = { version = "0.7", features = ["multi-threaded"] }
166167
lending-iterator = "0.1.7"
167168
libfuzzer-sys = "0.4"
168169
libloading = "0.8"

encodings/alp/public-api.lock

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,6 @@ pub struct vortex_alp::ALP
66

77
impl vortex_alp::ALP
88

9-
pub const vortex_alp::ALP::ID: vortex_array::array::ArrayId
10-
11-
impl vortex_alp::ALP
12-
139
pub fn vortex_alp::ALP::new(encoded: vortex_array::array::erased::ArrayRef, exponents: vortex_alp::Exponents, patches: core::option::Option<vortex_array::patches::Patches>) -> vortex_alp::ALPArray
1410

1511
pub unsafe fn vortex_alp::ALP::new_unchecked(encoded: vortex_array::array::erased::ArrayRef, exponents: vortex_alp::Exponents, patches: core::option::Option<vortex_array::patches::Patches>) -> vortex_alp::ALPArray
@@ -148,8 +144,6 @@ pub struct vortex_alp::ALPRD
148144

149145
impl vortex_alp::ALPRD
150146

151-
pub const vortex_alp::ALPRD::ID: vortex_array::array::ArrayId
152-
153147
pub unsafe fn vortex_alp::ALPRD::new_unchecked(dtype: vortex_array::dtype::DType, left_parts: vortex_array::array::erased::ArrayRef, left_parts_dictionary: vortex_buffer::buffer::Buffer<u16>, right_parts: vortex_array::array::erased::ArrayRef, right_bit_width: u8, left_parts_patches: core::option::Option<vortex_array::patches::Patches>) -> vortex_alp::ALPRDArray
154148

155149
pub fn vortex_alp::ALPRD::try_new(dtype: vortex_array::dtype::DType, left_parts: vortex_array::array::erased::ArrayRef, left_parts_dictionary: vortex_buffer::buffer::Buffer<u16>, right_parts: vortex_array::array::erased::ArrayRef, right_bit_width: u8, left_parts_patches: core::option::Option<vortex_array::patches::Patches>) -> vortex_error::VortexResult<vortex_alp::ALPRDArray>

encodings/alp/src/alp/array.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ use vortex_error::vortex_bail;
3939
use vortex_error::vortex_ensure;
4040
use vortex_error::vortex_panic;
4141
use vortex_session::VortexSession;
42+
use vortex_session::registry::CachedId;
4243

4344
use crate::ALPFloat;
4445
use crate::alp::Exponents;
@@ -72,7 +73,8 @@ impl VTable for ALP {
7273
type ValidityVTable = ValidityVTableFromChild;
7374

7475
fn id(&self) -> ArrayId {
75-
Self::ID
76+
static ID: CachedId = CachedId::new("vortex.alp");
77+
*ID
7678
}
7779

7880
fn validate(
@@ -237,10 +239,6 @@ impl Display for ALPData {
237239
#[derive(Clone, Debug)]
238240
pub struct ALP;
239241

240-
impl ALP {
241-
pub const ID: ArrayId = ArrayId::new_ref("vortex.alp");
242-
}
243-
244242
#[derive(Clone, prost::Message)]
245243
pub struct ALPMetadata {
246244
#[prost(uint32, tag = "1")]

encodings/alp/src/alp/plugin.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
//!
77
//! This enables zero-cost backward compatibility with previously written datasets.
88
9+
use vortex_array::Array;
910
use vortex_array::ArrayId;
1011
use vortex_array::ArrayPlugin;
1112
use vortex_array::ArrayRef;
13+
use vortex_array::ArrayVTable;
1214
use vortex_array::IntoArray;
1315
use vortex_array::VortexSessionExecute;
1416
use vortex_array::arrays::Patched;
@@ -32,7 +34,8 @@ impl ArrayPlugin for ALPPatchedPlugin {
3234
fn id(&self) -> ArrayId {
3335
// We reuse the existing `ALP` ID so that we can take over its
3436
// deserialization pathway.
35-
ALP::ID
37+
// TODO(joe): dedup method name
38+
ArrayVTable::id(&ALP)
3639
}
3740

3841
fn serialize(
@@ -53,10 +56,10 @@ impl ArrayPlugin for ALPPatchedPlugin {
5356
children: &dyn ArrayChildren,
5457
session: &VortexSession,
5558
) -> VortexResult<ArrayRef> {
56-
let alp_array = ALP
57-
.deserialize(dtype, len, metadata, buffers, children, session)?
58-
.try_downcast::<ALP>()
59-
.map_err(|_| vortex_err!("ALP plugin should only deserialize vortex.alp"))?;
59+
let alp_array = Array::<ALP>::try_from_parts(ArrayVTable::deserialize(
60+
&ALP, dtype, len, metadata, buffers, children, session,
61+
)?)
62+
.map_err(|_| vortex_err!("ALP plugin should only deserialize vortex.alp"))?;
6063

6164
// Check if there are interior patches to externalize.
6265
let Some(patches) = alp_array.patches() else {
@@ -78,7 +81,8 @@ impl ArrayPlugin for ALPPatchedPlugin {
7881
}
7982

8083
fn is_supported_encoding(&self, id: &ArrayId) -> bool {
81-
id == &Patched.id() || id == &ALP.id()
84+
// TODO(joe): dedup method name
85+
id == ArrayVTable::id(&Patched) || id == ArrayVTable::id(&ALP)
8286
}
8387
}
8488

encodings/alp/src/alp_rd/array.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ use vortex_error::vortex_ensure;
4444
use vortex_error::vortex_err;
4545
use vortex_error::vortex_panic;
4646
use vortex_session::VortexSession;
47+
use vortex_session::registry::CachedId;
4748

4849
use crate::alp_rd::kernel::PARENT_KERNELS;
4950
use crate::alp_rd::rules::RULES;
@@ -92,7 +93,8 @@ impl VTable for ALPRD {
9293
type ValidityVTable = ValidityVTableFromChild;
9394

9495
fn id(&self) -> ArrayId {
95-
Self::ID
96+
static ID: CachedId = CachedId::new("vortex.alprd");
97+
*ID
9698
}
9799

98100
fn validate(
@@ -357,8 +359,6 @@ pub struct ALPRDDataParts {
357359
pub struct ALPRD;
358360

359361
impl ALPRD {
360-
pub const ID: ArrayId = ArrayId::new_ref("vortex.alprd");
361-
362362
pub fn try_new(
363363
dtype: DType,
364364
left_parts: ArrayRef,

encodings/alp/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
1919
pub use alp::*;
2020
pub use alp_rd::*;
21+
use vortex_array::ArrayVTable;
2122
use vortex_array::aggregate_fn::AggregateFnVTable;
2223
use vortex_array::aggregate_fn::fns::nan_count::NanCount;
2324
use vortex_array::aggregate_fn::session::AggregateFnSessionExt;
@@ -41,7 +42,7 @@ pub fn initialize(session: &VortexSession) {
4142

4243
// Register the ALP-specific NaN count aggregate kernel.
4344
session.aggregate_fns().register_aggregate_kernel(
44-
ALP::ID,
45+
ALP.id(),
4546
Some(NanCount.id()),
4647
&compute::nan_count::ALPNanCountKernel,
4748
);

encodings/bytebool/public-api.lock

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ pub struct vortex_bytebool::ByteBool
44

55
impl vortex_bytebool::ByteBool
66

7-
pub const vortex_bytebool::ByteBool::ID: vortex_array::array::ArrayId
8-
97
pub fn vortex_bytebool::ByteBool::from_option_vec(data: alloc::vec::Vec<core::option::Option<bool>>) -> vortex_bytebool::ByteBoolArray
108

119
pub fn vortex_bytebool::ByteBool::from_vec<V: core::convert::Into<vortex_array::validity::Validity>>(data: alloc::vec::Vec<bool>, validity: V) -> vortex_bytebool::ByteBoolArray

encodings/bytebool/src/array.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ use vortex_error::vortex_ensure;
3737
use vortex_error::vortex_panic;
3838
use vortex_mask::Mask;
3939
use vortex_session::VortexSession;
40+
use vortex_session::registry::CachedId;
4041

4142
use crate::kernel::PARENT_KERNELS;
4243

@@ -62,7 +63,8 @@ impl VTable for ByteBool {
6263
type ValidityVTable = Self;
6364

6465
fn id(&self) -> ArrayId {
65-
Self::ID
66+
static ID: CachedId = CachedId::new("vortex.bytebool");
67+
*ID
6668
}
6769

6870
fn validate(
@@ -200,8 +202,6 @@ impl<T: TypedArrayRef<ByteBool>> ByteBoolArrayExt for T {}
200202
pub struct ByteBool;
201203

202204
impl ByteBool {
203-
pub const ID: ArrayId = ArrayId::new_ref("vortex.bytebool");
204-
205205
pub fn new(buffer: BufferHandle, validity: Validity) -> ByteBoolArray {
206206
let dtype = DType::Bool(validity.nullability());
207207
let slots = ByteBoolData::make_slots(&validity, buffer.len());

encodings/datetime-parts/public-api.lock

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ pub struct vortex_datetime_parts::DateTimeParts
44

55
impl vortex_datetime_parts::DateTimeParts
66

7-
pub const vortex_datetime_parts::DateTimeParts::ID: vortex_array::array::ArrayId
8-
97
pub fn vortex_datetime_parts::DateTimeParts::try_from_temporal(temporal: vortex_array::arrays::datetime::TemporalArray) -> vortex_error::VortexResult<vortex_datetime_parts::DateTimePartsArray>
108

119
pub fn vortex_datetime_parts::DateTimeParts::try_new(dtype: vortex_array::dtype::DType, days: vortex_array::array::erased::ArrayRef, seconds: vortex_array::array::erased::ArrayRef, subseconds: vortex_array::array::erased::ArrayRef) -> vortex_error::VortexResult<vortex_datetime_parts::DateTimePartsArray>

0 commit comments

Comments
 (0)