Skip to content

Commit dfe3699

Browse files
authored
Add TPC-H to sqllogictests (#6830)
This PR adds TPC-H to our SLT suite. It requires generating data (script provided) and currently doesn't work due to subtle formatting differences between the crates. One reason for those differences is that we actually run the queries on different schemas, as they each infer a different schema. I've also added some DuckDB functionality as I tried to get it working, and I figure it's worth merging anyway. --------- Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 0309e57 commit dfe3699

42 files changed

Lines changed: 6875 additions & 6 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/ci.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -821,8 +821,13 @@ jobs:
821821
uses: ./.github/actions/setup-rust
822822
with:
823823
repo-token: ${{ secrets.GITHUB_TOKEN }}
824+
- name: Install uv
825+
uses: spiraldb/actions/.github/actions/setup-uv@0.18.5
826+
with:
827+
sync: false
824828
- name: Run sqllogictest tests
825829
run: |
830+
./vortex-sqllogictest/slt/tpch/generate_data.sh
826831
cargo test -p vortex-sqllogictest --test sqllogictests
827832
828833

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ async-fs = "2.2.0"
100100
async-lock = "3.4"
101101
async-stream = "0.3.6"
102102
async-trait = "0.1.89"
103+
bigdecimal = "0.4.8"
103104
bindgen = "0.72.0"
104105
bit-vec = "0.8.0"
105106
bitvec = "1.0.1"

_typos.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ extend-ignore-re = [
88
]
99

1010
[files]
11-
extend-exclude = ["/vortex-bench/**", "/docs/references.bib", "benchmarks/**"]
11+
extend-exclude = ["/vortex-bench/**", "/docs/references.bib", "benchmarks/**", "vortex-sqllogictest/slt/**"]
1212

1313
[type.py]
1414
extend-ignore-identifiers-re = [

vortex-duckdb/src/convert/scalar.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,9 @@ impl<'a> TryFrom<&'a ValueRef> for Scalar {
253253
ExtractedValue::HugeInt(_) => {
254254
vortex_bail!("DuckDB HugeInt is not yet supported in Vortex");
255255
}
256+
ExtractedValue::UHugeInt(_) => {
257+
vortex_bail!("DuckDB UHugeInt is not yet supported in Vortex");
258+
}
256259
ExtractedValue::UTinyInt(v) => Ok(Scalar::primitive(v, Nullable)),
257260
ExtractedValue::USmallInt(v) => Ok(Scalar::primitive(v, Nullable)),
258261
ExtractedValue::UInteger(v) => Ok(Scalar::primitive(v, Nullable)),

vortex-duckdb/src/convert/vector.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ pub fn flat_vector_to_vortex(vector: &VectorRef, len: usize) -> VortexResult<Arr
348348
StructArray::try_new(names, children, len, vector.validity_ref(len).to_validity())
349349
.map(|a| a.into_array())
350350
}
351-
_ => todo!("missing impl for {type_id:?}"),
351+
_ => unimplemented!("missing impl for {type_id:?}"),
352352
}
353353
}
354354

vortex-duckdb/src/duckdb/logical_type.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,10 @@ impl LogicalType {
166166
pub fn timestamp_tz() -> Self {
167167
Self::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_TZ)
168168
}
169+
170+
pub fn date() -> Self {
171+
Self::new(DUCKDB_TYPE::DUCKDB_TYPE_DATE)
172+
}
169173
}
170174

171175
impl LogicalTypeRef {
@@ -182,6 +186,10 @@ impl LogicalTypeRef {
182186
}
183187
}
184188

189+
pub fn is_decimal(&self) -> bool {
190+
matches!(self.as_type_id(), DUCKDB_TYPE::DUCKDB_TYPE_DECIMAL)
191+
}
192+
185193
pub fn array_child_type(&self) -> LogicalType {
186194
unsafe { LogicalType::own(duckdb_array_type_child_type(self.as_ptr())) }
187195
}

vortex-duckdb/src/duckdb/query_result.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33

44
use std::ffi::CStr;
55

6+
use vortex::error::VortexExpect;
67
use vortex::error::VortexResult;
78
use vortex::error::vortex_bail;
89
use vortex::error::vortex_err;
910

1011
use crate::cpp;
12+
use crate::cpp::DUCKDB_TYPE;
1113
use crate::duckdb::DataChunk;
1214
use crate::lifetime_wrapper;
1315

@@ -71,7 +73,15 @@ impl QueryResultRef {
7173
/// Get the type of a column by index.
7274
pub fn column_type(&self, col_idx: usize) -> LogicalType {
7375
let dtype = unsafe { cpp::duckdb_column_type(self.as_ptr(), col_idx as u64) };
74-
LogicalType::new(dtype)
76+
if dtype == DUCKDB_TYPE::DUCKDB_TYPE_DECIMAL {
77+
let lt = unsafe { cpp::duckdb_column_logical_type(self.as_ptr(), col_idx as u64) };
78+
let precision = unsafe { cpp::duckdb_decimal_width(lt) };
79+
let scale = unsafe { cpp::duckdb_decimal_scale(lt) };
80+
81+
LogicalType::decimal_type(precision, scale).vortex_expect("valid decimal")
82+
} else {
83+
LogicalType::new(dtype)
84+
}
7585
}
7686
}
7787

vortex-duckdb/src/duckdb/value.rs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ impl ValueRef {
5959
DUCKDB_TYPE::DUCKDB_TYPE_BIGINT => {
6060
ExtractedValue::BigInt(unsafe { cpp::duckdb_get_int64(self.as_ptr()) })
6161
}
62+
DUCKDB_TYPE::DUCKDB_TYPE_HUGEINT => {
63+
let huge_int = unsafe { cpp::duckdb_get_hugeint(self.as_ptr()) };
64+
ExtractedValue::HugeInt(i128_from_parts(huge_int.upper, huge_int.lower))
65+
}
6266
DUCKDB_TYPE::DUCKDB_TYPE_UTINYINT => {
6367
ExtractedValue::UTinyInt(unsafe { cpp::duckdb_get_uint8(self.as_ptr()) })
6468
}
@@ -71,6 +75,10 @@ impl ValueRef {
7175
DUCKDB_TYPE::DUCKDB_TYPE_UBIGINT => {
7276
ExtractedValue::UBigInt(unsafe { cpp::duckdb_get_uint64(self.as_ptr()) })
7377
}
78+
DUCKDB_TYPE::DUCKDB_TYPE_UHUGEINT => {
79+
let huge_uint = unsafe { cpp::duckdb_get_uhugeint(self.as_ptr()) };
80+
ExtractedValue::UHugeInt(u128_from_parts(huge_uint.upper, huge_uint.lower))
81+
}
7482
DUCKDB_TYPE::DUCKDB_TYPE_FLOAT => {
7583
ExtractedValue::Float(unsafe { cpp::duckdb_get_float(self.as_ptr()) })
7684
}
@@ -149,7 +157,7 @@ impl ValueRef {
149157
.collect::<Vec<_>>(),
150158
),
151159
// ...other types remain unimplemented..
152-
_ => vortex_panic!("Unsupported DuckDB value type {:?}", self),
160+
other => vortex_panic!("Unsupported DuckDB value type {other:?}"),
153161
}
154162
}
155163
}
@@ -264,6 +272,11 @@ pub fn i128_from_parts(high: i64, low: u64) -> i128 {
264272
((high as i128) << 64) | (low as i128)
265273
}
266274

275+
#[inline]
276+
pub fn u128_from_parts(high: u64, low: u64) -> u128 {
277+
((high as u128) << 64) | (low as u128)
278+
}
279+
267280
impl<T> TryFrom<Option<T>> for Value
268281
where
269282
T: Into<Value> + NativeDType,
@@ -376,6 +389,7 @@ pub enum ExtractedValue {
376389
USmallInt(u16),
377390
UInteger(u32),
378391
UBigInt(u64),
392+
UHugeInt(u128),
379393
Float(f32),
380394
Double(f64),
381395
Boolean(bool),
@@ -394,6 +408,7 @@ pub enum ExtractedValue {
394408
#[cfg(test)]
395409
mod tests {
396410
use crate::duckdb::i128_from_parts;
411+
use crate::duckdb::u128_from_parts;
397412

398413
#[test]
399414
fn test_huge_int_from_parts() {
@@ -408,4 +423,17 @@ mod tests {
408423
(1i128 << 64) + (u64::MAX as i128)
409424
);
410425
}
426+
427+
#[test]
428+
fn test_uhuge_int_from_parts() {
429+
assert_eq!(u128_from_parts(0, 0), 0u128);
430+
assert_eq!(u128_from_parts(0, 34534912), 34534912u128);
431+
assert_eq!(u128_from_parts(0, u64::MAX), u64::MAX as u128);
432+
assert_eq!(u128_from_parts(u64::MAX, u64::MAX), u128::MAX);
433+
assert_eq!(
434+
u128_from_parts(1, u64::MAX),
435+
(1u128 << 64) + (u64::MAX as u128)
436+
);
437+
assert_eq!(u128_from_parts(1, 0), 1u128 << 64);
438+
}
411439
}

vortex-duckdb/src/multi_file.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ impl DataSourceTableFunction for VortexMultiFileScan {
3939

4040
// Parse the URL and separate the base URL (keep scheme, host, etc.) from the path.
4141
let glob_url_str = glob_url_parameter.as_string();
42+
4243
let glob_url = match Url::parse(glob_url_str.as_str()) {
4344
Ok(url) => Ok(url),
4445
Err(_) => Url::from_file_path(Path::new(glob_url_str.as_str()))

0 commit comments

Comments
 (0)