Skip to content

Commit 9a3f471

Browse files
committed
feat(cubeorchestrator): Split numeric query result into typed variants
Replace the single f64 `DBResponsePrimitive::Number` / `FieldValue::Number` variant with precise `Int64`/`UInt64`/`Float64` variants so integer-vs-float type is preserved through the query result path and large 64-bit integers no longer lose precision via f64.
1 parent 44bd455 commit 9a3f471

5 files changed

Lines changed: 197 additions & 22 deletions

File tree

packages/cubejs-backend-native/src/orchestrator.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,11 @@ impl ResultWrapper {
132132
fn db_primitive_to_field_value(value: &DBResponsePrimitive) -> FieldValue<'_> {
133133
match value {
134134
DBResponsePrimitive::String(s) => FieldValue::String(Cow::Borrowed(s)),
135-
DBResponsePrimitive::Number(n) => FieldValue::Number(*n),
135+
DBResponsePrimitive::Int64(n) => FieldValue::Int64(*n),
136+
DBResponsePrimitive::UInt64(n) => FieldValue::UInt64(*n),
137+
DBResponsePrimitive::Float64(n) => FieldValue::Float64(*n),
138+
DBResponsePrimitive::Int128(s) => FieldValue::String(Cow::Borrowed(s)),
139+
DBResponsePrimitive::UInt128(s) => FieldValue::String(Cow::Borrowed(s)),
136140
DBResponsePrimitive::Boolean(b) => FieldValue::Bool(*b),
137141
DBResponsePrimitive::Uncommon(v) => FieldValue::String(Cow::Owned(
138142
serde_json::to_string(&v).unwrap_or_else(|_| v.to_string()),

packages/cubejs-backend-native/src/stream.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ impl ValueObject for JsValueObject<'_> {
280280
if let Ok(s) = value.downcast::<JsString, _>(&mut self.cx) {
281281
Ok(FieldValue::String(Cow::Owned(s.value(&mut self.cx))))
282282
} else if let Ok(n) = value.downcast::<JsNumber, _>(&mut self.cx) {
283-
Ok(FieldValue::Number(n.value(&mut self.cx)))
283+
Ok(FieldValue::Float64(n.value(&mut self.cx)))
284284
} else if let Ok(b) = value.downcast::<JsBoolean, _>(&mut self.cx) {
285285
Ok(FieldValue::Bool(b.value(&mut self.cx)))
286286
} else if value.downcast::<JsUndefined, _>(&mut self.cx).is_ok()

rust/cube/cubeorchestrator/benches/common/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ pub fn build_dataset(
5656
members.push(alias.clone());
5757
let mut col = ColumnarArray::with_capacity(row_count);
5858
for i in 0..row_count {
59-
col.push(DBResponsePrimitive::Number(((i * (j + 1)) as f64) * 0.5));
59+
col.push(DBResponsePrimitive::Float64(((i * (j + 1)) as f64) * 0.5));
6060
}
6161
columns.push(col);
6262
}

rust/cube/cubeorchestrator/src/query_result_transform.rs

Lines changed: 147 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,16 +1152,40 @@ pub struct RequestResultArray {
11521152
pub results: Vec<RequestResultData>,
11531153
}
11541154

1155-
#[derive(Debug, Clone, Serialize, PartialEq)]
1156-
#[serde(untagged)]
1155+
#[derive(Debug, Clone, PartialEq)]
11571156
pub enum DBResponsePrimitive {
11581157
Null,
11591158
Boolean(bool),
1160-
Number(f64),
1159+
Int64(i64),
1160+
UInt64(u64),
1161+
Float64(f64),
1162+
/// 128-bit integers that don't fit into `i64`/`u64`. Kept as their decimal
1163+
/// string form to avoid precision loss and to stay safe for JSON/JS consumers
1164+
/// (which can't represent integers beyond 2^53). Serialized as a JSON string.
1165+
Int128(String),
1166+
UInt128(String),
11611167
String(String),
11621168
Uncommon(Value),
11631169
}
11641170

1171+
// Hand-written `Serialize`. Numeric variants (`Int64`/`UInt64`/`Float64`) are
1172+
// rendered as JSON strings, not numbers.
1173+
impl Serialize for DBResponsePrimitive {
1174+
fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
1175+
match self {
1176+
DBResponsePrimitive::Null => serializer.serialize_none(),
1177+
DBResponsePrimitive::Boolean(b) => serializer.serialize_bool(*b),
1178+
DBResponsePrimitive::Int64(n) => serializer.collect_str(n),
1179+
DBResponsePrimitive::UInt64(n) => serializer.collect_str(n),
1180+
DBResponsePrimitive::Float64(n) => serializer.collect_str(n),
1181+
DBResponsePrimitive::Int128(s) => serializer.serialize_str(s),
1182+
DBResponsePrimitive::UInt128(s) => serializer.serialize_str(s),
1183+
DBResponsePrimitive::String(s) => serializer.serialize_str(s),
1184+
DBResponsePrimitive::Uncommon(v) => v.serialize(serializer),
1185+
}
1186+
}
1187+
}
1188+
11651189
// Hand-written `Deserialize` that avoids serde's untagged-enum buffering.
11661190
impl<'de> Deserialize<'de> for DBResponsePrimitive {
11671191
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
@@ -1182,23 +1206,23 @@ impl<'de> Deserialize<'de> for DBResponsePrimitive {
11821206
}
11831207

11841208
fn visit_i64<E: de::Error>(self, v: i64) -> Result<Self::Value, E> {
1185-
Ok(DBResponsePrimitive::Number(v as f64))
1209+
Ok(DBResponsePrimitive::Int64(v))
11861210
}
11871211

11881212
fn visit_i128<E: de::Error>(self, v: i128) -> Result<Self::Value, E> {
1189-
Ok(DBResponsePrimitive::Number(v as f64))
1213+
Ok(DBResponsePrimitive::Int128(v.to_string()))
11901214
}
11911215

11921216
fn visit_u64<E: de::Error>(self, v: u64) -> Result<Self::Value, E> {
1193-
Ok(DBResponsePrimitive::Number(v as f64))
1217+
Ok(DBResponsePrimitive::UInt64(v))
11941218
}
11951219

11961220
fn visit_u128<E: de::Error>(self, v: u128) -> Result<Self::Value, E> {
1197-
Ok(DBResponsePrimitive::Number(v as f64))
1221+
Ok(DBResponsePrimitive::UInt128(v.to_string()))
11981222
}
11991223

12001224
fn visit_f64<E: de::Error>(self, v: f64) -> Result<Self::Value, E> {
1201-
Ok(DBResponsePrimitive::Number(v))
1225+
Ok(DBResponsePrimitive::Float64(v))
12021226
}
12031227

12041228
fn visit_str<E: de::Error>(self, v: &str) -> Result<Self::Value, E> {
@@ -1254,7 +1278,11 @@ impl Display for DBResponsePrimitive {
12541278
let str = match self {
12551279
DBResponsePrimitive::Null => "null".to_string(),
12561280
DBResponsePrimitive::Boolean(b) => b.to_string(),
1257-
DBResponsePrimitive::Number(n) => n.to_string(),
1281+
DBResponsePrimitive::Int64(n) => n.to_string(),
1282+
DBResponsePrimitive::UInt64(n) => n.to_string(),
1283+
DBResponsePrimitive::Float64(n) => n.to_string(),
1284+
DBResponsePrimitive::Int128(s) => s.clone(),
1285+
DBResponsePrimitive::UInt128(s) => s.clone(),
12581286
DBResponsePrimitive::String(s) => s.clone(),
12591287
DBResponsePrimitive::Uncommon(v) => {
12601288
serde_json::to_string(&v).unwrap_or_else(|_| v.to_string())
@@ -1324,6 +1352,116 @@ mod tests {
13241352

13251353
type TestSuiteData = HashMap<String, TestData>;
13261354

1355+
/// Guards the in-memory size of the per-cell primitive. It is materialized
1356+
/// once per cell across every parse/transform path, so an accidental growth
1357+
/// (e.g. a fat new variant) would regress memory and throughput for large
1358+
/// result sets. Bump this deliberately if the layout must change.
1359+
#[test]
1360+
fn test_db_response_primitive_size() {
1361+
assert_eq!(std::mem::size_of::<DBResponsePrimitive>(), 32);
1362+
}
1363+
1364+
#[test]
1365+
fn test_numeric_primitives_serialize_as_json_strings() {
1366+
use serde_json::{json, to_value, Value as JsonValue};
1367+
1368+
assert_eq!(to_value(DBResponsePrimitive::Int64(5)).unwrap(), json!("5"));
1369+
assert_eq!(
1370+
to_value(DBResponsePrimitive::UInt64(42)).unwrap(),
1371+
json!("42")
1372+
);
1373+
assert_eq!(
1374+
to_value(DBResponsePrimitive::Float64(2.0)).unwrap(),
1375+
json!("2")
1376+
);
1377+
assert_eq!(
1378+
to_value(DBResponsePrimitive::Float64(0.6666666666666666)).unwrap(),
1379+
json!("0.6666666666666666")
1380+
);
1381+
assert_eq!(
1382+
to_value(DBResponsePrimitive::Int128(
1383+
"170141183460469231731687303715884105727".to_string()
1384+
))
1385+
.unwrap(),
1386+
json!("170141183460469231731687303715884105727")
1387+
);
1388+
1389+
assert_eq!(
1390+
to_value(DBResponsePrimitive::Boolean(true)).unwrap(),
1391+
json!(true)
1392+
);
1393+
assert_eq!(
1394+
to_value(DBResponsePrimitive::Null).unwrap(),
1395+
JsonValue::Null
1396+
);
1397+
assert_eq!(
1398+
to_value(DBResponsePrimitive::String("x".to_string())).unwrap(),
1399+
json!("x")
1400+
);
1401+
}
1402+
1403+
#[test]
1404+
fn test_from_js_raw_data_parses_numeric_variants() -> Result<()> {
1405+
use serde_json::{from_value, json};
1406+
1407+
let raw: JsRawColumnarData = from_value(json!({
1408+
"members": ["nonneg", "neg", "float", "mixed"],
1409+
"columns": [
1410+
[0, 1, u64::MAX],
1411+
[-1, -42, i64::MIN],
1412+
[1.5, 2.0, 0.6666666666666666],
1413+
["text", true, null],
1414+
],
1415+
}))?;
1416+
1417+
let result = QueryResult::from_js_raw_data(raw)?;
1418+
1419+
assert_eq!(result.row_count(), 3);
1420+
assert_eq!(
1421+
result.members().to_vec(),
1422+
vec!["nonneg", "neg", "float", "mixed"]
1423+
);
1424+
1425+
// Non-negative integers -> UInt64, including u64::MAX.
1426+
assert_eq!(
1427+
result.column(0)?.as_slice(),
1428+
&[
1429+
DBResponsePrimitive::UInt64(0),
1430+
DBResponsePrimitive::UInt64(1),
1431+
DBResponsePrimitive::UInt64(u64::MAX),
1432+
]
1433+
);
1434+
// Negative integers -> Int64, including i64::MIN.
1435+
assert_eq!(
1436+
result.column(1)?.as_slice(),
1437+
&[
1438+
DBResponsePrimitive::Int64(-1),
1439+
DBResponsePrimitive::Int64(-42),
1440+
DBResponsePrimitive::Int64(i64::MIN),
1441+
]
1442+
);
1443+
// Fractional values -> Float64 (`2.0` keeps its float token).
1444+
assert_eq!(
1445+
result.column(2)?.as_slice(),
1446+
&[
1447+
DBResponsePrimitive::Float64(1.5),
1448+
DBResponsePrimitive::Float64(2.0),
1449+
DBResponsePrimitive::Float64(0.6666666666666666),
1450+
]
1451+
);
1452+
// Non-numeric cells pass through.
1453+
assert_eq!(
1454+
result.column(3)?.as_slice(),
1455+
&[
1456+
DBResponsePrimitive::String("text".to_string()),
1457+
DBResponsePrimitive::Boolean(true),
1458+
DBResponsePrimitive::Null,
1459+
]
1460+
);
1461+
1462+
Ok(())
1463+
}
1464+
13271465
#[derive(Clone, Deserialize)]
13281466
#[serde(rename_all = "camelCase")]
13291467
struct TestData {

rust/cubesql/cubesql/src/compile/engine/df/scan.rs

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,9 @@ pub enum FieldValue<'a> {
305305
// because V8 uses UTF-16 It allocates/converts a new strings while doing JsString.value()
306306
// @see v8 WriteUtf8 for more details. Cow::Owned is used for this variant
307307
String(Cow<'a, str>),
308-
Number(f64),
308+
Int64(i64),
309+
UInt64(u64),
310+
Float64(f64),
309311
Bool(bool),
310312
Null,
311313
}
@@ -345,9 +347,28 @@ impl JsonValueObject {
345347
fn json_value_to_field_value(value: &Value) -> std::result::Result<FieldValue<'_>, CubeError> {
346348
Ok(match value {
347349
Value::String(s) => FieldValue::String(Cow::Borrowed(s)),
348-
Value::Number(n) => FieldValue::Number(n.as_f64().ok_or_else(|| {
349-
DataFusionError::Execution(format!("Can't convert {:?} to float", n))
350-
})?),
350+
Value::Number(n) => {
351+
if n.is_f64() {
352+
return n
353+
.as_f64()
354+
.map(|v| FieldValue::Float64(v))
355+
.ok_or(CubeError::user(format!(
356+
"Can't convert number {:?} to FieldValue::Float64",
357+
n
358+
)));
359+
};
360+
361+
if let Some(i) = n.as_i64() {
362+
FieldValue::Int64(i)
363+
} else if let Some(u) = n.as_u64() {
364+
FieldValue::UInt64(u)
365+
} else {
366+
return Err(CubeError::user(format!(
367+
"Can't convert number: {:?} to FieldValue",
368+
n
369+
)));
370+
}
371+
}
351372
Value::Bool(b) => FieldValue::Bool(*b),
352373
Value::Null => FieldValue::Null,
353374
x => {
@@ -918,7 +939,9 @@ macro_rules! transform_response_body {
918939
{
919940
(FieldValue::String(v), builder) => builder.append_value(v)?,
920941
(FieldValue::Bool(v), builder) => builder.append_value(if v { "true" } else { "false" })?,
921-
(FieldValue::Number(v), builder) => builder.append_value(v.to_string())?,
942+
(FieldValue::Int64(v), builder) => builder.append_value(v.to_string())?,
943+
(FieldValue::UInt64(v), builder) => builder.append_value(v.to_string())?,
944+
(FieldValue::Float64(v), builder) => builder.append_value(v.to_string())?,
922945
},
923946
{
924947
(ScalarValue::Utf8(v), builder) => builder.append_option(v.as_ref())?,
@@ -933,7 +956,9 @@ macro_rules! transform_response_body {
933956
$response,
934957
field_name,
935958
{
936-
(FieldValue::Number(number), builder) => builder.append_value(number.round() as i16)?,
959+
(FieldValue::Int64(number), builder) => builder.append_value(number as i16)?,
960+
(FieldValue::UInt64(number), builder) => builder.append_value(number as i16)?,
961+
(FieldValue::Float64(number), builder) => builder.append_value(number.round() as i16)?,
937962
(FieldValue::String(s), builder) => match s.parse::<i16>() {
938963
Ok(v) => builder.append_value(v)?,
939964
Err(error) => {
@@ -959,7 +984,9 @@ macro_rules! transform_response_body {
959984
$response,
960985
field_name,
961986
{
962-
(FieldValue::Number(number), builder) => builder.append_value(number.round() as i32)?,
987+
(FieldValue::Int64(number), builder) => builder.append_value(number as i32)?,
988+
(FieldValue::UInt64(number), builder) => builder.append_value(number as i32)?,
989+
(FieldValue::Float64(number), builder) => builder.append_value(number.round() as i32)?,
963990
(FieldValue::String(s), builder) => match s.parse::<i32>() {
964991
Ok(v) => builder.append_value(v)?,
965992
Err(error) => {
@@ -985,7 +1012,9 @@ macro_rules! transform_response_body {
9851012
$response,
9861013
field_name,
9871014
{
988-
(FieldValue::Number(number), builder) => builder.append_value(number.round() as i64)?,
1015+
(FieldValue::Int64(number), builder) => builder.append_value(number)?,
1016+
(FieldValue::UInt64(number), builder) => builder.append_value(number as i64)?,
1017+
(FieldValue::Float64(number), builder) => builder.append_value(number.round() as i64)?,
9891018
(FieldValue::String(s), builder) => match s.parse::<i64>() {
9901019
Ok(v) => builder.append_value(v)?,
9911020
Err(error) => {
@@ -1011,7 +1040,9 @@ macro_rules! transform_response_body {
10111040
$response,
10121041
field_name,
10131042
{
1014-
(FieldValue::Number(number), builder) => builder.append_value(number as f32)?,
1043+
(FieldValue::Int64(number), builder) => builder.append_value(number as f32)?,
1044+
(FieldValue::UInt64(number), builder) => builder.append_value(number as f32)?,
1045+
(FieldValue::Float64(number), builder) => builder.append_value(number as f32)?,
10151046
(FieldValue::String(s), builder) => match s.parse::<f32>() {
10161047
Ok(v) => builder.append_value(v)?,
10171048
Err(error) => {
@@ -1037,7 +1068,9 @@ macro_rules! transform_response_body {
10371068
$response,
10381069
field_name,
10391070
{
1040-
(FieldValue::Number(number), builder) => builder.append_value(number)?,
1071+
(FieldValue::Int64(number), builder) => builder.append_value(number as f64)?,
1072+
(FieldValue::UInt64(number), builder) => builder.append_value(number as f64)?,
1073+
(FieldValue::Float64(number), builder) => builder.append_value(number)?,
10411074
(FieldValue::String(s), builder) => match s.parse::<f64>() {
10421075
Ok(v) => builder.append_value(v)?,
10431076
Err(error) => {

0 commit comments

Comments
 (0)