Skip to content

Commit fbf6637

Browse files
committed
feat(cubeorchestrator): Split numeric query result into typed variants
Replace the single f64 `DBResponsePrimitive::Number` / `FieldValue::Number` variant with precise `Int64`/`UInt64`/`Float64` variants so integer-vs-float type is preserved through the query result path and large 64-bit integers no longer lose precision via f64.
1 parent 44bd455 commit fbf6637

5 files changed

Lines changed: 198 additions & 22 deletions

File tree

packages/cubejs-backend-native/src/orchestrator.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,11 @@ impl ResultWrapper {
132132
fn db_primitive_to_field_value(value: &DBResponsePrimitive) -> FieldValue<'_> {
133133
match value {
134134
DBResponsePrimitive::String(s) => FieldValue::String(Cow::Borrowed(s)),
135-
DBResponsePrimitive::Number(n) => FieldValue::Number(*n),
135+
DBResponsePrimitive::Int64(n) => FieldValue::Int64(*n),
136+
DBResponsePrimitive::UInt64(n) => FieldValue::UInt64(*n),
137+
DBResponsePrimitive::Float64(n) => FieldValue::Float64(*n),
138+
DBResponsePrimitive::Int128(s) => FieldValue::String(Cow::Borrowed(s)),
139+
DBResponsePrimitive::UInt128(s) => FieldValue::String(Cow::Borrowed(s)),
136140
DBResponsePrimitive::Boolean(b) => FieldValue::Bool(*b),
137141
DBResponsePrimitive::Uncommon(v) => FieldValue::String(Cow::Owned(
138142
serde_json::to_string(&v).unwrap_or_else(|_| v.to_string()),

packages/cubejs-backend-native/src/stream.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ impl ValueObject for JsValueObject<'_> {
280280
if let Ok(s) = value.downcast::<JsString, _>(&mut self.cx) {
281281
Ok(FieldValue::String(Cow::Owned(s.value(&mut self.cx))))
282282
} else if let Ok(n) = value.downcast::<JsNumber, _>(&mut self.cx) {
283-
Ok(FieldValue::Number(n.value(&mut self.cx)))
283+
Ok(FieldValue::Float64(n.value(&mut self.cx)))
284284
} else if let Ok(b) = value.downcast::<JsBoolean, _>(&mut self.cx) {
285285
Ok(FieldValue::Bool(b.value(&mut self.cx)))
286286
} else if value.downcast::<JsUndefined, _>(&mut self.cx).is_ok()

rust/cube/cubeorchestrator/benches/common/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ pub fn build_dataset(
5656
members.push(alias.clone());
5757
let mut col = ColumnarArray::with_capacity(row_count);
5858
for i in 0..row_count {
59-
col.push(DBResponsePrimitive::Number(((i * (j + 1)) as f64) * 0.5));
59+
col.push(DBResponsePrimitive::Float64(((i * (j + 1)) as f64) * 0.5));
6060
}
6161
columns.push(col);
6262
}

rust/cube/cubeorchestrator/src/query_result_transform.rs

Lines changed: 159 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,16 +1152,40 @@ pub struct RequestResultArray {
11521152
pub results: Vec<RequestResultData>,
11531153
}
11541154

1155-
#[derive(Debug, Clone, Serialize, PartialEq)]
1156-
#[serde(untagged)]
1155+
#[derive(Debug, Clone, PartialEq)]
11571156
pub enum DBResponsePrimitive {
11581157
Null,
11591158
Boolean(bool),
1160-
Number(f64),
1159+
Int64(i64),
1160+
UInt64(u64),
1161+
Float64(f64),
1162+
/// 128-bit integers that don't fit into `i64`/`u64`. Kept as their decimal
1163+
/// string form to avoid precision loss and to stay safe for JSON/JS consumers
1164+
/// (which can't represent integers beyond 2^53). Serialized as a JSON string.
1165+
Int128(String),
1166+
UInt128(String),
11611167
String(String),
11621168
Uncommon(Value),
11631169
}
11641170

1171+
// Hand-written `Serialize`. Numeric variants (`Int64`/`UInt64`/`Float64`) are
1172+
// rendered as JSON strings, not numbers.
1173+
impl Serialize for DBResponsePrimitive {
1174+
fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
1175+
match self {
1176+
DBResponsePrimitive::Null => serializer.serialize_none(),
1177+
DBResponsePrimitive::Boolean(b) => serializer.serialize_bool(*b),
1178+
DBResponsePrimitive::Int64(n) => serializer.collect_str(n),
1179+
DBResponsePrimitive::UInt64(n) => serializer.collect_str(n),
1180+
DBResponsePrimitive::Float64(n) => serializer.collect_str(n),
1181+
DBResponsePrimitive::Int128(s) => serializer.serialize_str(s),
1182+
DBResponsePrimitive::UInt128(s) => serializer.serialize_str(s),
1183+
DBResponsePrimitive::String(s) => serializer.serialize_str(s),
1184+
DBResponsePrimitive::Uncommon(v) => v.serialize(serializer),
1185+
}
1186+
}
1187+
}
1188+
11651189
// Hand-written `Deserialize` that avoids serde's untagged-enum buffering.
11661190
impl<'de> Deserialize<'de> for DBResponsePrimitive {
11671191
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
@@ -1182,23 +1206,35 @@ impl<'de> Deserialize<'de> for DBResponsePrimitive {
11821206
}
11831207

11841208
fn visit_i64<E: de::Error>(self, v: i64) -> Result<Self::Value, E> {
1185-
Ok(DBResponsePrimitive::Number(v as f64))
1209+
Ok(DBResponsePrimitive::Int64(v))
11861210
}
11871211

11881212
fn visit_i128<E: de::Error>(self, v: i128) -> Result<Self::Value, E> {
1189-
Ok(DBResponsePrimitive::Number(v as f64))
1213+
if let Ok(n) = i64::try_from(v) {
1214+
Ok(DBResponsePrimitive::Int64(n))
1215+
} else if let Ok(n) = u64::try_from(v) {
1216+
Ok(DBResponsePrimitive::UInt64(n))
1217+
} else {
1218+
Ok(DBResponsePrimitive::Int128(v.to_string()))
1219+
}
11901220
}
11911221

11921222
fn visit_u64<E: de::Error>(self, v: u64) -> Result<Self::Value, E> {
1193-
Ok(DBResponsePrimitive::Number(v as f64))
1223+
Ok(DBResponsePrimitive::UInt64(v))
11941224
}
11951225

11961226
fn visit_u128<E: de::Error>(self, v: u128) -> Result<Self::Value, E> {
1197-
Ok(DBResponsePrimitive::Number(v as f64))
1227+
if let Ok(n) = i64::try_from(v) {
1228+
Ok(DBResponsePrimitive::Int64(n))
1229+
} else if let Ok(n) = u64::try_from(v) {
1230+
Ok(DBResponsePrimitive::UInt64(n))
1231+
} else {
1232+
Ok(DBResponsePrimitive::UInt128(v.to_string()))
1233+
}
11981234
}
11991235

12001236
fn visit_f64<E: de::Error>(self, v: f64) -> Result<Self::Value, E> {
1201-
Ok(DBResponsePrimitive::Number(v))
1237+
Ok(DBResponsePrimitive::Float64(v))
12021238
}
12031239

12041240
fn visit_str<E: de::Error>(self, v: &str) -> Result<Self::Value, E> {
@@ -1254,7 +1290,11 @@ impl Display for DBResponsePrimitive {
12541290
let str = match self {
12551291
DBResponsePrimitive::Null => "null".to_string(),
12561292
DBResponsePrimitive::Boolean(b) => b.to_string(),
1257-
DBResponsePrimitive::Number(n) => n.to_string(),
1293+
DBResponsePrimitive::Int64(n) => n.to_string(),
1294+
DBResponsePrimitive::UInt64(n) => n.to_string(),
1295+
DBResponsePrimitive::Float64(n) => n.to_string(),
1296+
DBResponsePrimitive::Int128(s) => s.clone(),
1297+
DBResponsePrimitive::UInt128(s) => s.clone(),
12581298
DBResponsePrimitive::String(s) => s.clone(),
12591299
DBResponsePrimitive::Uncommon(v) => {
12601300
serde_json::to_string(&v).unwrap_or_else(|_| v.to_string())
@@ -1324,6 +1364,116 @@ mod tests {
13241364

13251365
type TestSuiteData = HashMap<String, TestData>;
13261366

1367+
/// Guards the in-memory size of the per-cell primitive. It is materialized
1368+
/// once per cell across every parse/transform path, so an accidental growth
1369+
/// (e.g. a fat new variant) would regress memory and throughput for large
1370+
/// result sets. Bump this deliberately if the layout must change.
1371+
#[test]
1372+
fn test_db_response_primitive_size() {
1373+
assert_eq!(std::mem::size_of::<DBResponsePrimitive>(), 32);
1374+
}
1375+
1376+
#[test]
1377+
fn test_numeric_primitives_serialize_as_json_strings() {
1378+
use serde_json::{json, to_value, Value as JsonValue};
1379+
1380+
assert_eq!(to_value(DBResponsePrimitive::Int64(5)).unwrap(), json!("5"));
1381+
assert_eq!(
1382+
to_value(DBResponsePrimitive::UInt64(42)).unwrap(),
1383+
json!("42")
1384+
);
1385+
assert_eq!(
1386+
to_value(DBResponsePrimitive::Float64(2.0)).unwrap(),
1387+
json!("2")
1388+
);
1389+
assert_eq!(
1390+
to_value(DBResponsePrimitive::Float64(0.6666666666666666)).unwrap(),
1391+
json!("0.6666666666666666")
1392+
);
1393+
assert_eq!(
1394+
to_value(DBResponsePrimitive::Int128(
1395+
"170141183460469231731687303715884105727".to_string()
1396+
))
1397+
.unwrap(),
1398+
json!("170141183460469231731687303715884105727")
1399+
);
1400+
1401+
assert_eq!(
1402+
to_value(DBResponsePrimitive::Boolean(true)).unwrap(),
1403+
json!(true)
1404+
);
1405+
assert_eq!(
1406+
to_value(DBResponsePrimitive::Null).unwrap(),
1407+
JsonValue::Null
1408+
);
1409+
assert_eq!(
1410+
to_value(DBResponsePrimitive::String("x".to_string())).unwrap(),
1411+
json!("x")
1412+
);
1413+
}
1414+
1415+
#[test]
1416+
fn test_from_js_raw_data_parses_numeric_variants() -> Result<()> {
1417+
use serde_json::{from_value, json};
1418+
1419+
let raw: JsRawColumnarData = from_value(json!({
1420+
"members": ["nonneg", "neg", "float", "mixed"],
1421+
"columns": [
1422+
[0, 1, u64::MAX],
1423+
[-1, -42, i64::MIN],
1424+
[1.5, 2.0, 0.6666666666666666],
1425+
["text", true, null],
1426+
],
1427+
}))?;
1428+
1429+
let result = QueryResult::from_js_raw_data(raw)?;
1430+
1431+
assert_eq!(result.row_count(), 3);
1432+
assert_eq!(
1433+
result.members().to_vec(),
1434+
vec!["nonneg", "neg", "float", "mixed"]
1435+
);
1436+
1437+
// Non-negative integers -> UInt64, including u64::MAX.
1438+
assert_eq!(
1439+
result.column(0)?.as_slice(),
1440+
&[
1441+
DBResponsePrimitive::UInt64(0),
1442+
DBResponsePrimitive::UInt64(1),
1443+
DBResponsePrimitive::UInt64(u64::MAX),
1444+
]
1445+
);
1446+
// Negative integers -> Int64, including i64::MIN.
1447+
assert_eq!(
1448+
result.column(1)?.as_slice(),
1449+
&[
1450+
DBResponsePrimitive::Int64(-1),
1451+
DBResponsePrimitive::Int64(-42),
1452+
DBResponsePrimitive::Int64(i64::MIN),
1453+
]
1454+
);
1455+
// Fractional values -> Float64 (`2.0` keeps its float token).
1456+
assert_eq!(
1457+
result.column(2)?.as_slice(),
1458+
&[
1459+
DBResponsePrimitive::Float64(1.5),
1460+
DBResponsePrimitive::Float64(2.0),
1461+
DBResponsePrimitive::Float64(0.6666666666666666),
1462+
]
1463+
);
1464+
// Non-numeric cells pass through.
1465+
assert_eq!(
1466+
result.column(3)?.as_slice(),
1467+
&[
1468+
DBResponsePrimitive::String("text".to_string()),
1469+
DBResponsePrimitive::Boolean(true),
1470+
DBResponsePrimitive::Null,
1471+
]
1472+
);
1473+
1474+
Ok(())
1475+
}
1476+
13271477
#[derive(Clone, Deserialize)]
13281478
#[serde(rename_all = "camelCase")]
13291479
struct TestData {

rust/cubesql/cubesql/src/compile/engine/df/scan.rs

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,9 @@ pub enum FieldValue<'a> {
305305
// because V8 uses UTF-16 It allocates/converts a new strings while doing JsString.value()
306306
// @see v8 WriteUtf8 for more details. Cow::Owned is used for this variant
307307
String(Cow<'a, str>),
308-
Number(f64),
308+
Int64(i64),
309+
UInt64(u64),
310+
Float64(f64),
309311
Bool(bool),
310312
Null,
311313
}
@@ -345,9 +347,17 @@ impl JsonValueObject {
345347
fn json_value_to_field_value(value: &Value) -> std::result::Result<FieldValue<'_>, CubeError> {
346348
Ok(match value {
347349
Value::String(s) => FieldValue::String(Cow::Borrowed(s)),
348-
Value::Number(n) => FieldValue::Number(n.as_f64().ok_or_else(|| {
349-
DataFusionError::Execution(format!("Can't convert {:?} to float", n))
350-
})?),
350+
Value::Number(n) => {
351+
if let Some(i) = n.as_i64() {
352+
FieldValue::Int64(i)
353+
} else if let Some(u) = n.as_u64() {
354+
FieldValue::UInt64(u)
355+
} else if let Some(f) = n.as_f64() {
356+
FieldValue::Float64(f)
357+
} else {
358+
return Err(CubeError::user(format!("Can't convert {:?} to number", n)));
359+
}
360+
}
351361
Value::Bool(b) => FieldValue::Bool(*b),
352362
Value::Null => FieldValue::Null,
353363
x => {
@@ -918,7 +928,9 @@ macro_rules! transform_response_body {
918928
{
919929
(FieldValue::String(v), builder) => builder.append_value(v)?,
920930
(FieldValue::Bool(v), builder) => builder.append_value(if v { "true" } else { "false" })?,
921-
(FieldValue::Number(v), builder) => builder.append_value(v.to_string())?,
931+
(FieldValue::Int64(v), builder) => builder.append_value(v.to_string())?,
932+
(FieldValue::UInt64(v), builder) => builder.append_value(v.to_string())?,
933+
(FieldValue::Float64(v), builder) => builder.append_value(v.to_string())?,
922934
},
923935
{
924936
(ScalarValue::Utf8(v), builder) => builder.append_option(v.as_ref())?,
@@ -933,7 +945,9 @@ macro_rules! transform_response_body {
933945
$response,
934946
field_name,
935947
{
936-
(FieldValue::Number(number), builder) => builder.append_value(number.round() as i16)?,
948+
(FieldValue::Int64(number), builder) => builder.append_value(number as i16)?,
949+
(FieldValue::UInt64(number), builder) => builder.append_value(number as i16)?,
950+
(FieldValue::Float64(number), builder) => builder.append_value(number.round() as i16)?,
937951
(FieldValue::String(s), builder) => match s.parse::<i16>() {
938952
Ok(v) => builder.append_value(v)?,
939953
Err(error) => {
@@ -959,7 +973,9 @@ macro_rules! transform_response_body {
959973
$response,
960974
field_name,
961975
{
962-
(FieldValue::Number(number), builder) => builder.append_value(number.round() as i32)?,
976+
(FieldValue::Int64(number), builder) => builder.append_value(number as i32)?,
977+
(FieldValue::UInt64(number), builder) => builder.append_value(number as i32)?,
978+
(FieldValue::Float64(number), builder) => builder.append_value(number.round() as i32)?,
963979
(FieldValue::String(s), builder) => match s.parse::<i32>() {
964980
Ok(v) => builder.append_value(v)?,
965981
Err(error) => {
@@ -985,7 +1001,9 @@ macro_rules! transform_response_body {
9851001
$response,
9861002
field_name,
9871003
{
988-
(FieldValue::Number(number), builder) => builder.append_value(number.round() as i64)?,
1004+
(FieldValue::Int64(number), builder) => builder.append_value(number)?,
1005+
(FieldValue::UInt64(number), builder) => builder.append_value(number as i64)?,
1006+
(FieldValue::Float64(number), builder) => builder.append_value(number.round() as i64)?,
9891007
(FieldValue::String(s), builder) => match s.parse::<i64>() {
9901008
Ok(v) => builder.append_value(v)?,
9911009
Err(error) => {
@@ -1011,7 +1029,9 @@ macro_rules! transform_response_body {
10111029
$response,
10121030
field_name,
10131031
{
1014-
(FieldValue::Number(number), builder) => builder.append_value(number as f32)?,
1032+
(FieldValue::Int64(number), builder) => builder.append_value(number as f32)?,
1033+
(FieldValue::UInt64(number), builder) => builder.append_value(number as f32)?,
1034+
(FieldValue::Float64(number), builder) => builder.append_value(number as f32)?,
10151035
(FieldValue::String(s), builder) => match s.parse::<f32>() {
10161036
Ok(v) => builder.append_value(v)?,
10171037
Err(error) => {
@@ -1037,7 +1057,9 @@ macro_rules! transform_response_body {
10371057
$response,
10381058
field_name,
10391059
{
1040-
(FieldValue::Number(number), builder) => builder.append_value(number)?,
1060+
(FieldValue::Int64(number), builder) => builder.append_value(number as f64)?,
1061+
(FieldValue::UInt64(number), builder) => builder.append_value(number as f64)?,
1062+
(FieldValue::Float64(number), builder) => builder.append_value(number)?,
10411063
(FieldValue::String(s), builder) => match s.parse::<f64>() {
10421064
Ok(v) => builder.append_value(v)?,
10431065
Err(error) => {

0 commit comments

Comments
 (0)