Skip to content

Commit 93d82fc

Browse files
authored
chore(query): pick improve parse json performance to release/v1.2.636-rc8.5 (#18468)
* perf(query): Improve parse json performance * ignore wasmtime audit
1 parent f74b30b commit 93d82fc

16 files changed

Lines changed: 157 additions & 99 deletions

File tree

.cargo/audit.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,6 @@ ignore = [
8585
"RUSTSEC-2025-0021",
8686
# openssl: Use-After-Free in `Md::fetch` and `Cipher::fetch`
8787
"RUSTSEC-2025-0022",
88+
# Host panic with `fd_renumber` WASIp1 function
89+
"RUSTSEC-2025-0046",
8890
]

Cargo.lock

Lines changed: 13 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ geozero = { version = "0.13.0", features = ["default", "with-wkb", "with-geos",
245245
hashbrown = { version = "0.14.3", default-features = false }
246246
http = "1"
247247
itertools = "0.10.5"
248-
jsonb = "0.5.2"
248+
jsonb = "0.5.3"
249249
jwt-simple = { version = "0.12.12", default-features = false, features = ["pure-rust"] }
250250
match-template = "0.0.1"
251251
mysql_async = { version = "0.34", default-features = false, features = ["native-tls-tls"] }
@@ -414,7 +414,7 @@ async-recursion = { git = "https://github.com/zhang2014/async-recursion.git", re
414414
backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "72265be" }
415415
color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" }
416416
deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "57795da" }
417-
jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "edd714b" }
417+
jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "6df281f" }
418418
openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" }
419419
# patched opendal which categorizes the XML deserialization errors as recoverable
420420
opendal = { git = "https://github.com/datafuse-extras/opendal-for-release-v1.2.636", tag = "v0.49.0-xml-deser-retry" }

src/query/expression/src/types/variant.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -299,17 +299,15 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: TzLUT, buf: &mut Vec<u8>) {
299299
}
300300
ScalarRef::Geometry(bytes) => {
301301
let geom = Ewkb(bytes).to_json().expect("failed to decode wkb data");
302-
jsonb::parse_value(geom.as_bytes())
303-
.expect("failed to parse geojson to json value")
304-
.write_to_vec(buf);
302+
jsonb::parse_owned_jsonb_with_buf(geom.as_bytes(), buf)
303+
.expect("failed to parse geojson to json value");
305304
return;
306305
}
307306
ScalarRef::Geography(bytes) => {
308307
// todo: Implement direct conversion, omitting intermediate processes
309308
let geom = Ewkb(bytes.0).to_json().expect("failed to decode wkb data");
310-
jsonb::parse_value(geom.as_bytes())
311-
.expect("failed to parse geojson to json value")
312-
.write_to_vec(buf);
309+
jsonb::parse_owned_jsonb_with_buf(geom.as_bytes(), buf)
310+
.expect("failed to parse geojson to json value");
313311
return;
314312
}
315313
};

src/query/expression/src/utils/variant_transform.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
use databend_common_exception::ErrorCode;
1616
use databend_common_exception::Result;
17-
use jsonb::parse_value;
17+
use jsonb::parse_owned_jsonb;
1818
use jsonb::RawJsonb;
1919

2020
use crate::types::AnyType;
@@ -97,7 +97,7 @@ fn transform_scalar(scalar: ScalarRef<'_>, decode: bool) -> Result<Scalar> {
9797
let raw_jsonb = RawJsonb::new(data);
9898
Scalar::Variant(raw_jsonb.to_string().into_bytes())
9999
} else {
100-
let value = parse_value(data).map_err(|err| {
100+
let value = parse_owned_jsonb(data).map_err(|err| {
101101
ErrorCode::UDFDataError(format!("parse json value error: {err}"))
102102
})?;
103103
Scalar::Variant(value.to_vec())

src/query/expression/tests/it/row.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,7 @@ use databend_common_expression::FromData;
3131
use databend_common_expression::RowConverter;
3232
use databend_common_expression::SortField;
3333
use itertools::Itertools;
34-
use jsonb::parse_value;
35-
use jsonb::RawJsonb;
34+
use jsonb::parse_owned_jsonb;
3635
use rand::distributions::Alphanumeric;
3736
use rand::distributions::Standard;
3837
use rand::prelude::Distribution;
@@ -401,9 +400,8 @@ fn test_variant() {
401400
for value in values {
402401
if let Some(value) = value {
403402
validity.push(true);
404-
let val = parse_value(value.as_bytes()).unwrap();
405-
let buf = val.to_vec();
406-
let raw_jsonb = RawJsonb::new(&buf);
403+
let owned_jsonb = parse_owned_jsonb(value.as_bytes()).unwrap();
404+
let raw_jsonb = owned_jsonb.as_raw();
407405
let compare_buf = raw_jsonb.convert_to_comparable();
408406
builder.put_slice(&compare_buf);
409407
} else {

src/query/formats/src/field_decoder/fast_values.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ use databend_common_io::geography::geography_from_ewkt_bytes;
5757
use databend_common_io::parse_bitmap;
5858
use databend_common_io::parse_bytes_to_ewkb;
5959
use databend_common_io::prelude::FormatSettings;
60-
use jsonb::parse_value;
60+
use jsonb::parse_owned_jsonb_with_buf;
6161
use lexical_core::FromLexical;
6262
use num::cast::AsPrimitive;
6363
use num_traits::NumCast;
@@ -469,9 +469,8 @@ impl FastFieldDecoderValues {
469469
) -> Result<()> {
470470
let mut buf = Vec::new();
471471
self.read_string_inner(reader, &mut buf, positions)?;
472-
match parse_value(&buf) {
473-
Ok(value) => {
474-
value.write_to_vec(&mut column.data);
472+
match parse_owned_jsonb_with_buf(&buf, &mut column.data) {
473+
Ok(_) => {
475474
column.commit_row();
476475
}
477476
Err(_) => {

src/query/formats/src/field_decoder/nested.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ use databend_common_io::cursor_ext::ReadNumberExt;
5050
use databend_common_io::geography::geography_from_ewkt_bytes;
5151
use databend_common_io::parse_bitmap;
5252
use databend_common_io::parse_bytes_to_ewkb;
53-
use jsonb::parse_value;
53+
use jsonb::parse_owned_jsonb_with_buf;
5454
use lexical_core::FromLexical;
5555

5656
use crate::binary::decode_binary;
@@ -311,9 +311,8 @@ impl NestedValues {
311311
) -> Result<()> {
312312
let mut buf = Vec::new();
313313
self.read_string_inner(reader, &mut buf)?;
314-
match parse_value(&buf) {
315-
Ok(value) => {
316-
value.write_to_vec(&mut column.data);
314+
match parse_owned_jsonb_with_buf(&buf, &mut column.data) {
315+
Ok(_) => {
317316
column.commit_row();
318317
}
319318
Err(e) => {

src/query/formats/src/field_decoder/separated_text.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ use databend_common_io::parse_bitmap;
5050
use databend_common_io::parse_bytes_to_ewkb;
5151
use databend_common_meta_app::principal::CsvFileFormatParams;
5252
use databend_common_meta_app::principal::TsvFileFormatParams;
53-
use jsonb::parse_value;
53+
use jsonb::parse_owned_jsonb_with_buf;
5454
use lexical_core::FromLexical;
5555
use num_traits::NumCast;
5656

@@ -301,9 +301,8 @@ impl SeparatedTextDecoder {
301301
}
302302

303303
fn read_variant(&self, column: &mut BinaryColumnBuilder, data: &[u8]) -> Result<()> {
304-
match parse_value(data) {
305-
Ok(value) => {
306-
value.write_to_vec(&mut column.data);
304+
match parse_owned_jsonb_with_buf(data, &mut column.data) {
305+
Ok(_) => {
307306
column.commit_row();
308307
}
309308
Err(e) => {

src/query/functions/src/scalars/binary.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use databend_common_expression::types::NumberType;
3131
use databend_common_expression::types::StringType;
3232
use databend_common_expression::types::UInt8Type;
3333
use databend_common_expression::types::ValueType;
34+
use databend_common_expression::types::VariantType;
3435
use databend_common_expression::Column;
3536
use databend_common_expression::EvalContext;
3637
use databend_common_expression::Function;
@@ -76,6 +77,15 @@ pub fn register(registry: &mut FunctionRegistry) {
7677
error_to_null(eval_binary_to_string),
7778
);
7879

80+
registry.register_passthrough_nullable_1_arg::<VariantType, BinaryType, _, _>(
81+
"to_jsonb_binary",
82+
|_, _| FunctionDomain::Full,
83+
|val, _| match val {
84+
ValueRef::Scalar(val) => Value::Scalar(val.to_vec()),
85+
ValueRef::Column(col) => Value::Column(col),
86+
},
87+
);
88+
7989
registry.register_passthrough_nullable_1_arg::<StringType, BinaryType, _, _>(
8090
"to_binary",
8191
|_, _| FunctionDomain::Full,

0 commit comments

Comments
 (0)