Skip to content

Commit f6bb0b6

Browse files
authored
feat: Nested Type String Support Double Quoting (#732)
* feat: Nested Type String Support Double Quoting
* fix
* fix
* fix arrow decoder
* fix
1 parent 4d31a3b commit f6bb0b6

5 files changed

Lines changed: 82 additions & 35 deletions

File tree

cli/tests/00-base.result

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -32,9 +32,9 @@ Asia/Shanghai
3232
1 1.00
3333
2 2.00
3434
2
35-
[1,2,3] NULL (1,'ab')
36-
NULL {'k1':'v1','k2':'v2'} (2,NULL)
35+
[1,2,3] NULL "(1,""ab"")"
36+
NULL "{""k1"":""v1"",""k2"":""v2""}" (2,NULL)
3737
1 NULL 1 ab
3838
NULL v1 2 NULL
39-
{'k1':'v1','k2':'v2'} [6162,78797A] ('[1,2]','2024-04-10')
39+
"{""k1"":""v1"",""k2"":""v2""}" [6162,78797A] "([1,2],""2024-04-10"")"
4040
bye

sql/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,5 +35,5 @@ memchr = "2.7"
3535
roaring = { version = "0.10.12", features = ["serde"] }
3636
jiff = { workspace = true }
3737
serde = { version = "1.0", default-features = false, features = ["derive"] }
38-
serde_json = { version = "1.0", default-features = false, features = ["std"] }
38+
serde_json = { version = "1.0", default-features = false, features = ["std", "raw_value"] }
3939
url = { version = "2.5", default-features = false }

sql/src/value/format/display.rs

Lines changed: 15 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -64,32 +64,37 @@ impl Value {
6464
}
6565
Value::Number(n) => write!(f, "{n}"),
6666
Value::Binary(s) => write!(f, "{}", hex::encode_upper(s)),
67-
Value::String(s)
68-
| Value::Bitmap(s)
69-
| Value::Variant(s)
70-
| Value::Interval(s)
71-
| Value::Geometry(s)
72-
| Value::Geography(s) => {
67+
Value::String(s) | Value::Bitmap(s) | Value::Interval(s) => {
7368
if raw {
7469
write!(f, "{s}")
7570
} else {
76-
write!(f, "'{s}'")
71+
write!(f, "\"{s}\"")
72+
}
73+
}
74+
Value::Variant(s) => {
75+
write!(f, "{s}")
76+
}
77+
Value::Geometry(s) | Value::Geography(s) => {
78+
if raw || s.starts_with('{') {
79+
write!(f, "{s}")
80+
} else {
81+
write!(f, "\"{s}\"")
7782
}
7883
}
7984
Value::Timestamp(dt) => {
8085
let formatted = dt.strftime(TIMESTAMP_FORMAT);
8186
if raw {
8287
write!(f, "{formatted}")
8388
} else {
84-
write!(f, "'{formatted}'")
89+
write!(f, "\"{formatted}\"")
8590
}
8691
}
8792
Value::TimestampTz(dt) => {
8893
let formatted = dt.strftime(TIMESTAMP_TIMEZONE_FORMAT);
8994
if raw {
9095
write!(f, "{formatted}")
9196
} else {
92-
write!(f, "'{formatted}'")
97+
write!(f, "\"{formatted}\"")
9398
}
9499
}
95100
Value::Date(i) => {
@@ -98,7 +103,7 @@ impl Value {
98103
if raw {
99104
write!(f, "{d}")
100105
} else {
101-
write!(f, "'{d}'")
106+
write!(f, "\"{d}\"")
102107
}
103108
}
104109
Value::Array(vals) => {

sql/src/value/format/result_encode.rs

Lines changed: 15 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -81,14 +81,19 @@ impl Value {
8181
}
8282
},
8383
Value::Binary(s) => bytes.extend_from_slice(hex::encode_upper(s).as_bytes()),
84-
Value::String(s)
85-
| Value::Bitmap(s)
86-
| Value::Variant(s)
87-
| Value::Interval(s)
88-
| Value::Geometry(s)
89-
| Value::Geography(s) => {
84+
Value::String(s) | Value::Bitmap(s) | Value::Interval(s) => {
9085
Self::write_string(bytes, s, raw);
9186
}
87+
Value::Variant(s) => {
88+
bytes.extend_from_slice(s.as_bytes());
89+
}
90+
Value::Geometry(s) | Value::Geography(s) => {
91+
if s.starts_with('{') {
92+
bytes.extend_from_slice(s.as_bytes());
93+
} else {
94+
Self::write_string(bytes, s, raw);
95+
}
96+
}
9297
Value::Timestamp(dt) => {
9398
let s = dt.strftime(TIMESTAMP_FORMAT).to_string();
9499
Self::write_string(bytes, &s, raw);
@@ -150,9 +155,9 @@ impl Value {
150155

151156
fn write_string(bytes: &mut Vec<u8>, string: &String, raw: bool) {
152157
if !raw {
153-
bytes.push(b'\'');
154-
write_quoted_string_min_escape(string.as_bytes(), bytes, b'\'');
155-
bytes.push(b'\'');
158+
bytes.push(b'"');
159+
write_quoted_string_min_escape(string.as_bytes(), bytes, b'"');
160+
bytes.push(b'"');
156161
} else {
157162
bytes.extend_from_slice(string.as_bytes());
158163
}
@@ -202,7 +207,7 @@ fn write_quoted_string_min_escape(bytes: &[u8], buf: &mut Vec<u8>, quote: u8) {
202207
if start < i {
203208
buf.extend_from_slice(&bytes[start..i]);
204209
}
205-
buf.push(quote);
210+
buf.push(b'\\');
206211
buf.push(quote);
207212
start = i + 1;
208213
}

sql/src/value/string_decoder.rs

Lines changed: 48 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,8 @@ use databend_client::schema::{DataType, DecimalDataType, DecimalSize, NumberData
2424
use ethnum::i256;
2525
use hex;
2626
use jiff::{civil::DateTime as JiffDateTime, tz::TimeZone, Zoned};
27+
use serde::Deserialize;
28+
use serde_json::{value::RawValue, Deserializer};
2729
use std::io::{BufRead, Cursor};
2830
use std::str::FromStr;
2931

@@ -281,7 +283,9 @@ impl ValueDecoder {
281283

282284
fn read_string<R: AsRef<[u8]>>(&self, reader: &mut Cursor<R>) -> Result<Value> {
283285
let mut buf = Vec::new();
284-
reader.read_quoted_text(&mut buf, b'\'')?;
286+
if reader.read_quoted_text(&mut buf, b'"').is_err() {
287+
reader.read_quoted_text(&mut buf, b'\'')?;
288+
}
285289
Ok(Value::String(unsafe { String::from_utf8_unchecked(buf) }))
286290
}
287291

@@ -295,54 +299,78 @@ impl ValueDecoder {
295299

296300
fn read_date<R: AsRef<[u8]>>(&self, reader: &mut Cursor<R>) -> Result<Value> {
297301
let mut buf = Vec::new();
298-
reader.read_quoted_text(&mut buf, b'\'')?;
302+
if reader.read_quoted_text(&mut buf, b'"').is_err() {
303+
reader.read_quoted_text(&mut buf, b'\'')?;
304+
}
299305
let v = unsafe { std::str::from_utf8_unchecked(&buf) };
300306
let days = NaiveDate::parse_from_str(v, "%Y-%m-%d")?.num_days_from_ce() - DAYS_FROM_CE;
301307
Ok(Value::Date(days))
302308
}
303309

304310
fn read_timestamp<R: AsRef<[u8]>>(&self, reader: &mut Cursor<R>) -> Result<Value> {
305311
let mut buf = Vec::new();
306-
reader.read_quoted_text(&mut buf, b'\'')?;
312+
if reader.read_quoted_text(&mut buf, b'"').is_err() {
313+
reader.read_quoted_text(&mut buf, b'\'')?;
314+
}
307315
let v = unsafe { std::str::from_utf8_unchecked(&buf) };
308316
parse_timestamp(v, &self.timezone)
309317
}
310318

311319
fn read_timestamp_tz<R: AsRef<[u8]>>(&self, reader: &mut Cursor<R>) -> Result<Value> {
312320
let mut buf = Vec::new();
313-
reader.read_quoted_text(&mut buf, b'\'')?;
321+
if reader.read_quoted_text(&mut buf, b'"').is_err() {
322+
reader.read_quoted_text(&mut buf, b'\'')?;
323+
}
314324
let v = unsafe { std::str::from_utf8_unchecked(&buf) };
315325
let t = Zoned::strptime(TIMESTAMP_TIMEZONE_FORMAT, v)?;
316326
Ok(Value::TimestampTz(t))
317327
}
318328

319329
fn read_interval<R: AsRef<[u8]>>(&self, reader: &mut Cursor<R>) -> Result<Value> {
320330
let mut buf = Vec::new();
321-
reader.read_quoted_text(&mut buf, b'\'')?;
331+
if reader.read_quoted_text(&mut buf, b'"').is_err() {
332+
reader.read_quoted_text(&mut buf, b'\'')?;
333+
}
322334
Ok(Value::Interval(unsafe { String::from_utf8_unchecked(buf) }))
323335
}
324336

325337
fn read_bitmap<R: AsRef<[u8]>>(&self, reader: &mut Cursor<R>) -> Result<Value> {
326338
let mut buf = Vec::new();
327-
reader.read_quoted_text(&mut buf, b'\'')?;
339+
if reader.read_quoted_text(&mut buf, b'"').is_err() {
340+
reader.read_quoted_text(&mut buf, b'\'')?;
341+
}
328342
Ok(Value::Bitmap(unsafe { String::from_utf8_unchecked(buf) }))
329343
}
330344

331345
fn read_variant<R: AsRef<[u8]>>(&self, reader: &mut Cursor<R>) -> Result<Value> {
332-
let mut buf = Vec::new();
333-
reader.read_quoted_text(&mut buf, b'\'')?;
334-
Ok(Value::Variant(unsafe { String::from_utf8_unchecked(buf) }))
346+
if let Ok(val) = self.read_json(reader) {
347+
Ok(Value::Variant(val))
348+
} else {
349+
let mut buf = Vec::new();
350+
reader.read_quoted_text(&mut buf, b'\'')?;
351+
Ok(Value::Variant(unsafe { String::from_utf8_unchecked(buf) }))
352+
}
335353
}
336354

337355
fn read_geometry<R: AsRef<[u8]>>(&self, reader: &mut Cursor<R>) -> Result<Value> {
338356
let mut buf = Vec::new();
339-
reader.read_quoted_text(&mut buf, b'\'')?;
357+
if reader.read_quoted_text(&mut buf, b'"').is_err() {
358+
if let Ok(val) = self.read_json(reader) {
359+
return Ok(Value::Variant(val));
360+
}
361+
reader.read_quoted_text(&mut buf, b'\'')?;
362+
}
340363
Ok(Value::Geometry(unsafe { String::from_utf8_unchecked(buf) }))
341364
}
342365

343366
fn read_geography<R: AsRef<[u8]>>(&self, reader: &mut Cursor<R>) -> Result<Value> {
344367
let mut buf = Vec::new();
345-
reader.read_quoted_text(&mut buf, b'\'')?;
368+
if reader.read_quoted_text(&mut buf, b'"').is_err() {
369+
if let Ok(val) = self.read_json(reader) {
370+
return Ok(Value::Variant(val));
371+
}
372+
reader.read_quoted_text(&mut buf, b'\'')?;
373+
}
346374
Ok(Value::Geography(unsafe {
347375
String::from_utf8_unchecked(buf)
348376
}))
@@ -457,6 +485,15 @@ impl ValueDecoder {
457485
reader.must_ignore_byte(b')')?;
458486
Ok(Value::Tuple(vals))
459487
}
488+
489+
fn read_json<R: AsRef<[u8]>>(&self, reader: &mut Cursor<R>) -> Result<String> {
490+
let start = reader.position() as usize;
491+
let data = reader.get_ref().as_ref();
492+
let mut deserializer = Deserializer::from_slice(&data[start..]);
493+
let raw: Box<RawValue> = Box::<RawValue>::deserialize(&mut deserializer)?;
494+
reader.set_position((start + raw.get().len()) as u64);
495+
Ok(raw.to_string())
496+
}
460497
}
461498

462499
fn parse_timestamp(ts_string: &str, tz: &TimeZone) -> Result<Value> {

0 commit comments

Comments
 (0)