Skip to content

Commit ef8ee54

Browse files
committed
fix(sqlite): reconstruct List<Utf8View> on read-back
1 parent 7393253 commit ef8ee54

2 files changed

Lines changed: 77 additions & 0 deletions

File tree

src/sqlite_provider.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,6 +1218,24 @@ fn sql_values_to_arrow(dt: &DataType, values: Vec<SqlValue>) -> DFResult<ArrayRe
12181218
}
12191219
Arc::new(b.finish())
12201220
}
1221+
DataType::Utf8View => {
1222+
let mut b = ListBuilder::new(StringViewBuilder::new())
1223+
.with_field(item_field.as_ref().clone());
1224+
for v in &values {
1225+
match v {
1226+
SqlValue::Text(s) => {
1227+
let items: Vec<Option<String>> =
1228+
serde_json::from_str(s).unwrap_or_default();
1229+
for item in items {
1230+
b.values().append_option(item);
1231+
}
1232+
b.append(true);
1233+
}
1234+
_ => b.append(false),
1235+
}
1236+
}
1237+
Arc::new(b.finish())
1238+
}
12211239
DataType::Int64 => {
12221240
let mut b =
12231241
ListBuilder::new(Int64Builder::new()).with_field(item_field.as_ref().clone());

tests/sqlite_provider_test.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,65 @@ async fn test_string_view_and_large_utf8_roundtrip() {
374374
assert!(view.contains(&None));
375375
}
376376

377+
/// Companion to the scalar regression above: a `List<Utf8View>` payload must
378+
/// also round-trip. The write side serializes list elements to JSON TEXT, so a
379+
/// missing `Utf8View` reconstruction arm would write real values then fail on
380+
/// read-back with "unsupported list item type Utf8View".
381+
#[tokio::test]
382+
async fn test_list_utf8view_roundtrip() {
383+
use arrow_array::ListArray;
384+
use arrow_array::builder::{ListBuilder, StringViewBuilder};
385+
386+
let dir = tempdir().unwrap();
387+
388+
let item_field = Arc::new(Field::new("item", DataType::Utf8View, true));
389+
let schema = Arc::new(Schema::new(vec![
390+
Field::new("rowid", DataType::Int64, false),
391+
Field::new("tags", DataType::List(item_field.clone()), true),
392+
]));
393+
394+
// Two rows, each a list of Utf8View strings; only the first row's list contains a null element.
395+
let mut lb = ListBuilder::new(StringViewBuilder::new()).with_field(item_field);
396+
lb.values().append_value("red");
397+
lb.values().append_null();
398+
lb.append(true);
399+
lb.values().append_value("blue");
400+
lb.values().append_value("green");
401+
lb.append(true);
402+
let tags = lb.finish();
403+
404+
let batch = RecordBatch::try_new(
405+
schema.clone(),
406+
vec![Arc::new(Int64Array::from(vec![1_i64, 2])), Arc::new(tags)],
407+
)
408+
.unwrap();
409+
410+
let db_path = dir.path().join("lists.db");
411+
let mut builder =
412+
SqliteSidecarBuilder::begin(db_path.to_str().unwrap(), "models", 2, schema, 0, vec![1])
413+
.unwrap();
414+
builder.push_batch(&batch).unwrap();
415+
let provider = builder.finish().unwrap();
416+
417+
let batches = provider.fetch_by_keys(&[1], "rowid", None).await.unwrap();
418+
assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 1);
419+
420+
let list = batches[0]
421+
.column_by_name("tags")
422+
.unwrap()
423+
.as_any()
424+
.downcast_ref::<ListArray>()
425+
.expect("tags should reconstruct as a List");
426+
let inner = list.value(0);
427+
let strs = inner
428+
.as_any()
429+
.downcast_ref::<StringViewArray>()
430+
.expect("list items should reconstruct as StringViewArray");
431+
assert_eq!(strs.len(), 2);
432+
assert_eq!(strs.value(0), "red");
433+
assert!(strs.is_null(1));
434+
}
435+
377436
#[tokio::test]
378437
async fn test_projection() {
379438
let dir = tempdir().unwrap();

0 commit comments

Comments
 (0)