Skip to content

Commit db16cc7

Browse files
adriangb and claude committed
refactor: deprecate TableSchema::new()/from_file_schema(); single builder + From
Consolidate `TableSchema` construction on `TableSchemaBuilder` plus the idiomatic `From<SchemaRef>` conversion, giving a single way to do each thing:

- with partition columns: `TableSchema::builder(file_schema).with_table_partition_cols(cols).build()`
- without: `TableSchema::from(file_schema)` / `file_schema.into()`

`TableSchema::new`, `TableSchema::from_file_schema`, and the mutating `TableSchema::with_table_partition_cols` setter are all deprecated. `new` and `from_file_schema` can only ever express a subset of the eventual column groups (partition, and later virtual), so the builder is the single complete path; `from_file_schema` was also redundant with the `From<SchemaRef>` impl.

All in-tree callers are migrated accordingly. Documented in the 55.0.0 upgrade guide.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 3e9191e commit db16cc7

19 files changed

Lines changed: 161 additions & 114 deletions

File tree

datafusion/catalog-listing/src/table.rs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -321,14 +321,15 @@ impl ListingTable {
321321

322322
/// Creates a file source for this table
323323
fn create_file_source(&self) -> Arc<dyn FileSource> {
324-
let table_schema = TableSchema::new(
325-
Arc::clone(&self.file_schema),
326-
self.options
327-
.table_partition_cols
328-
.iter()
329-
.map(|(col, field)| Arc::new(Field::new(col, field.clone(), false)))
330-
.collect(),
331-
);
324+
let table_schema = TableSchema::builder(Arc::clone(&self.file_schema))
325+
.with_table_partition_cols(
326+
self.options
327+
.table_partition_cols
328+
.iter()
329+
.map(|(col, field)| Arc::new(Field::new(col, field.clone(), false)))
330+
.collect::<Vec<_>>(),
331+
)
332+
.build();
332333

333334
self.options.format.file_source(table_schema)
334335
}

datafusion/core/src/datasource/file_format/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ pub(crate) mod test_util {
6767
.await?
6868
};
6969

70-
let table_schema = TableSchema::new(file_schema.clone(), vec![]);
70+
let table_schema = TableSchema::from(file_schema.clone());
7171

7272
let statistics = format
7373
.infer_stats(state, &store, file_schema.clone(), &meta)

datafusion/core/src/datasource/physical_plan/avro.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -223,10 +223,13 @@ mod tests {
223223
partitioned_file.partition_values = vec![ScalarValue::from("2021-10-26")];
224224

225225
let projection = Some(vec![0, 1, file_schema.fields().len(), 2]);
226-
let table_schema = TableSchema::new(
227-
file_schema.clone(),
228-
vec![Arc::new(Field::new("date", DataType::Utf8, false))],
229-
);
226+
let table_schema = TableSchema::builder(file_schema.clone())
227+
.with_table_partition_cols(vec![Arc::new(Field::new(
228+
"date",
229+
DataType::Utf8,
230+
false,
231+
))])
232+
.build();
230233
let source = Arc::new(AvroSource::new(table_schema.clone()));
231234
let conf = FileScanConfigBuilder::new(object_store_url, source)
232235
// select specific columns of the files as well as the partitioning

datafusion/core/src/datasource/physical_plan/csv.rs

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ mod tests {
122122
quote: b'"',
123123
..Default::default()
124124
};
125-
let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema));
125+
let table_schema = TableSchema::from(Arc::clone(&file_schema));
126126
let source =
127127
Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options));
128128
let config =
@@ -194,7 +194,7 @@ mod tests {
194194
quote: b'"',
195195
..Default::default()
196196
};
197-
let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema));
197+
let table_schema = TableSchema::from(Arc::clone(&file_schema));
198198
let source =
199199
Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options));
200200
let config =
@@ -265,7 +265,7 @@ mod tests {
265265
quote: b'"',
266266
..Default::default()
267267
};
268-
let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema));
268+
let table_schema = TableSchema::from(Arc::clone(&file_schema));
269269
let source =
270270
Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options));
271271
let config =
@@ -335,7 +335,7 @@ mod tests {
335335
quote: b'"',
336336
..Default::default()
337337
};
338-
let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema));
338+
let table_schema = TableSchema::from(Arc::clone(&file_schema));
339339
let source =
340340
Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options));
341341
let config =
@@ -400,10 +400,13 @@ mod tests {
400400
quote: b'"',
401401
..Default::default()
402402
};
403-
let table_schema = TableSchema::new(
404-
Arc::clone(&file_schema),
405-
vec![Arc::new(Field::new("date", DataType::Utf8, false))],
406-
);
403+
let table_schema = TableSchema::builder(Arc::clone(&file_schema))
404+
.with_table_partition_cols(vec![Arc::new(Field::new(
405+
"date",
406+
DataType::Utf8,
407+
false,
408+
))])
409+
.build();
407410
let source =
408411
Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options));
409412
let config =
@@ -508,7 +511,7 @@ mod tests {
508511
quote: b'"',
509512
..Default::default()
510513
};
511-
let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema));
514+
let table_schema = TableSchema::from(Arc::clone(&file_schema));
512515
let source =
513516
Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options));
514517
let config =

datafusion/core/src/datasource/physical_plan/parquet.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1642,9 +1642,8 @@ mod tests {
16421642
),
16431643
]);
16441644

1645-
let table_schema = TableSchema::new(
1646-
Arc::clone(&schema),
1647-
vec![
1645+
let table_schema = TableSchema::builder(Arc::clone(&schema))
1646+
.with_table_partition_cols(vec![
16481647
Arc::new(Field::new("year", DataType::Utf8, false)),
16491648
Arc::new(Field::new("month", DataType::UInt8, false)),
16501649
Arc::new(Field::new(
@@ -1655,8 +1654,8 @@ mod tests {
16551654
),
16561655
false,
16571656
)),
1658-
],
1659-
);
1657+
])
1658+
.build();
16601659
let source = Arc::new(ParquetSource::new(table_schema.clone()));
16611660
let config = FileScanConfigBuilder::new(object_store_url, source)
16621661
.with_file(partitioned_file)

datafusion/core/src/test/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ pub fn scan_partitioned_csv(
103103
quote: b'"',
104104
..Default::default()
105105
};
106-
let table_schema = TableSchema::from_file_schema(schema);
106+
let table_schema = TableSchema::from(schema);
107107
let source = Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options));
108108
let config =
109109
FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?)

datafusion/core/tests/physical_optimizer/projection_pushdown.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1574,10 +1574,13 @@ fn partitioned_data_source() -> Arc<DataSourceExec> {
15741574
quote: b'"',
15751575
..Default::default()
15761576
};
1577-
let table_schema = TableSchema::new(
1578-
Arc::clone(&file_schema),
1579-
vec![Arc::new(Field::new("partition_col", DataType::Utf8, true))],
1580-
);
1577+
let table_schema = TableSchema::builder(Arc::clone(&file_schema))
1578+
.with_table_partition_cols(vec![Arc::new(Field::new(
1579+
"partition_col",
1580+
DataType::Utf8,
1581+
true,
1582+
))])
1583+
.build();
15811584
let config = FileScanConfigBuilder::new(
15821585
ObjectStoreUrl::parse("test:///").unwrap(),
15831586
Arc::new(CsvSource::new(table_schema).with_csv_options(options)),

datafusion/core/tests/physical_optimizer/pushdown_utils.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ pub struct TestSource {
111111

112112
impl TestSource {
113113
pub fn new(schema: SchemaRef, support: bool, batches: Vec<RecordBatch>) -> Self {
114-
let table_schema = datafusion_datasource::TableSchema::new(schema, vec![]);
114+
let table_schema = datafusion_datasource::TableSchema::from(schema);
115115
Self {
116116
support,
117117
metrics: ExecutionPlanMetricsSet::new(),

datafusion/datasource-arrow/src/file_format.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -197,10 +197,9 @@ impl FileFormat for ArrowFormat {
197197
.object_meta
198198
.location;
199199

200-
let table_schema = TableSchema::new(
201-
Arc::clone(conf.file_schema()),
202-
conf.table_partition_cols().to_vec(),
203-
);
200+
let table_schema = TableSchema::builder(Arc::clone(conf.file_schema()))
201+
.with_table_partition_cols(conf.table_partition_cols().clone())
202+
.build();
204203

205204
let mut source: Arc<dyn FileSource> =
206205
match is_object_in_arrow_ipc_file_format(object_store, object_location).await

datafusion/datasource-parquet/src/opener/mod.rs

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1495,7 +1495,7 @@ mod test {
14951495

14961496
/// Create a simple table schema from a file schema (for files without partition columns).
14971497
fn with_schema(mut self, file_schema: SchemaRef) -> Self {
1498-
self.table_schema = Some(TableSchema::from_file_schema(file_schema));
1498+
self.table_schema = Some(TableSchema::from(file_schema));
14991499
self
15001500
}
15011501

@@ -1882,10 +1882,13 @@ mod test {
18821882
Field::new("a", DataType::Int32, false),
18831883
]));
18841884

1885-
let table_schema_for_opener = TableSchema::new(
1886-
file_schema.clone(),
1887-
vec![Arc::new(Field::new("part", DataType::Int32, false))],
1888-
);
1885+
let table_schema_for_opener = TableSchema::builder(file_schema.clone())
1886+
.with_table_partition_cols(vec![Arc::new(Field::new(
1887+
"part",
1888+
DataType::Int32,
1889+
false,
1890+
))])
1891+
.build();
18891892
let make_opener = |predicate| {
18901893
ParquetMorselizerBuilder::new()
18911894
.with_store(Arc::clone(&store))
@@ -1951,10 +1954,13 @@ mod test {
19511954
Field::new("a", DataType::Int32, false),
19521955
Field::new("b", DataType::Float32, true),
19531956
]));
1954-
let table_schema_for_opener = TableSchema::new(
1955-
file_schema.clone(),
1956-
vec![Arc::new(Field::new("part", DataType::Int32, false))],
1957-
);
1957+
let table_schema_for_opener = TableSchema::builder(file_schema.clone())
1958+
.with_table_partition_cols(vec![Arc::new(Field::new(
1959+
"part",
1960+
DataType::Int32,
1961+
false,
1962+
))])
1963+
.build();
19581964
let make_opener = |predicate| {
19591965
ParquetMorselizerBuilder::new()
19601966
.with_store(Arc::clone(&store))
@@ -2023,10 +2029,13 @@ mod test {
20232029
Field::new("a", DataType::Int32, false),
20242030
]));
20252031

2026-
let table_schema_for_opener = TableSchema::new(
2027-
file_schema.clone(),
2028-
vec![Arc::new(Field::new("part", DataType::Int32, false))],
2029-
);
2032+
let table_schema_for_opener = TableSchema::builder(file_schema.clone())
2033+
.with_table_partition_cols(vec![Arc::new(Field::new(
2034+
"part",
2035+
DataType::Int32,
2036+
false,
2037+
))])
2038+
.build();
20302039
let make_opener = |predicate| {
20312040
ParquetMorselizerBuilder::new()
20322041
.with_store(Arc::clone(&store))
@@ -2104,10 +2113,13 @@ mod test {
21042113
Field::new("part", DataType::Int32, false),
21052114
]));
21062115

2107-
let table_schema_for_opener = TableSchema::new(
2108-
file_schema.clone(),
2109-
vec![Arc::new(Field::new("part", DataType::Int32, false))],
2110-
);
2116+
let table_schema_for_opener = TableSchema::builder(file_schema.clone())
2117+
.with_table_partition_cols(vec![Arc::new(Field::new(
2118+
"part",
2119+
DataType::Int32,
2120+
false,
2121+
))])
2122+
.build();
21112123
let make_opener = |predicate| {
21122124
ParquetMorselizerBuilder::new()
21132125
.with_store(Arc::clone(&store))

0 commit comments

Comments
 (0)