Skip to content

Commit e6d7f3f

Browse files
adriangbclaude
andcommitted
fix: avoid panic in TableSchema::with_table_partition_cols on shared Arc
`with_table_partition_cols` appended to an existing partition-column list via `Arc::get_mut(...).expect(...)`. The `expect` message assumed that owning `self` implied sole ownership of the inner `Arc<Vec<FieldRef>>`, which is false: `TableSchema` is `Clone`, and cloning bumps the `Arc` refcount without copying. Building a `TableSchema`, cloning it, and then calling `with_table_partition_cols` on either copy panicked. Use `Arc::make_mut`, which mutates in place when uniquely owned and copy-on-writes when the `Arc` is shared. This also removes the empty/non-empty branch since `make_mut` handles both. Add a regression test that clones a `TableSchema` and appends partition columns to the clone, verifying no panic and copy-on-write isolation. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 50013e5 commit e6d7f3f

1 file changed

Lines changed: 36 additions & 9 deletions

File tree

datafusion/datasource/src/table_schema.rs

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -140,15 +140,12 @@ impl TableSchema {
140140
/// into [`TableSchema::with_table_partition_cols`] if you have partition columns at construction time
141141
/// since it avoids re-computing the table schema.
142142
pub fn with_table_partition_cols(mut self, partition_cols: Vec<FieldRef>) -> Self {
143-
if self.table_partition_cols.is_empty() {
144-
self.table_partition_cols = Arc::new(partition_cols);
145-
} else {
146-
// Append to existing partition columns
147-
let table_partition_cols = Arc::get_mut(&mut self.table_partition_cols).expect(
148-
"Expected to be the sole owner of table_partition_cols since this function accepts mut self",
149-
);
150-
table_partition_cols.extend(partition_cols);
151-
}
143+
// Append to existing partition columns. `Arc::make_mut` copies the
144+
// inner `Vec` if the `Arc` is shared (e.g. with a clone of this
145+
// `TableSchema`) and otherwise mutates in place. The previous
146+
// `Arc::get_mut().expect()` panicked whenever the `Arc` was shared:
147+
// owning `self` does not imply sole ownership of the inner `Arc`.
148+
Arc::make_mut(&mut self.table_partition_cols).extend(partition_cols);
152149
let mut builder = SchemaBuilder::from(self.file_schema.as_ref());
153150
builder.extend(self.table_partition_cols.iter().cloned());
154151
self.table_schema = Arc::new(builder.finish());
@@ -276,4 +273,34 @@ mod tests {
276273
&expected_schema
277274
);
278275
}
276+
277+
#[test]
278+
fn test_with_table_partition_cols_after_clone_does_not_panic() {
279+
// `TableSchema` is cheaply clonable because its partition columns are
280+
// stored behind an `Arc`. Appending more partition columns to a clone
281+
// must not panic just because the `Arc` is shared, and must not mutate
282+
// the other clone (copy-on-write isolation).
283+
let file_schema =
284+
Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
285+
let original = TableSchema::new(
286+
file_schema,
287+
vec![Arc::new(Field::new("country", DataType::Utf8, false))],
288+
);
289+
290+
let cloned = original.clone();
291+
let extended = cloned.with_table_partition_cols(vec![Arc::new(Field::new(
292+
"city",
293+
DataType::Utf8,
294+
false,
295+
))]);
296+
297+
// The extended schema sees both partition columns...
298+
assert_eq!(extended.table_partition_cols().len(), 2);
299+
assert_eq!(extended.table_partition_cols()[0].name(), "country");
300+
assert_eq!(extended.table_partition_cols()[1].name(), "city");
301+
302+
// ...while the original clone is left untouched.
303+
assert_eq!(original.table_partition_cols().len(), 1);
304+
assert_eq!(original.table_partition_cols()[0].name(), "country");
305+
}
279306
}

0 commit comments

Comments
 (0)