Skip to content

Commit c134a84

Browse files
authored
Explicitly declare spill codec dependency in physical-plan (#21917)
## Which issue does this PR close? * Closes #21914. --- ## Rationale for this change The spill subsystem implicitly relied on workspace-level Cargo feature unification for codec support (e.g., `arrow-ipc/zstd`). This created a fragile setup where changes in unrelated dependencies or feature flags could silently break spill compression. This PR establishes an explicit, crate-local contract for codec availability, improving correctness, maintainability, and visibility of dependencies. --- ## What changes are included in this PR? * Add an explicit `arrow-ipc` dependency with `lz4` and `zstd` features in `datafusion-physical-plan` to ensure required codecs are always available. * Document the codec contract directly in `IPCStreamWriter::new`, clarifying expectations and failure modes. * Update compression handling: * Convert `SpillCompression` into `Option<CompressionType>` before passing to `try_with_compression`. * Rename parameter `compression_type` → `spill_compression` for clarity. * Add `arrow-ipc` as a dependency in the workspace lockfile. * Minor cleanup: * Adjust `cargo-machete` ignored field to array format. * Improve comments around `serde_json` usage. --- ## Are these changes tested? No new tests are included in this PR. The changes rely on existing spill-related tests. No modifications to tests were made in this diff. --- ## Are there any user-facing changes? No direct user-facing changes. This PR improves internal reliability and makes spill compression behavior more robust against dependency or feature configuration changes. --- ## LLM-generated code disclosure This PR includes LLM-generated code and comments. All LLM-generated content has been manually reviewed and tested.
1 parent 7d107f0 commit c134a84

5 files changed

Lines changed: 26 additions & 8 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion-cli/Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ testcontainers-modules = { workspace = true, features = ["minio"] }
8282
# feature unification with dependencies
8383
serde_json = { workspace = true, features = ["preserve_order"] }
8484

85-
# Required because we pull serde_json with a feature to get consistent pg display,
86-
# but its not directly used.
85+
# serde_json is pulled with a feature to get
86+
# consistent pg display, but is not directly used in this crate.
8787
[package.metadata.cargo-machete]
88-
ignored = "serde_json"
88+
ignored = ["serde_json"]

datafusion/physical-plan/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ name = "datafusion_physical_plan"
4949
[dependencies]
5050
arrow = { workspace = true }
5151
arrow-data = { workspace = true }
52+
# Spill IPC writes require lz4 and zstd codec support. Keep these features in
53+
# sync with the SpillCompression variants in datafusion-common so codec
54+
# availability is explicit in the crate that owns spill handling.
55+
arrow-ipc = { workspace = true, features = ["lz4", "zstd"] }
5256
arrow-ord = { workspace = true }
5357
arrow-schema = { workspace = true }
5458
async-trait = { workspace = true }

datafusion/physical-plan/src/spill/mod.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ use arrow::ipc::{
4848
};
4949
use arrow::record_batch::RecordBatch;
5050
use arrow_data::ArrayDataBuilder;
51+
use arrow_ipc::CompressionType;
5152

5253
use datafusion_common::config::SpillCompression;
5354
use datafusion_common::{DataFusionError, Result, exec_datafusion_err, exec_err};
@@ -290,10 +291,21 @@ struct IPCStreamWriter {
290291

291292
impl IPCStreamWriter {
292293
/// Create new writer
294+
///
295+
/// # Codec contract
296+
///
297+
/// `arrow-ipc` must be compiled with the `lz4` and `zstd` features
298+
/// (declared explicitly in `datafusion-physical-plan/Cargo.toml`). If
299+
/// those features are absent, `try_with_compression` will return an
300+
/// error at runtime for [`SpillCompression::Lz4Frame`] and
301+
/// [`SpillCompression::Zstd`] variants. The Cargo dependency keeps this
302+
/// contract local and build-visible during Cargo feature resolution,
303+
/// rather than relying solely on workspace-level feature unification;
304+
/// see #21917.
293305
pub fn new(
294306
path: &Path,
295307
schema: &Schema,
296-
compression_type: SpillCompression,
308+
spill_compression: SpillCompression,
297309
) -> Result<Self> {
298310
let file = File::create(path).map_err(|e| {
299311
exec_datafusion_err!("(Hint: you may increase the file descriptor limit with shell command 'ulimit -n 4096') Failed to create partition file at {path:?}: {e:?}")
@@ -308,7 +320,8 @@ impl IPCStreamWriter {
308320
let alignment = get_max_alignment_for_schema(schema);
309321
let mut write_options =
310322
IpcWriteOptions::try_new(alignment, false, metadata_version)?;
311-
write_options = write_options.try_with_compression(compression_type.into())?;
323+
let compression_type = Option::<CompressionType>::from(spill_compression);
324+
write_options = write_options.try_with_compression(compression_type)?;
312325

313326
let writer = StreamWriter::try_new_with_options(file, schema, write_options)?;
314327
Ok(Self {

datafusion/sqllogictest/Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ harness = false
9494
name = "sqllogictests"
9595
path = "bin/sqllogictests.rs"
9696

97-
# Required because we pull serde_json with a feature to get consistent pg display,
98-
# but its not directly used.
97+
# serde_json is pulled with a feature to get
98+
# consistent pg display, but is not directly used in this crate.
9999
[package.metadata.cargo-machete]
100-
ignored = "serde_json"
100+
ignored = ["serde_json"]

0 commit comments

Comments
 (0)