Skip to content

Commit 4246c25

Browse files
committed
refactor: convert CdcOptions <-> parquet-rs via From impls
Replace the inline field-by-field mapping in the parquet writer path with two `From` impls that encapsulate the enabled-flag <-> Option-presence translation: * `From<&CdcOptions> for Option<parquet::file::properties::CdcOptions>` — a disabled `CdcOptions` maps to `None`, an enabled one to `Some(..)`. * `From<Option<&parquet::file::properties::CdcOptions>> for CdcOptions` — `Some` implies `enabled: true`; `None` yields the disabled default. The writer set/read sites collapse to `content_defined_chunking.into()` and `props.content_defined_chunking().into()` respectively.
1 parent 3796eb7 commit 4246c25

1 file changed

Lines changed: 38 additions & 19 deletions

File tree

datafusion/common/src/file_options/parquet_writer.rs

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use std::sync::Arc;
2121

2222
use crate::{
2323
_internal_datafusion_err, DataFusionError, Result,
24-
config::{ParquetOptions, TableParquetOptions},
24+
config::{CdcOptions, ParquetOptions, TableParquetOptions},
2525
};
2626

2727
use arrow::datatypes::Schema;
@@ -166,6 +166,41 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder {
166166
}
167167
}
168168

169+
/// Convert DataFusion's [`CdcOptions`] into parquet-rs's `Option<CdcOptions>`.
170+
///
171+
/// parquet-rs has no `enabled` flag; CDC is on when the option is `Some`. So a
172+
/// disabled [`CdcOptions`] maps to `None`, and an enabled one to `Some` with the
173+
/// chunking parameters.
174+
impl From<&CdcOptions> for Option<parquet::file::properties::CdcOptions> {
175+
fn from(value: &CdcOptions) -> Self {
176+
value
177+
.enabled
178+
.then_some(parquet::file::properties::CdcOptions {
179+
min_chunk_size: value.min_chunk_size,
180+
max_chunk_size: value.max_chunk_size,
181+
norm_level: value.norm_level,
182+
})
183+
}
184+
}
185+
186+
/// Convert parquet-rs's `Option<&CdcOptions>` back into DataFusion's [`CdcOptions`].
187+
///
188+
/// The presence of parquet-rs options means CDC was enabled, so `Some` maps to
189+
/// `enabled: true`; `None` yields the disabled default.
190+
impl From<Option<&parquet::file::properties::CdcOptions>> for CdcOptions {
191+
fn from(value: Option<&parquet::file::properties::CdcOptions>) -> Self {
192+
match value {
193+
Some(cdc) => CdcOptions {
194+
enabled: true,
195+
min_chunk_size: cdc.min_chunk_size,
196+
max_chunk_size: cdc.max_chunk_size,
197+
norm_level: cdc.norm_level,
198+
},
199+
None => CdcOptions::default(),
200+
}
201+
}
202+
}
203+
169204
impl ParquetOptions {
170205
/// Convert the global session options, [`ParquetOptions`], into a single write action's [`WriterPropertiesBuilder`].
171206
///
@@ -249,15 +284,7 @@ impl ParquetOptions {
249284
if let Some(encoding) = encoding {
250285
builder = builder.set_encoding(parse_encoding_string(encoding)?);
251286
}
252-
if content_defined_chunking.enabled {
253-
builder = builder.set_content_defined_chunking(Some(
254-
parquet::file::properties::CdcOptions {
255-
min_chunk_size: content_defined_chunking.min_chunk_size,
256-
max_chunk_size: content_defined_chunking.max_chunk_size,
257-
norm_level: content_defined_chunking.norm_level,
258-
},
259-
));
260-
}
287+
builder = builder.set_content_defined_chunking(content_defined_chunking.into());
261288

262289
Ok(builder)
263290
}
@@ -592,15 +619,7 @@ mod tests {
592619
skip_arrow_metadata: global_options_defaults.skip_arrow_metadata,
593620
coerce_int96: None,
594621
coerce_int96_tz: None,
595-
content_defined_chunking: props
596-
.content_defined_chunking()
597-
.map(|c| CdcOptions {
598-
enabled: true,
599-
min_chunk_size: c.min_chunk_size,
600-
max_chunk_size: c.max_chunk_size,
601-
norm_level: c.norm_level,
602-
})
603-
.unwrap_or_default(),
622+
content_defined_chunking: props.content_defined_chunking().into(),
604623
},
605624
column_specific_options,
606625
key_value_metadata,

0 commit comments

Comments
 (0)