Skip to content

Commit 5b2209e

Browse files
committed
refactor: un-nest proto CDC message as top-level ParquetCdcOptions
Replace the nested `ParquetOptions.CdcOptions` proto message with a top-level `ParquetCdcOptions` message, matching the config type name. The explicit name carries the parquet context, so nesting is no longer needed, and the config and proto type names now line up. The new message is defined after `ParquetOptions`, in the same position the original standalone message occupied. Field tags are unchanged (message tags 1-4, parent field 35), so the wire format is unaffected. Regenerated the prost/pbjson code for proto-common and proto-models, and updated the `From` mappings to use the top-level type.
1 parent 85bcfa1 commit 5b2209e

7 files changed

Lines changed: 195 additions & 197 deletions

File tree

datafusion/proto-common/proto/datafusion_common.proto

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -627,7 +627,7 @@ message ParquetOptions {
627627
uint64 max_predicate_cache_size = 33;
628628
}
629629

630-
CdcOptions content_defined_chunking = 35;
630+
ParquetCdcOptions content_defined_chunking = 35;
631631

632632
// Optional timezone applied to INT96-coerced timestamps when `coerce_int96`
633633
// is set. When `Some`, INT96 columns coerce to
@@ -636,15 +636,14 @@ message ParquetOptions {
636636
oneof coerce_int96_tz_opt {
637637
string coerce_int96_tz = 36;
638638
}
639+
}
639640

640-
// Content-defined chunking (CDC) options. Nested here as it is a parquet
641-
// write option.
642-
message CdcOptions {
643-
bool enabled = 1;
644-
uint64 min_chunk_size = 2;
645-
uint64 max_chunk_size = 3;
646-
int32 norm_level = 4;
647-
}
641+
// Content-defined chunking (CDC) options for writing parquet files.
642+
message ParquetCdcOptions {
643+
bool enabled = 1;
644+
uint64 min_chunk_size = 2;
645+
uint64 max_chunk_size = 3;
646+
int32 norm_level = 4;
648647
}
649648

650649
enum JoinSide {

datafusion/proto-common/src/from_proto/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,8 +1135,8 @@ impl TryFrom<&protobuf::ParquetOptions> for ParquetOptions {
11351135
}
11361136
}
11371137

1138-
impl From<protobuf::parquet_options::CdcOptions> for ParquetCdcOptions {
1139-
fn from(value: protobuf::parquet_options::CdcOptions) -> Self {
1138+
impl From<protobuf::ParquetCdcOptions> for ParquetCdcOptions {
1139+
fn from(value: protobuf::ParquetCdcOptions) -> Self {
11401140
ParquetCdcOptions {
11411141
enabled: value.enabled,
11421142
min_chunk_size: value.min_chunk_size as usize,

datafusion/proto-common/src/generated/pbjson.rs

Lines changed: 155 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -5758,6 +5758,161 @@ impl<'de> serde::Deserialize<'de> for NullEquality {
57585758
deserializer.deserialize_any(GeneratedVisitor)
57595759
}
57605760
}
5761+
impl serde::Serialize for ParquetCdcOptions {
5762+
#[allow(deprecated)]
5763+
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
5764+
where
5765+
S: serde::Serializer,
5766+
{
5767+
use serde::ser::SerializeStruct;
5768+
let mut len = 0;
5769+
if self.enabled {
5770+
len += 1;
5771+
}
5772+
if self.min_chunk_size != 0 {
5773+
len += 1;
5774+
}
5775+
if self.max_chunk_size != 0 {
5776+
len += 1;
5777+
}
5778+
if self.norm_level != 0 {
5779+
len += 1;
5780+
}
5781+
let mut struct_ser = serializer.serialize_struct("datafusion_common.ParquetCdcOptions", len)?;
5782+
if self.enabled {
5783+
struct_ser.serialize_field("enabled", &self.enabled)?;
5784+
}
5785+
if self.min_chunk_size != 0 {
5786+
#[allow(clippy::needless_borrow)]
5787+
#[allow(clippy::needless_borrows_for_generic_args)]
5788+
struct_ser.serialize_field("minChunkSize", ToString::to_string(&self.min_chunk_size).as_str())?;
5789+
}
5790+
if self.max_chunk_size != 0 {
5791+
#[allow(clippy::needless_borrow)]
5792+
#[allow(clippy::needless_borrows_for_generic_args)]
5793+
struct_ser.serialize_field("maxChunkSize", ToString::to_string(&self.max_chunk_size).as_str())?;
5794+
}
5795+
if self.norm_level != 0 {
5796+
struct_ser.serialize_field("normLevel", &self.norm_level)?;
5797+
}
5798+
struct_ser.end()
5799+
}
5800+
}
5801+
impl<'de> serde::Deserialize<'de> for ParquetCdcOptions {
5802+
#[allow(deprecated)]
5803+
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
5804+
where
5805+
D: serde::Deserializer<'de>,
5806+
{
5807+
const FIELDS: &[&str] = &[
5808+
"enabled",
5809+
"min_chunk_size",
5810+
"minChunkSize",
5811+
"max_chunk_size",
5812+
"maxChunkSize",
5813+
"norm_level",
5814+
"normLevel",
5815+
];
5816+
5817+
#[allow(clippy::enum_variant_names)]
5818+
enum GeneratedField {
5819+
Enabled,
5820+
MinChunkSize,
5821+
MaxChunkSize,
5822+
NormLevel,
5823+
}
5824+
impl<'de> serde::Deserialize<'de> for GeneratedField {
5825+
fn deserialize<D>(deserializer: D) -> std::result::Result<GeneratedField, D::Error>
5826+
where
5827+
D: serde::Deserializer<'de>,
5828+
{
5829+
struct GeneratedVisitor;
5830+
5831+
impl serde::de::Visitor<'_> for GeneratedVisitor {
5832+
type Value = GeneratedField;
5833+
5834+
fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
5835+
write!(formatter, "expected one of: {:?}", &FIELDS)
5836+
}
5837+
5838+
#[allow(unused_variables)]
5839+
fn visit_str<E>(self, value: &str) -> std::result::Result<GeneratedField, E>
5840+
where
5841+
E: serde::de::Error,
5842+
{
5843+
match value {
5844+
"enabled" => Ok(GeneratedField::Enabled),
5845+
"minChunkSize" | "min_chunk_size" => Ok(GeneratedField::MinChunkSize),
5846+
"maxChunkSize" | "max_chunk_size" => Ok(GeneratedField::MaxChunkSize),
5847+
"normLevel" | "norm_level" => Ok(GeneratedField::NormLevel),
5848+
_ => Err(serde::de::Error::unknown_field(value, FIELDS)),
5849+
}
5850+
}
5851+
}
5852+
deserializer.deserialize_identifier(GeneratedVisitor)
5853+
}
5854+
}
5855+
struct GeneratedVisitor;
5856+
impl<'de> serde::de::Visitor<'de> for GeneratedVisitor {
5857+
type Value = ParquetCdcOptions;
5858+
5859+
fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
5860+
formatter.write_str("struct datafusion_common.ParquetCdcOptions")
5861+
}
5862+
5863+
fn visit_map<V>(self, mut map_: V) -> std::result::Result<ParquetCdcOptions, V::Error>
5864+
where
5865+
V: serde::de::MapAccess<'de>,
5866+
{
5867+
let mut enabled__ = None;
5868+
let mut min_chunk_size__ = None;
5869+
let mut max_chunk_size__ = None;
5870+
let mut norm_level__ = None;
5871+
while let Some(k) = map_.next_key()? {
5872+
match k {
5873+
GeneratedField::Enabled => {
5874+
if enabled__.is_some() {
5875+
return Err(serde::de::Error::duplicate_field("enabled"));
5876+
}
5877+
enabled__ = Some(map_.next_value()?);
5878+
}
5879+
GeneratedField::MinChunkSize => {
5880+
if min_chunk_size__.is_some() {
5881+
return Err(serde::de::Error::duplicate_field("minChunkSize"));
5882+
}
5883+
min_chunk_size__ =
5884+
Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0)
5885+
;
5886+
}
5887+
GeneratedField::MaxChunkSize => {
5888+
if max_chunk_size__.is_some() {
5889+
return Err(serde::de::Error::duplicate_field("maxChunkSize"));
5890+
}
5891+
max_chunk_size__ =
5892+
Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0)
5893+
;
5894+
}
5895+
GeneratedField::NormLevel => {
5896+
if norm_level__.is_some() {
5897+
return Err(serde::de::Error::duplicate_field("normLevel"));
5898+
}
5899+
norm_level__ =
5900+
Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0)
5901+
;
5902+
}
5903+
}
5904+
}
5905+
Ok(ParquetCdcOptions {
5906+
enabled: enabled__.unwrap_or_default(),
5907+
min_chunk_size: min_chunk_size__.unwrap_or_default(),
5908+
max_chunk_size: max_chunk_size__.unwrap_or_default(),
5909+
norm_level: norm_level__.unwrap_or_default(),
5910+
})
5911+
}
5912+
}
5913+
deserializer.deserialize_struct("datafusion_common.ParquetCdcOptions", FIELDS, GeneratedVisitor)
5914+
}
5915+
}
57615916
impl serde::Serialize for ParquetColumnOptions {
57625917
#[allow(deprecated)]
57635918
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
@@ -6955,161 +7110,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions {
69557110
deserializer.deserialize_struct("datafusion_common.ParquetOptions", FIELDS, GeneratedVisitor)
69567111
}
69577112
}
6958-
impl serde::Serialize for parquet_options::CdcOptions {
6959-
#[allow(deprecated)]
6960-
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
6961-
where
6962-
S: serde::Serializer,
6963-
{
6964-
use serde::ser::SerializeStruct;
6965-
let mut len = 0;
6966-
if self.enabled {
6967-
len += 1;
6968-
}
6969-
if self.min_chunk_size != 0 {
6970-
len += 1;
6971-
}
6972-
if self.max_chunk_size != 0 {
6973-
len += 1;
6974-
}
6975-
if self.norm_level != 0 {
6976-
len += 1;
6977-
}
6978-
let mut struct_ser = serializer.serialize_struct("datafusion_common.ParquetOptions.CdcOptions", len)?;
6979-
if self.enabled {
6980-
struct_ser.serialize_field("enabled", &self.enabled)?;
6981-
}
6982-
if self.min_chunk_size != 0 {
6983-
#[allow(clippy::needless_borrow)]
6984-
#[allow(clippy::needless_borrows_for_generic_args)]
6985-
struct_ser.serialize_field("minChunkSize", ToString::to_string(&self.min_chunk_size).as_str())?;
6986-
}
6987-
if self.max_chunk_size != 0 {
6988-
#[allow(clippy::needless_borrow)]
6989-
#[allow(clippy::needless_borrows_for_generic_args)]
6990-
struct_ser.serialize_field("maxChunkSize", ToString::to_string(&self.max_chunk_size).as_str())?;
6991-
}
6992-
if self.norm_level != 0 {
6993-
struct_ser.serialize_field("normLevel", &self.norm_level)?;
6994-
}
6995-
struct_ser.end()
6996-
}
6997-
}
6998-
impl<'de> serde::Deserialize<'de> for parquet_options::CdcOptions {
6999-
#[allow(deprecated)]
7000-
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
7001-
where
7002-
D: serde::Deserializer<'de>,
7003-
{
7004-
const FIELDS: &[&str] = &[
7005-
"enabled",
7006-
"min_chunk_size",
7007-
"minChunkSize",
7008-
"max_chunk_size",
7009-
"maxChunkSize",
7010-
"norm_level",
7011-
"normLevel",
7012-
];
7013-
7014-
#[allow(clippy::enum_variant_names)]
7015-
enum GeneratedField {
7016-
Enabled,
7017-
MinChunkSize,
7018-
MaxChunkSize,
7019-
NormLevel,
7020-
}
7021-
impl<'de> serde::Deserialize<'de> for GeneratedField {
7022-
fn deserialize<D>(deserializer: D) -> std::result::Result<GeneratedField, D::Error>
7023-
where
7024-
D: serde::Deserializer<'de>,
7025-
{
7026-
struct GeneratedVisitor;
7027-
7028-
impl serde::de::Visitor<'_> for GeneratedVisitor {
7029-
type Value = GeneratedField;
7030-
7031-
fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
7032-
write!(formatter, "expected one of: {:?}", &FIELDS)
7033-
}
7034-
7035-
#[allow(unused_variables)]
7036-
fn visit_str<E>(self, value: &str) -> std::result::Result<GeneratedField, E>
7037-
where
7038-
E: serde::de::Error,
7039-
{
7040-
match value {
7041-
"enabled" => Ok(GeneratedField::Enabled),
7042-
"minChunkSize" | "min_chunk_size" => Ok(GeneratedField::MinChunkSize),
7043-
"maxChunkSize" | "max_chunk_size" => Ok(GeneratedField::MaxChunkSize),
7044-
"normLevel" | "norm_level" => Ok(GeneratedField::NormLevel),
7045-
_ => Err(serde::de::Error::unknown_field(value, FIELDS)),
7046-
}
7047-
}
7048-
}
7049-
deserializer.deserialize_identifier(GeneratedVisitor)
7050-
}
7051-
}
7052-
struct GeneratedVisitor;
7053-
impl<'de> serde::de::Visitor<'de> for GeneratedVisitor {
7054-
type Value = parquet_options::CdcOptions;
7055-
7056-
fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
7057-
formatter.write_str("struct datafusion_common.ParquetOptions.CdcOptions")
7058-
}
7059-
7060-
fn visit_map<V>(self, mut map_: V) -> std::result::Result<parquet_options::CdcOptions, V::Error>
7061-
where
7062-
V: serde::de::MapAccess<'de>,
7063-
{
7064-
let mut enabled__ = None;
7065-
let mut min_chunk_size__ = None;
7066-
let mut max_chunk_size__ = None;
7067-
let mut norm_level__ = None;
7068-
while let Some(k) = map_.next_key()? {
7069-
match k {
7070-
GeneratedField::Enabled => {
7071-
if enabled__.is_some() {
7072-
return Err(serde::de::Error::duplicate_field("enabled"));
7073-
}
7074-
enabled__ = Some(map_.next_value()?);
7075-
}
7076-
GeneratedField::MinChunkSize => {
7077-
if min_chunk_size__.is_some() {
7078-
return Err(serde::de::Error::duplicate_field("minChunkSize"));
7079-
}
7080-
min_chunk_size__ =
7081-
Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0)
7082-
;
7083-
}
7084-
GeneratedField::MaxChunkSize => {
7085-
if max_chunk_size__.is_some() {
7086-
return Err(serde::de::Error::duplicate_field("maxChunkSize"));
7087-
}
7088-
max_chunk_size__ =
7089-
Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0)
7090-
;
7091-
}
7092-
GeneratedField::NormLevel => {
7093-
if norm_level__.is_some() {
7094-
return Err(serde::de::Error::duplicate_field("normLevel"));
7095-
}
7096-
norm_level__ =
7097-
Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0)
7098-
;
7099-
}
7100-
}
7101-
}
7102-
Ok(parquet_options::CdcOptions {
7103-
enabled: enabled__.unwrap_or_default(),
7104-
min_chunk_size: min_chunk_size__.unwrap_or_default(),
7105-
max_chunk_size: max_chunk_size__.unwrap_or_default(),
7106-
norm_level: norm_level__.unwrap_or_default(),
7107-
})
7108-
}
7109-
}
7110-
deserializer.deserialize_struct("datafusion_common.ParquetOptions.CdcOptions", FIELDS, GeneratedVisitor)
7111-
}
7112-
}
71137113
impl serde::Serialize for Precision {
71147114
#[allow(deprecated)]
71157115
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>

0 commit comments

Comments
 (0)