Skip to content
Open
207 changes: 182 additions & 25 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2862,28 +2862,42 @@ impl ConfigField for ConfigFileEncryptionProperties {
}

#[cfg(feature = "parquet_encryption")]
impl From<ConfigFileEncryptionProperties> for FileEncryptionProperties {
fn from(val: ConfigFileEncryptionProperties) -> Self {
impl TryFrom<ConfigFileEncryptionProperties> for FileEncryptionProperties {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we need entry in upgrade guide.

Copy link
Copy Markdown
Author

@Soham-Bhattacharjee-work Soham-Bhattacharjee-work May 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

type Error = DataFusionError;

fn try_from(val: ConfigFileEncryptionProperties) -> Result<Self> {
let mut fep = FileEncryptionProperties::builder(
hex::decode(val.footer_key_as_hex).unwrap(),
hex::decode(val.footer_key_as_hex)
.map_err(|e| {
DataFusionError::Configuration(format!("Unable to decode hex footer key from ConfigFileEncryptionProperties: {e}"))
})?,
)
.with_plaintext_footer(!val.encrypt_footer)
.with_aad_prefix_storage(val.store_aad_prefix);

if !val.footer_key_metadata_as_hex.is_empty() {
fep = fep.with_footer_key_metadata(
hex::decode(&val.footer_key_metadata_as_hex)
.expect("Invalid footer key metadata"),
.map_err(|e| {
DataFusionError::Configuration(format!("Unable to decode hex footer key metadata from ConfigFileEncryptionProperties: {e}"))
})?,
);
}

for (column_name, encryption_props) in val.column_encryption_properties.iter() {
let encryption_key = hex::decode(&encryption_props.column_key_as_hex)
.expect("Invalid column encryption key");
.map_err(|e| {
DataFusionError::Configuration(format!("Unable to decode hex encryption key for column {column_name}: {e}"))
})?;
let key_metadata = encryption_props
.column_metadata_as_hex
.as_ref()
.map(|x| hex::decode(x).expect("Invalid column metadata"));
.map(hex::decode)
.transpose()
.map_err(|e| {
DataFusionError::Configuration(format!("Unable to decode hex column metadata for column {column_name}: {e}"))
})?;

match key_metadata {
Some(key_metadata) => {
fep = fep.with_column_key_and_metadata(
Expand All @@ -2899,11 +2913,18 @@ impl From<ConfigFileEncryptionProperties> for FileEncryptionProperties {
}

if !val.aad_prefix_as_hex.is_empty() {
let aad_prefix: Vec<u8> =
hex::decode(&val.aad_prefix_as_hex).expect("Invalid AAD prefix");
let aad_prefix: Vec<u8> = hex::decode(&val.aad_prefix_as_hex).map_err(|e| {
DataFusionError::Configuration(format!(
"Unable to decode hex AAD prefix from ConfigFileEncryptionProperties: {e}"
))
})?;
fep = fep.with_aad_prefix(aad_prefix);
}
Arc::unwrap_or_clone(fep.build().unwrap())
Ok(Arc::unwrap_or_clone(fep.build().map_err(|e| {
DataFusionError::Configuration(format!(
"Could not build FileEncryptionProperties: {e}"
))
})?))
}
}

Expand Down Expand Up @@ -3019,36 +3040,54 @@ impl ConfigField for ConfigFileDecryptionProperties {
}

#[cfg(feature = "parquet_encryption")]
impl From<ConfigFileDecryptionProperties> for FileDecryptionProperties {
fn from(val: ConfigFileDecryptionProperties) -> Self {
impl TryFrom<ConfigFileDecryptionProperties> for FileDecryptionProperties {
type Error = DataFusionError;

fn try_from(val: ConfigFileDecryptionProperties) -> Result<Self> {
let mut column_names: Vec<&str> = Vec::new();
let mut column_keys: Vec<Vec<u8>> = Vec::new();

for (col_name, decryption_properties) in val.column_decryption_properties.iter() {
let column_key = hex::decode(&decryption_properties.column_key_as_hex).map_err(|e| {
DataFusionError::Configuration(format!
("Could not decode hex column key from ConfigFileDecryptionProperties for column name {col_name}: {e}."))
})?;
column_names.push(col_name.as_str());
column_keys.push(
hex::decode(&decryption_properties.column_key_as_hex)
.expect("Invalid column decryption key"),
);
column_keys.push(column_key);
}

let mut fep = FileDecryptionProperties::builder(
hex::decode(val.footer_key_as_hex).expect("Invalid footer key"),
)
.with_column_keys(column_names, column_keys)
.unwrap();
let footer_key = hex::decode(val.footer_key_as_hex).map_err(|e| {
DataFusionError::Configuration(format!(
"Could not decode hex footer key from ConfigFileDecryptionProperties: {e}."
))
})?;

let mut fep = FileDecryptionProperties::builder(footer_key)
.with_column_keys(column_names, column_keys)
.map_err(|e| {
DataFusionError::Configuration(format!(
"Could not set column keys on FileDecryptionPropertiesBuilder: {e}."
))
})?;

if !val.footer_signature_verification {
fep = fep.disable_footer_signature_verification();
}

if !val.aad_prefix_as_hex.is_empty() {
let aad_prefix =
hex::decode(&val.aad_prefix_as_hex).expect("Invalid AAD prefix");
let aad_prefix = hex::decode(&val.aad_prefix_as_hex).map_err(|e| {
DataFusionError::Configuration(format!(
"Could not decode hex AAD prefix from ConfigFileDecryptionProperties: {e}."
))
})?;
fep = fep.with_aad_prefix(aad_prefix);
}

Arc::unwrap_or_clone(fep.build().unwrap())
Ok(Arc::unwrap_or_clone(fep.build().map_err(|e| {
DataFusionError::Configuration(format!(
"Could not build FileDecryptionProperties: {e}."
))
})?))
}
}

Expand Down Expand Up @@ -3586,13 +3625,13 @@ mod tests {
let config_encrypt =
ConfigFileEncryptionProperties::from(&file_encryption_properties);
let encryption_properties_built =
Arc::new(FileEncryptionProperties::from(config_encrypt.clone()));
Arc::new(FileEncryptionProperties::try_from(config_encrypt.clone()).unwrap());
assert_eq!(file_encryption_properties, encryption_properties_built);

let config_decrypt =
ConfigFileDecryptionProperties::try_from(&decryption_properties).unwrap();
let decryption_properties_built =
Arc::new(FileDecryptionProperties::from(config_decrypt.clone()));
Arc::new(FileDecryptionProperties::try_from(config_decrypt.clone()).unwrap());
assert_eq!(decryption_properties, decryption_properties_built);

///////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -3678,6 +3717,124 @@ mod tests {
);
}

#[cfg(feature = "parquet_encryption")]
#[test]
fn parquet_encryption_invalid_hex_errors_encryption() {
use crate::config::ColumnEncryptionProperties;
use crate::config::ConfigFileEncryptionProperties;
use parquet::encryption::encrypt::FileEncryptionProperties;
use std::collections::HashMap;

let valid_footer_key_as_hex = hex::encode(b"0123456789012345");

let mut enc = ConfigFileEncryptionProperties {
encrypt_footer: true,
footer_key_as_hex: valid_footer_key_as_hex.clone(),
footer_key_metadata_as_hex: String::new(),
column_encryption_properties: HashMap::new(),
aad_prefix_as_hex: String::new(),
store_aad_prefix: false,
};

// Encryption: invalid footer key hex
enc.footer_key_as_hex = "not_hex".to_string();
let err = FileEncryptionProperties::try_from(enc.clone())
.unwrap_err()
.to_string();
assert!(err.contains("Unable to decode hex footer key"));
enc.footer_key_as_hex = valid_footer_key_as_hex.clone();

// Encryption: invalid footer key metadata hex
enc.footer_key_metadata_as_hex = "zz".to_string();
let err = FileEncryptionProperties::try_from(enc.clone())
.unwrap_err()
.to_string();
assert!(err.contains("Unable to decode hex footer key metadata"));
enc.footer_key_metadata_as_hex = String::new();

// Encryption: invalid column key hex
enc.column_encryption_properties.insert(
"col1".to_string(),
ColumnEncryptionProperties {
column_key_as_hex: "bad".to_string(),
column_metadata_as_hex: None,
},
);
let err = FileEncryptionProperties::try_from(enc.clone())
.unwrap_err()
.to_string();
assert!(err.contains("Unable to decode hex encryption key for column col1"));
enc.column_encryption_properties.clear();

// Encryption: invalid column metadata hex
enc.column_encryption_properties.insert(
"col1".to_string(),
ColumnEncryptionProperties {
column_key_as_hex: hex::encode(b"1234567890123450"),
column_metadata_as_hex: Some("zz".to_string()),
},
);
let err = FileEncryptionProperties::try_from(enc.clone())
.unwrap_err()
.to_string();
assert!(err.contains("Unable to decode hex column metadata for column col1"));
enc.column_encryption_properties.clear();

// Encryption: invalid AAD prefix hex
enc.aad_prefix_as_hex = "zz".to_string();
let err = FileEncryptionProperties::try_from(enc.clone())
.unwrap_err()
.to_string();
assert!(err.contains("Unable to decode hex AAD prefix"));
}

#[cfg(feature = "parquet_encryption")]
#[test]
fn parquet_encryption_invalid_hex_errors_decryption() {
use crate::config::ColumnDecryptionProperties;
use crate::config::ConfigFileDecryptionProperties;
use parquet::encryption::decrypt::FileDecryptionProperties;
use std::collections::HashMap;

let valid_footer_key_as_hex = hex::encode(b"0123456789012345");

let mut dec = ConfigFileDecryptionProperties {
footer_key_as_hex: valid_footer_key_as_hex.clone(),
column_decryption_properties: HashMap::new(),
aad_prefix_as_hex: String::new(),
footer_signature_verification: true,
};

// Decryption: invalid column key hex
dec.column_decryption_properties.insert(
"col1".to_string(),
ColumnDecryptionProperties {
column_key_as_hex: "bad".to_string(),
},
);
let err = FileDecryptionProperties::try_from(dec.clone())
.unwrap_err()
.to_string();
assert!(err.contains("Could not decode hex column key"));
assert!(err.contains("col1"));
dec.column_decryption_properties.clear();

// Decryption: invalid footer key hex
dec.footer_key_as_hex = "bad".to_string();
let err = FileDecryptionProperties::try_from(dec.clone())
.unwrap_err()
.to_string();
assert!(err.contains("Could not decode hex footer key"));
dec.footer_key_as_hex = valid_footer_key_as_hex;

// Decryption: invalid AAD prefix hex
dec.aad_prefix_as_hex = "zz".to_string();
let err = FileDecryptionProperties::try_from(dec.clone())
.unwrap_err()
.to_string();
assert!(err.contains("Could not decode hex AAD prefix"));
}

#[cfg(feature = "parquet_encryption")]
#[test]
fn parquet_encryption_factory_config() {
Expand Down
4 changes: 2 additions & 2 deletions datafusion/datasource-parquet/src/file_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ async fn get_file_decryption_properties(
file_path: &Path,
) -> Result<Option<Arc<FileDecryptionProperties>>> {
Ok(match &options.crypto.file_decryption {
Some(cfd) => Some(Arc::new(FileDecryptionProperties::from(cfd.clone()))),
Some(cfd) => Some(Arc::new(FileDecryptionProperties::try_from(cfd.clone())?)),
None => match &options.crypto.factory_id {
Some(factory_id) => {
let factory =
Expand Down Expand Up @@ -1284,7 +1284,7 @@ async fn set_writer_encryption_properties(
if let Some(file_encryption_properties) = parquet_opts.crypto.file_encryption {
// Encryption properties have been specified directly
return Ok(builder.with_file_encryption_properties(Arc::new(
FileEncryptionProperties::from(file_encryption_properties),
FileEncryptionProperties::try_from(file_encryption_properties)?,
)));
} else if let Some(encryption_factory_id) = &parquet_opts.crypto.factory_id.as_ref() {
// Encryption properties will be generated by an encryption factory
Expand Down
3 changes: 2 additions & 1 deletion datafusion/datasource-parquet/src/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,8 @@ impl FileSource for ParquetSource {
.crypto
.file_decryption
.clone()
.map(FileDecryptionProperties::from)
.map(FileDecryptionProperties::try_from)
.transpose()?
.map(Arc::new);

let coerce_int96 = self
Expand Down
42 changes: 42 additions & 0 deletions docs/source/library-user-guide/upgrading/54.0.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,48 @@ let config_decryption_properties = ConfigFileDecryptionProperties::try_from(&dec
See [#21602](https://github.com/apache/datafusion/issues/21602) and
[PR #21603](https://github.com/apache/datafusion/pull/21603) for details.

### Conversion from `ConfigFileEncryptionProperties` / `ConfigFileDecryptionProperties` is now fallible

Previously, `datafusion_common::config::ConfigFileEncryptionProperties` and
`datafusion_common::config::ConfigFileDecryptionProperties` implemented infallible
conversions into Parquet's encryption/decryption types (via `From` / `Into`).
These conversions may need to decode hex-encoded keys and other configuration values, which can fail.

They now use `TryFrom` / `TryInto` and return a `Result`:

- `impl TryFrom<ConfigFileEncryptionProperties> for parquet::encryption::encrypt::FileEncryptionProperties`
- `impl TryFrom<ConfigFileDecryptionProperties> for parquet::encryption::decrypt::FileDecryptionProperties`

**Migration guide:**

Replace `from()` / `into()` with `try_from()` / `try_into()` and handle the resulting `Result`.

**Before:**

```rust,ignore
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This upgrade guide section feels too long for the actual API change. The migration is just replacing from/into with try_from/try_into and handling the Result. Could we shorten this to a small before/after snippet instead of showing the full encryption and decryption config setup? Take inspiration from the work just above for the similar change.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

let file_encryption_properties: FileEncryptionProperties = config_encryption_properties.into();
// or
let file_decryption_properties = FileDecryptionProperties::from(config_decryption_properties);
```

(
where `config_encryption_properties` is a `ConfigFileEncryptionProperties` and
`config_decryption_properties` is a `ConfigFileDecryptionProperties`
)

**After:**

```rust,ignore
let file_encryption_properties: FileEncryptionProperties =
config_encryption_properties.try_into()?;
// or
let file_decryption_properties =
FileDecryptionProperties::try_from(config_decryption_properties)?;
```

See [#21974](https://github.com/apache/datafusion/issues/21974) and
[PR #21985](https://github.com/apache/datafusion/pull/21985) for details.

### `approx_percentile_cont`, `approx_percentile_cont_with_weight`, `approx_median` now coerce to floats

The type signatures of `approx_percentile_cont`, `approx_percentile_cont_with_weight`, and
Expand Down
Loading