Skip to content

Commit bb86364

Browse files
adamreevekumarUjjawalnuno-faria
authored
fix: Make conversion from FileDecryptionProperties to ConfigFileDecryptionProperties fallible (#21603)
## Which issue does this PR close? - Closes #21602. ## Rationale for this change Fail quickly with a helpful error if we're unable to represent a `FileDecryptionProperties` instance as `ConfigFileDecryptionProperties` ## What changes are included in this PR? * Change the implementation of `From<&Arc<FileDecryptionProperties>>` for `ConfigFileDecryptionProperties` to `TryFrom`. * Fail the conversion if we can't get the footer key from the `FileDecryptionProperties` with empty metadata ## Are these changes tested? Yes I've added a new unit test. I also tested this with a branch of delta-rs that uses Datafusion with Parquet encryption, and this required only minor changes to tests and examples: corwinjoy/delta-rs@file_format_options_squashed...adamreeve:delta-rs:test-datafusion-change ## Are there any user-facing changes? Yes, this is a breaking API change. --------- Co-authored-by: Kumar Ujjawal <ujjawalpathak6@gmail.com> Co-authored-by: Nuno Faria <nunofpfaria@gmail.com>
1 parent d59bc72 commit bb86364

5 files changed

Lines changed: 95 additions & 13 deletions

File tree

datafusion-examples/examples/data_io/parquet_encrypted.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ pub async fn parquet_encrypted() -> datafusion::common::Result<()> {
7171
// Read encrypted parquet back as a DataFrame using matching decryption config
7272
let ctx: SessionContext = SessionContext::new();
7373
let read_options =
74-
ParquetReadOptions::default().file_decryption_properties((&decrypt).into());
74+
ParquetReadOptions::default().file_decryption_properties((&decrypt).try_into()?);
7575

7676
let encrypted_parquet_df = ctx
7777
.read_parquet(tempfile.to_str().unwrap(), read_options)

datafusion/common/src/config.rs

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3053,8 +3053,18 @@ impl From<ConfigFileDecryptionProperties> for FileDecryptionProperties {
30533053
}
30543054

30553055
#[cfg(feature = "parquet_encryption")]
3056-
impl From<&Arc<FileDecryptionProperties>> for ConfigFileDecryptionProperties {
3057-
fn from(f: &Arc<FileDecryptionProperties>) -> Self {
3056+
impl TryFrom<&Arc<FileDecryptionProperties>> for ConfigFileDecryptionProperties {
3057+
type Error = DataFusionError;
3058+
3059+
fn try_from(f: &Arc<FileDecryptionProperties>) -> Result<Self> {
3060+
let footer_key = f.footer_key(None).map_err(|e| {
3061+
DataFusionError::Configuration(format!(
3062+
"Could not retrieve footer key from FileDecryptionProperties. \
3063+
Note that conversion to ConfigFileDecryptionProperties is not supported \
3064+
when using a key retriever: {e}"
3065+
))
3066+
})?;
3067+
30583068
let (column_names_vec, column_keys_vec) = f.column_keys();
30593069
let mut column_decryption_properties: HashMap<
30603070
String,
@@ -3068,14 +3078,12 @@ impl From<&Arc<FileDecryptionProperties>> for ConfigFileDecryptionProperties {
30683078
}
30693079

30703080
let aad_prefix = f.aad_prefix().cloned().unwrap_or_default();
3071-
ConfigFileDecryptionProperties {
3072-
footer_key_as_hex: hex::encode(
3073-
f.footer_key(None).unwrap_or_default().as_ref(),
3074-
),
3081+
Ok(ConfigFileDecryptionProperties {
3082+
footer_key_as_hex: hex::encode(footer_key.as_ref()),
30753083
column_decryption_properties,
30763084
aad_prefix_as_hex: hex::encode(aad_prefix),
30773085
footer_signature_verification: f.check_plaintext_footer_integrity(),
3078-
}
3086+
})
30793087
}
30803088
}
30813089

@@ -3581,7 +3589,8 @@ mod tests {
35813589
Arc::new(FileEncryptionProperties::from(config_encrypt.clone()));
35823590
assert_eq!(file_encryption_properties, encryption_properties_built);
35833591

3584-
let config_decrypt = ConfigFileDecryptionProperties::from(&decryption_properties);
3592+
let config_decrypt =
3593+
ConfigFileDecryptionProperties::try_from(&decryption_properties).unwrap();
35853594
let decryption_properties_built =
35863595
Arc::new(FileDecryptionProperties::from(config_decrypt.clone()));
35873596
assert_eq!(decryption_properties, decryption_properties_built);
@@ -3699,6 +3708,42 @@ mod tests {
36993708
assert_eq!(factory_options.get("key2"), Some(&"value 2".to_string()));
37003709
}
37013710

3711+
#[cfg(feature = "parquet_encryption")]
3712+
struct ParquetEncryptionKeyRetriever {}
3713+
3714+
#[cfg(feature = "parquet_encryption")]
3715+
impl parquet::encryption::decrypt::KeyRetriever for ParquetEncryptionKeyRetriever {
3716+
fn retrieve_key(&self, key_metadata: &[u8]) -> parquet::errors::Result<Vec<u8>> {
3717+
if !key_metadata.is_empty() {
3718+
Ok(b"1234567890123450".to_vec())
3719+
} else {
3720+
Err(parquet::errors::ParquetError::General(
3721+
"Key metadata not provided".to_string(),
3722+
))
3723+
}
3724+
}
3725+
}
3726+
3727+
#[cfg(feature = "parquet_encryption")]
3728+
#[test]
3729+
fn conversion_from_key_retriever_to_config_file_decryption_properties() {
3730+
use crate::Result;
3731+
use crate::config::ConfigFileDecryptionProperties;
3732+
use crate::encryption::FileDecryptionProperties;
3733+
3734+
let retriever = std::sync::Arc::new(ParquetEncryptionKeyRetriever {});
3735+
let decryption_properties =
3736+
FileDecryptionProperties::with_key_retriever(retriever)
3737+
.build()
3738+
.unwrap();
3739+
let config_file_decryption_properties: Result<ConfigFileDecryptionProperties> =
3740+
(&decryption_properties).try_into();
3741+
assert!(config_file_decryption_properties.is_err());
3742+
let err = config_file_decryption_properties.unwrap_err().to_string();
3743+
assert!(err.contains("key retriever"));
3744+
assert!(err.contains("Key metadata not provided"));
3745+
}
3746+
37023747
#[cfg(feature = "parquet")]
37033748
#[test]
37043749
fn parquet_table_options_config_entry() {

datafusion/core/src/dataframe/parquet.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,8 +312,8 @@ mod tests {
312312

313313
// Read encrypted parquet
314314
let ctx: SessionContext = SessionContext::new();
315-
let read_options =
316-
ParquetReadOptions::default().file_decryption_properties((&decrypt).into());
315+
let read_options = ParquetReadOptions::default()
316+
.file_decryption_properties((&decrypt).try_into()?);
317317

318318
ctx.register_parquet("roundtrip_parquet", &tempfile_str, read_options.clone())
319319
.await?;

datafusion/core/tests/parquet/encryption.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,8 @@ async fn round_trip_encryption() {
115115

116116
// Read encrypted parquet
117117
let ctx: SessionContext = SessionContext::new();
118-
let options =
119-
ParquetReadOptions::default().file_decryption_properties((&decrypt).into());
118+
let options = ParquetReadOptions::default()
119+
.file_decryption_properties((&decrypt).try_into().unwrap());
120120

121121
let encrypted_batches = read_parquet_test_data(
122122
tempfile.into_os_string().into_string().unwrap(),

docs/source/library-user-guide/upgrading/54.0.0.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,43 @@ caller-computed `NullBuffer`.
410410

411411
[#17861]: https://github.com/apache/datafusion/pull/17861
412412

413+
### Conversion from `FileDecryptionProperties` to `ConfigFileDecryptionProperties` is now fallible
414+
415+
Previously, `datafusion_common::config::ConfigFileDecryptionProperties`
416+
implemented `From<&Arc<parquet::encryption::decrypt::FileDecryptionProperties>>`.
417+
If an error was encountered when retrieving the footer key without providing key metadata,
418+
the error would be ignored and an empty footer key set in the result.
419+
This could lead to obscure errors later.
420+
421+
`ConfigFileDecryptionProperties` now instead implements `TryFrom<&Arc<FileDecryptionProperties>>`,
422+
and errors retrieving the footer key will be propagated up.
423+
424+
**Migration guide:**
425+
426+
Replace calls to `ConfigFileDecryptionProperties::from` with `ConfigFileDecryptionProperties::try_from`,
427+
and affected calls to `into` with `try_into`, with appropriate error handling added.
428+
429+
**Before:**
430+
431+
```rust,ignore
432+
let config_decryption_properties: ConfigFileDecryptionProperties = (&decryption_properties).into();
433+
// or
434+
let config_decryption_properties = ConfigFileDecryptionProperties::from(&decryption_properties);
435+
```
436+
437+
(where `decryption_properties` is an `Arc<FileDecryptionProperties>`)
438+
439+
**After:**
440+
441+
```rust,ignore
442+
let config_decryption_properties: ConfigFileDecryptionProperties = (&decryption_properties).try_into()?;
443+
// or
444+
let config_decryption_properties = ConfigFileDecryptionProperties::try_from(&decryption_properties)?;
445+
```
446+
447+
See [#21602](https://github.com/apache/datafusion/issues/21602) and
448+
[PR #21603](https://github.com/apache/datafusion/pull/21603) for details.
449+
413450
### `approx_percentile_cont`, `approx_percentile_cont_with_weight`, `approx_median` now coerce to floats
414451

415452
The type signatures of `approx_percentile_cont`, `approx_percentile_cont_with_weight`, and

0 commit comments

Comments
 (0)