diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 58d433e7dd0c1..ad254d61b5c0b 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1184,7 +1184,7 @@ impl SessionContext { builder.with_object_list_cache_limit(limit) } "list_files_cache_ttl" => { - let duration = Self::parse_duration(value)?; + let duration = Self::parse_duration(variable, value)?; builder.with_object_list_cache_ttl(Some(duration)) } _ => return plan_err!("Unknown runtime configuration: {variable}"), @@ -1323,36 +1323,66 @@ impl SessionContext { } } - fn parse_duration(duration: &str) -> Result { + fn parse_duration(config_name: &str, duration: &str) -> Result { + if duration.trim().is_empty() { + return Err(plan_datafusion_err!( + "Duration should not be empty or blank for '{config_name}'" + )); + } + let mut minutes = None; let mut seconds = None; for duration in duration.split_inclusive(&['m', 's']) { let (number, unit) = duration.split_at(duration.len() - 1); let number: u64 = number.parse().map_err(|_| { - plan_datafusion_err!("Failed to parse number from duration '{duration}'") + plan_datafusion_err!("Failed to parse number from duration '{duration}' for '{config_name}'") })?; match unit { "m" if minutes.is_none() && seconds.is_none() => minutes = Some(number), "s" if seconds.is_none() => seconds = Some(number), - _ => plan_err!( - "Invalid duration, unit must be either 'm' (minutes), or 's' (seconds), and be in the correct order" + other => plan_err!( + "Invalid duration unit: '{other}'. The unit must be either 'm' (minutes), or 's' (seconds), and be in the correct order for '{config_name}'" )?, } } - let duration = Duration::from_secs( - minutes.unwrap_or_default() * 60 + seconds.unwrap_or_default(), - ); + let secs = Self::check_overflow(config_name, minutes, 60, seconds)?; + let duration = Duration::from_secs(secs); if duration.is_zero() { - return plan_err!("Duration must be greater than 0 seconds"); + return plan_err!( + "Duration must be greater than 0 seconds for '{config_name}'" + ); } Ok(duration) } + fn check_overflow( + config_name: &str, + mins: Option, + multiplier: u64, + secs: Option, + ) -> Result { + let first_part_of_secs = mins.unwrap_or_default().checked_mul(multiplier); + if first_part_of_secs.is_none() { + plan_err!( + "Duration has overflowed allowed maximum limit due to 'mins * {multiplier}' when setting '{config_name}'" + )? + } + let second_part_of_secs = first_part_of_secs + .unwrap() + .checked_add(secs.unwrap_or_default()); + if second_part_of_secs.is_none() { + plan_err!( + "Duration has overflowed allowed maximum limit due to 'mins * {multiplier} + secs' when setting '{config_name}'" + )? + } + Ok(second_part_of_secs.unwrap()) + } + async fn create_custom_table( &self, cmd: &CreateExternalTable, @@ -2803,6 +2833,8 @@ mod tests { #[test] fn test_parse_duration() { + const LIST_FILES_CACHE_TTL: &str = "datafusion.runtime.list_files_cache_ttl"; + // Valid durations for (duration, want) in [ ("1s", Duration::from_secs(1)), @@ -2810,14 +2842,81 @@ mod tests { ("1m0s", Duration::from_secs(60)), ("1m1s", Duration::from_secs(61)), ] { - let have = SessionContext::parse_duration(duration).unwrap(); + let have = + SessionContext::parse_duration(LIST_FILES_CACHE_TTL, duration).unwrap(); assert_eq!(want, have); } // Invalid durations - for duration in ["0s", "0m", "1s0m", "1s1m"] { - let have = SessionContext::parse_duration(duration); + for duration in [ + "0s", "0m", "1s0m", "1s1m", "XYZ", "1h", "XYZm2s", "", " ", "-1m", "1m 1s", + "1m1s ", " 1m1s", + ] { + let have = SessionContext::parse_duration(LIST_FILES_CACHE_TTL, duration); assert!(have.is_err()); + assert!( + have.unwrap_err() + .message() + .to_string() + .contains(LIST_FILES_CACHE_TTL) + ); + } + } + + #[test] + fn test_parse_duration_with_overflow_check() { + const LIST_FILES_CACHE_TTL: &str = "datafusion.runtime.list_files_cache_ttl"; + + // Valid durations which are close to max allowed limit + for (duration, want) in [ + ( + "18446744073709551615s", + Duration::from_secs(18446744073709551615), + ), + ( + "307445734561825860m", + Duration::from_secs(307445734561825860 * 60), + ), + ( + "307445734561825860m10s", + Duration::from_secs(307445734561825860 * 60 + 10), + ), + ( + "1m18446744073709551555s", + Duration::from_secs(60 + 18446744073709551555), + ), + ] { + let have = + SessionContext::parse_duration(LIST_FILES_CACHE_TTL, duration).unwrap(); + assert_eq!(want, have); + } + + // Invalid durations which overflow max allowed limit + for (duration, error_message_prefix) in [ + ( + "18446744073709551616s", + "Failed to parse number from duration", + ), + ( + "307445734561825861m", + "Duration has overflowed allowed maximum limit due to", + ), + ( + "307445734561825860m60s", + "Duration has overflowed allowed maximum limit due to", + ), + ( + "1m18446744073709551556s", + "Duration has overflowed allowed maximum limit due to", + ), + ] { + let have = SessionContext::parse_duration(LIST_FILES_CACHE_TTL, duration); + assert!(have.is_err()); + let error_message = have.unwrap_err().message().to_string(); + assert!( + error_message.contains(error_message_prefix) + && error_message.contains(LIST_FILES_CACHE_TTL) + ); } } diff --git a/datafusion/sqllogictest/test_files/set_variable.slt b/datafusion/sqllogictest/test_files/set_variable.slt index 7be353f0573cb..375d34925114e 100644 --- a/datafusion/sqllogictest/test_files/set_variable.slt +++ b/datafusion/sqllogictest/test_files/set_variable.slt @@ -450,3 +450,33 @@ datafusion.runtime.temp_directory statement error DataFusion error: Error during planning: Unsupported value Null SET datafusion.runtime.memory_limit = NULL + +statement error DataFusion error: Error during planning: Unsupported value Null +SET datafusion.runtime.list_files_cache_ttl = NULL + +statement error DataFusion error: Error during planning: Duration should not be empty or blank for 'datafusion.runtime.list_files_cache_ttl' +SET datafusion.runtime.list_files_cache_ttl = ' ' + +statement ok +SET datafusion.runtime.list_files_cache_ttl = '18446744073709551615s' + +statement error DataFusion error: Error during planning: Failed to parse number from duration '18446744073709551616s' for 'datafusion.runtime.list_files_cache_ttl' +SET datafusion.runtime.list_files_cache_ttl = '18446744073709551616s' + +statement ok +SET datafusion.runtime.list_files_cache_ttl = '307445734561825860m' + +statement ok +SET datafusion.runtime.list_files_cache_ttl = '307445734561825860m10s' + +statement error DataFusion error: Error during planning: Duration has overflowed allowed maximum limit due to 'mins \* 60' when setting 'datafusion\.runtime\.list_files_cache_ttl' +SET datafusion.runtime.list_files_cache_ttl = '307445734561825861m' + +statement error DataFusion error: Error during planning: Duration has overflowed allowed maximum limit due to 'mins \* 60 \+ secs' when setting 'datafusion\.runtime\.list_files_cache_ttl' +SET datafusion.runtime.list_files_cache_ttl = '307445734561825860m60s' + +statement ok +SET datafusion.runtime.list_files_cache_ttl = '1m18446744073709551555s' + +statement error DataFusion error: Error during planning: Duration has overflowed allowed maximum limit due to 'mins \* 60 \+ secs' when setting 'datafusion\.runtime\.list_files_cache_ttl' +SET datafusion.runtime.list_files_cache_ttl = '1m18446744073709551556s' diff --git a/docs/source/library-user-guide/upgrading/52.0.0.md b/docs/source/library-user-guide/upgrading/52.0.0.md index 4c659b6118fe4..8bf2f803bede6 100644 --- a/docs/source/library-user-guide/upgrading/52.0.0.md +++ b/docs/source/library-user-guide/upgrading/52.0.0.md @@ -51,7 +51,7 @@ those changes until the `ListingTableProvider` instance is dropped and recreated You can configure the maximum cache size and cache entry expiration time via configuration options: - `datafusion.runtime.list_files_cache_limit` - Limits the size of the cache in bytes -- `datafusion.runtime.list_files_cache_ttl` - Limits the TTL (time-to-live) of an entry in seconds +- `datafusion.runtime.list_files_cache_ttl` - Limits the TTL (time-to-live) of an entry in minutes and/or seconds Detailed configuration information can be found in the [DataFusion Runtime Configuration](https://datafusion.apache.org/user-guide/configs.html#runtime-configuration-settings) user's guide.