From df3afaa0804ca019303be24a6056448999afb111 Mon Sep 17 00:00:00 2001 From: Sean Doherty Date: Sun, 17 May 2026 03:29:05 -0500 Subject: [PATCH] Guard runtime SET UTF-8 suffix parsing --- datafusion/core/src/execution/context/mod.rs | 17 ++++++--- datafusion/core/tests/sql/runtime_config.rs | 37 ++++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index d84ef0c898313..b380245459e87 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -305,6 +305,11 @@ impl Default for SessionContext { } impl SessionContext { + fn split_number_and_unit(value: &str) -> (&str, &str) { + let unit_start = value.char_indices().next_back().map_or(0, |(idx, _)| idx); + value.split_at(unit_start) + } + /// Creates a new `SessionContext` using the default [`SessionConfig`]. pub fn new() -> Self { Self::new_with_config(SessionConfig::new()) @@ -1256,7 +1261,7 @@ impl SessionContext { if limit.trim().is_empty() { return Err(plan_datafusion_err!("Empty limit value found!")); } - let (number, unit) = limit.split_at(limit.len() - 1); + let (number, unit) = Self::split_number_and_unit(limit); let number: f64 = number.parse().map_err(|_| { plan_datafusion_err!("Failed to parse number from memory limit '{limit}'") })?; @@ -1299,7 +1304,7 @@ impl SessionContext { if limit == "0" { return Ok(0); } - let (number, unit) = limit.split_at(limit.len() - 1); + let (number, unit) = Self::split_number_and_unit(limit); let number: f64 = number.parse().map_err(|_| { plan_datafusion_err!( "Failed to parse number from '{config_name}', limit '{limit}'" @@ -1333,7 +1338,7 @@ impl SessionContext { let mut seconds = None; for duration in duration.split_inclusive(&['m', 's']) { - let (number, unit) = duration.split_at(duration.len() - 1); + let (number, unit) = Self::split_number_and_unit(duration); let number: u64 = number.parse().map_err(|_| { plan_datafusion_err!("Failed to parse number from duration '{duration}' for '{config_name}'") })?; @@ -2864,7 +2869,7 @@ mod tests { // Invalid durations for duration in [ "0s", "0m", "1s0m", "1s1m", "XYZ", "1h", "XYZm2s", "", " ", "-1m", "1m 1s", - "1m1s ", " 1m1s", + "1m1s ", " 1m1s", "é", "1mé", ] { let have = SessionContext::parse_duration(LIST_FILES_CACHE_TTL, duration); assert!(have.is_err()); @@ -2960,6 +2965,8 @@ mod tests { "G", "1024B", "invalid_size", + "é", + "1é", ] { #[expect(deprecated)] let have = SessionContext::parse_memory_limit(limit); @@ -2995,6 +3002,8 @@ mod tests { "G", "1024B", "invalid_size", + "é", + "1é", ] { let have = SessionContext::parse_capacity_limit(MEMORY_LIMIT, limit); assert!(have.is_err()); diff --git a/datafusion/core/tests/sql/runtime_config.rs b/datafusion/core/tests/sql/runtime_config.rs index cf5237d725805..3e12cb865177d 100644 --- a/datafusion/core/tests/sql/runtime_config.rs +++ b/datafusion/core/tests/sql/runtime_config.rs @@ -176,6 +176,43 @@ async fn test_invalid_memory_limit_when_limit_is_not_numeric() { )); } +#[tokio::test] +async fn test_runtime_config_non_ascii_suffix_returns_error() { + let ctx = SessionContext::new(); + + for (sql, expected) in [ + ( + "SET datafusion.runtime.memory_limit = 'é'", + "Failed to parse number from 'datafusion.runtime.memory_limit', limit 'é'", + ), + ( + "SET datafusion.runtime.max_temp_directory_size = 'é'", + "Failed to parse number from 'datafusion.runtime.max_temp_directory_size', limit 'é'", + ), + ( + "SET datafusion.runtime.metadata_cache_limit = 'é'", + "Failed to parse number from 'datafusion.runtime.metadata_cache_limit', limit 'é'", + ), + ( + "SET datafusion.runtime.list_files_cache_limit = 'é'", + "Failed to parse number from 'datafusion.runtime.list_files_cache_limit', limit 'é'", + ), + ( + "SET datafusion.runtime.list_files_cache_ttl = '1mé'", + "Failed to parse number from duration 'é' for 'datafusion.runtime.list_files_cache_ttl'", + ), + ] { + let result = ctx.sql(sql).await; + + assert!(result.is_err()); + let error_message = result.unwrap_err().to_string(); + assert!( + error_message.contains(expected), + "expected {error_message:?} to contain {expected:?}" + ); + } +} + #[tokio::test] async fn test_max_temp_directory_size_enforcement() { let ctx = SessionContext::new();