apache · shivbhatia10 · Feb 27, 2026 · Feb 27, 2026 · Feb 27, 2026 · Feb 27, 2026
diff --git a/datafusion/spark/src/function/math/floor.rs b/datafusion/spark/src/function/math/floor.rs
@@ -0,0 +1,263 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::array::{AsArray, Decimal128Array};
+use arrow::compute::cast;
+use arrow::datatypes::{DataType, Decimal128Type, Float32Type, Float64Type, Int64Type};
+use datafusion_common::utils::take_function_args;
+use datafusion_common::{Result, exec_err};
+use datafusion_expr::{
+    ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
+};
+
+/// Spark-compatible `floor` expression
+/// <https://spark.apache.org/docs/latest/api/sql/index.html#floor>
+///
+/// Differences with DataFusion floor:
+///  - Spark's floor returns Int64 for float and integer inputs; DataFusion preserves
+///    the input type (Float32→Float32, Float64→Float64, integers coerced to Float64)
+///  - Spark's floor on Decimal128(p, s) with s > 0 returns Decimal128(p−s+1, 0) with the
+///    precision clamped to 1..=38; DataFusion preserves the original precision and scale
+///  - Spark only supports Decimal128; DataFusion also supports Decimal32/64/256
+///  - Spark does not check for decimal overflow; DataFusion errors on overflow
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct SparkFloor {
+    signature: Signature, // built in `new`: exactly one numeric argument, immutable
+}
+
+impl Default for SparkFloor {
+    /// Equivalent to [`SparkFloor::new`]; both construction paths build the
+    /// same signature.
+    fn default() -> Self {
+        SparkFloor::new()
+    }
+}
+
+impl SparkFloor {
+    /// Creates the UDF with a signature accepting exactly one numeric argument,
+    /// marked [`Volatility::Immutable`].
+    pub fn new() -> Self {
+        let signature = Signature::numeric(1, Volatility::Immutable);
+        Self { signature }
+    }
+}
+
+impl ScalarUDFImpl for SparkFloor {
+    /// Exposes the concrete type as `&dyn Any`.
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// The function name, `floor`.
+    fn name(&self) -> &str {
+        "floor"
+    }
+
+    /// The signature built in [`SparkFloor::new`].
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// Derives the output type from the first argument type:
+    ///  - `Decimal128(p, s)` with `s > 0` becomes `Decimal128(p - s + 1, 0)`, with
+    ///    the precision clamped to `1..=38`
+    ///  - any other `Decimal128` is returned unchanged
+    ///  - everything else maps to `Int64`
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        let output = match arg_types[0] {
+            DataType::Decimal128(precision, scale) if scale > 0 => {
+                // Digits left of the decimal point, plus one for a possible carry.
+                let digits = i64::from(precision) - i64::from(scale) + 1;
+                DataType::Decimal128(digits.clamp(1, 38) as u8, 0)
+            }
+            DataType::Decimal128(precision, scale) => {
+                DataType::Decimal128(precision, scale)
+            }
+            _ => DataType::Int64,
+        };
+        Ok(output)
+    }
+
+    /// Evaluates the function by handing the arguments and the planned return
+    /// type to `spark_floor`.
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        let output_type: DataType = args.return_type().clone();
+        spark_floor(args.args.as_slice(), &output_type)
+    }
+}
+
+/// Applies Spark `floor` semantics to a single argument.
+///
+/// A scalar argument is first expanded to an array, so the result is always a
+/// `ColumnarValue::Array`:
+///  - Float32 / Float64: `floor()` followed by an `as i64` conversion
+///  - integer types: cast to Int64
+///  - Decimal128 with positive scale: the unscaled value is floor-divided by
+///    `10^scale` and the array is relabelled with `return_type`
+///  - Decimal128 with non-positive scale: returned unchanged
+///
+/// Any other input type yields an execution error.
+fn spark_floor(args: &[ColumnarValue], return_type: &DataType) -> Result<ColumnarValue> {
+    let [arg] = take_function_args("floor", args)?;
+    let input = match arg {
+        ColumnarValue::Array(array) => Arc::clone(array),
+        ColumnarValue::Scalar(scalar) => scalar.to_array()?,
+    };
+
+    let floored = match input.data_type() {
+        DataType::Float32 => {
+            let floats = input.as_primitive::<Float32Type>();
+            Arc::new(floats.unary::<_, Int64Type>(|v| v.floor() as i64)) as _
+        }
+        DataType::Float64 => {
+            let doubles = input.as_primitive::<Float64Type>();
+            Arc::new(doubles.unary::<_, Int64Type>(|v| v.floor() as i64)) as _
+        }
+        int_type if int_type.is_integer() => cast(&input, &DataType::Int64)?,
+        DataType::Decimal128(_, scale) if *scale > 0 => {
+            // One whole unit expressed in the unscaled representation.
+            let unit = 10_i128.pow(*scale as u32);
+            let unscaled = input.as_primitive::<Decimal128Type>();
+            // `unit` is positive, so Euclidean division rounds toward negative
+            // infinity, which is exactly the floor.
+            let whole: Decimal128Array = unscaled.unary(|v| v.div_euclid(unit));
+            Arc::new(whole.with_data_type(return_type.clone()))
+        }
+        DataType::Decimal128(_, _) => input,
+        other => return exec_err!("Unsupported data type {other:?} for function floor"),
+    };
+
+    Ok(ColumnarValue::Array(floored))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow::array::{
+        ArrayRef, Decimal128Array, Float32Array, Float64Array, Int64Array,
+    };
+    use datafusion_common::ScalarValue;
+
+    /// Runs `spark_floor` on a single argument and unwraps the array result.
+    fn floor_to_array(arg: ColumnarValue, return_type: &DataType) -> ArrayRef {
+        match spark_floor(&[arg], return_type).unwrap() {
+            ColumnarValue::Array(array) => array,
+            _ => panic!("Expected array"),
+        }
+    }
+
+    /// Asserts that `actual` is an Int64 array equal to `expected`.
+    fn assert_int64(actual: &ArrayRef, expected: Vec<Option<i64>>) {
+        assert_eq!(
+            actual.as_primitive::<Int64Type>(),
+            &Int64Array::from(expected)
+        );
+    }
+
+    /// Expected output shared by the Float32 and Float64 array tests.
+    fn expected_float_floors() -> Vec<Option<i64>> {
+        vec![
+            Some(125),
+            Some(15),
+            Some(0),
+            Some(-1),
+            Some(-2),
+            Some(123),
+            None,
+        ]
+    }
+
+    #[test]
+    fn test_floor_float64() {
+        let values = Float64Array::from(vec![
+            Some(125.9345),
+            Some(15.9999),
+            Some(0.9),
+            Some(-0.1),
+            Some(-1.999),
+            Some(123.0),
+            None,
+        ]);
+        let floored =
+            floor_to_array(ColumnarValue::Array(Arc::new(values)), &DataType::Int64);
+        assert_int64(&floored, expected_float_floors());
+    }
+
+    #[test]
+    fn test_floor_float32() {
+        let values = Float32Array::from(vec![
+            Some(125.9345f32),
+            Some(15.9999f32),
+            Some(0.9f32),
+            Some(-0.1f32),
+            Some(-1.999f32),
+            Some(123.0f32),
+            None,
+        ]);
+        let floored =
+            floor_to_array(ColumnarValue::Array(Arc::new(values)), &DataType::Int64);
+        assert_int64(&floored, expected_float_floors());
+    }
+
+    #[test]
+    fn test_floor_int64() {
+        let values = Int64Array::from(vec![Some(1), Some(-1), None]);
+        let floored =
+            floor_to_array(ColumnarValue::Array(Arc::new(values)), &DataType::Int64);
+        assert_int64(&floored, vec![Some(1), Some(-1), None]);
+    }
+
+    #[test]
+    fn test_floor_decimal128() {
+        // Unscaled inputs at scale 2: 150 = 1.50, -150 = -1.50, 100 = 1.00
+        let input_type = DataType::Decimal128(10, 2);
+        let output_type = DataType::Decimal128(9, 0);
+        let unscaled =
+            Decimal128Array::from(vec![Some(150), Some(-150), Some(100), None])
+                .with_data_type(input_type);
+        let floored =
+            floor_to_array(ColumnarValue::Array(Arc::new(unscaled)), &output_type);
+        let expected = Decimal128Array::from(vec![Some(1), Some(-2), Some(1), None])
+            .with_data_type(output_type);
+        assert_eq!(floored.as_primitive::<Decimal128Type>(), &expected);
+    }
+
+    #[test]
+    fn test_floor_float64_scalar() {
+        let scalar = ColumnarValue::Scalar(ScalarValue::Float64(Some(-1.999)));
+        let floored = floor_to_array(scalar, &DataType::Int64);
+        assert_int64(&floored, vec![Some(-2)]);
+    }
+
+    #[test]
+    fn test_floor_float32_scalar() {
+        let scalar = ColumnarValue::Scalar(ScalarValue::Float32(Some(125.9345f32)));
+        let floored = floor_to_array(scalar, &DataType::Int64);
+        assert_int64(&floored, vec![Some(125)]);
+    }
+
+    #[test]
+    fn test_floor_int64_scalar() {
+        let scalar = ColumnarValue::Scalar(ScalarValue::Int64(Some(48)));
+        let floored = floor_to_array(scalar, &DataType::Int64);
+        assert_int64(&floored, vec![Some(48)]);
+    }
+}
diff --git a/datafusion/spark/src/function/math/mod.rs b/datafusion/spark/src/function/math/mod.rs
@@ -19,6 +19,7 @@ pub mod abs;
 pub mod bin;
 pub mod expm1;
 pub mod factorial;
+pub mod floor;
 pub mod hex;
 pub mod modulus;
 pub mod negative;
@@ -34,6 +35,7 @@ use std::sync::Arc;
 make_udf_function!(abs::SparkAbs, abs);
 make_udf_function!(expm1::SparkExpm1, expm1);
 make_udf_function!(factorial::SparkFactorial, factorial);
+make_udf_function!(floor::SparkFloor, floor);
 make_udf_function!(hex::SparkHex, hex);
 make_udf_function!(modulus::SparkMod, modulus);
 make_udf_function!(modulus::SparkPmod, pmod);
@@ -55,6 +57,11 @@ pub mod expr_fn {
         "Returns the factorial of expr. expr is [0..20]. Otherwise, null.",
         arg1
     ));
+    export_functions!((
+        floor,
+        "Returns the largest integer not greater than expr.",
+        arg1
+    ));
     export_functions!((hex, "Computes hex value of the given column.", arg1));
     export_functions!((modulus, "Returns the remainder of division of the first argument by the second argument.", arg1 arg2));
     export_functions!((pmod, "Returns the positive remainder of division of the first argument by the second argument.", arg1 arg2));
@@ -84,6 +91,7 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
         abs(),
         expm1(),
         factorial(),
+        floor(),
         hex(),
         modulus(),
         pmod(),