apache
diff --git a/‎.github/workflows/miri.yml‎
Lines changed: 8 additions & 10 deletions b/‎.github/workflows/miri.yml‎
Lines changed: 8 additions & 10 deletions
diff --git a/‎.github/workflows/pr_rat_check.yml‎
Lines changed: 47 additions & 0 deletions b/‎.github/workflows/pr_rat_check.yml‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎common/src/main/scala/org/apache/comet/CometConf.scala‎
Lines changed: 0 additions & 6 deletions b/‎common/src/main/scala/org/apache/comet/CometConf.scala‎
Lines changed: 0 additions & 6 deletions
diff --git a/‎common/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala‎
Lines changed: 1 addition & 1 deletion b/‎common/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎native/core/src/execution/shuffle/spark_unsafe/row.rs‎
Lines changed: 13 additions & 3 deletions b/‎native/core/src/execution/shuffle/spark_unsafe/row.rs‎
Lines changed: 13 additions & 3 deletions
diff --git a/‎native/spark-expr/src/predicate_funcs/rlike.rs‎
Lines changed: 69 additions & 3 deletions b/‎native/spark-expr/src/predicate_funcs/rlike.rs‎
Lines changed: 69 additions & 3 deletions
diff --git a/‎spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala‎
Lines changed: 2 additions & 0 deletions b/‎spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala‎
Lines changed: 1 addition & 1 deletion b/‎spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎spark/src/test/resources/sql-tests/expressions/string/rlike_enabled.sql‎
Lines changed: 1 addition & 1 deletion b/‎spark/src/test/resources/sql-tests/expressions/string/rlike_enabled.sql‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala‎
Lines changed: 1 addition & 7 deletions b/‎spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala‎
Lines changed: 1 addition & 7 deletions
@@ -28,16 +28,14 @@ on:
       - "native/core/benches/**"
       - "native/spark-expr/benches/**"
       - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
-# Disabled until Miri compatibility is restored
-# https://github.com/apache/datafusion-comet/issues/3499
-#  pull_request:
-#    paths-ignore:
-#      - "doc/**"
-#      - "docs/**"
-#      - "**.md"
-#      - "native/core/benches/**"
-#      - "native/spark-expr/benches/**"
-#      - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
+  pull_request:
+    paths-ignore:
+      - "doc/**"
+      - "docs/**"
+      - "**.md"
+      - "native/core/benches/**"
+      - "native/spark-expr/benches/**"
+      - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
   # manual trigger
   # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
   workflow_dispatch:
 
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Runs the Apache RAT check through the Maven wrapper on pushes to `main`, on every
+# pull request, and on manual dispatch.
+name: RAT License Check
+
+# Runs are grouped by repository, branch (or commit SHA when `github.head_ref` is empty)
+# and workflow name; a new run in a group cancels the one still in progress.
+concurrency:
+  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+# Limit the workflow token to read-only access to repository contents.
+permissions:
+  contents: read
+
+# No paths-ignore: this workflow must run for ALL changes including docs
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  rat-check:
+    name: RAT License Check
+    # NOTE(review): confirm the `ubuntu-slim` runner label is available to this repository.
+    runs-on: ubuntu-slim
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Java
+        uses: actions/setup-java@v4
+        with:
+          distribution: temurin
+          java-version: 11
+      - name: Run RAT check
+        # -B: batch (non-interactive) mode; -N: non-recursive, build only the root project.
+        run: ./mvnw -B -N apache-rat:check
@@ -114,12 +114,6 @@ object CometConf extends ShimCometConf {
       .booleanConf
       .createWithEnvVarOrDefault("ENABLE_COMET_WRITE", false)
 
-  // Deprecated: native_comet uses mutable buffers incompatible with Arrow FFI best practices
-  // and does not support complex types. Use native_iceberg_compat or auto instead.
-  // This will be removed in a future release.
-  // See: https://github.com/apache/datafusion-comet/issues/2186
-  @deprecated("Use SCAN_AUTO instead. native_comet will be removed in a future release.", "0.9.0")
-  val SCAN_NATIVE_COMET = "native_comet"
   val SCAN_NATIVE_DATAFUSION = "native_datafusion"
   val SCAN_NATIVE_ICEBERG_COMPAT = "native_iceberg_compat"
   val SCAN_AUTO = "auto"
 
@@ -271,7 +271,7 @@ object Utils extends CometTypeShim {
           throw new SparkException(
             s"Comet execution only takes Arrow Arrays, but got ${c.getClass}. " +
               "This typically happens when a Comet scan falls back to Spark due to unsupported " +
-              "data types (e.g., complex types like structs, arrays, or maps with native_comet). " +
+              "data types (e.g., complex types like structs, arrays, or maps). " +
               "To resolve this, you can: " +
               "(1) enable spark.comet.scan.allowIncompatible=true to use a compatible native " +
               "scan variant, or " +
 
@@ -255,8 +255,15 @@ impl SparkUnsafeObject for SparkUnsafeRow {
         self.row_addr
     }
 
-    fn get_element_offset(&self, index: usize, _: usize) -> *const u8 {
-        (self.row_addr + self.row_bitset_width + (index * 8) as i64) as *const u8
+    /// Returns a pointer to the value slot of field `index` in this row.
+    ///
+    /// The slot address is `row_addr + row_bitset_width + index * 8`: the leading
+    /// `row_bitset_width` bytes are skipped, then slots follow at an 8-byte stride.
+    /// `element_size` only feeds the bounds check below and does not change the returned
+    /// pointer (presumably it is the width of the access the caller will perform).
+    fn get_element_offset(&self, index: usize, element_size: usize) -> *const u8 {
+        // Byte offset of the slot relative to the start of the row.
+        let offset = self.row_bitset_width + (index * 8) as i64;
+        // Checked only when debug assertions are enabled: the row size must be non-negative
+        // and the `element_size` bytes starting at `offset` must end within `row_size`.
+        debug_assert!(
+            self.row_size >= 0 && offset + element_size as i64 <= self.row_size as i64,
+            "get_element_offset: access at offset {offset} with size {element_size} \
+             exceeds row_size {} for index {index}",
+            self.row_size
+        );
+        (self.row_addr + offset) as *const u8
     }
 }
 
@@ -1659,7 +1666,10 @@ mod test {
         let fields = Fields::from(vec![Field::new("st", data_type.clone(), true)]);
         let mut struct_builder = StructBuilder::from_fields(fields, 1);
         let mut row = SparkUnsafeRow::new_with_num_fields(1);
-        let data = [0; 8];
+        // 8 bytes null bitset + 8 bytes field value = 16 bytes
+        // Set bit 0 in the null bitset to mark field 0 as null
+        let mut data = [0u8; 16];
+        data[0] = 1;
         row.point_to_slice(&data);
         append_field(&data_type, &mut struct_builder, &row, 0).expect("append field");
         struct_builder.append_null();
 
@@ -21,7 +21,7 @@ use arrow::array::types::Int32Type;
 use arrow::array::{Array, BooleanArray, DictionaryArray, RecordBatch, StringArray};
 use arrow::compute::take;
 use arrow::datatypes::{DataType, Schema};
-use datafusion::common::{internal_err, Result};
+use datafusion::common::{internal_err, Result, ScalarValue};
 use datafusion::physical_expr::PhysicalExpr;
 use datafusion::physical_plan::ColumnarValue;
 use regex::Regex;
@@ -140,8 +140,24 @@ impl PhysicalExpr for RLike {
                 let array = self.is_match(inputs);
                 Ok(ColumnarValue::Array(Arc::new(array)))
             }
-            ColumnarValue::Scalar(_) => {
-                internal_err!("non scalar regexp patterns are not supported")
+            ColumnarValue::Scalar(scalar) => {
+                if scalar.is_null() {
+                    return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
+                }
+
+                let is_match = match scalar {
+                    ScalarValue::Utf8(Some(s))
+                    | ScalarValue::LargeUtf8(Some(s))
+                    | ScalarValue::Utf8View(Some(s)) => self.pattern.is_match(&s),
+                    _ => {
+                        return internal_err!(
+                            "RLike requires string type for input, got {:?}",
+                            scalar.data_type()
+                        );
+                    }
+                };
+
+                Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(is_match))))
             }
         }
     }
@@ -165,3 +181,53 @@ impl PhysicalExpr for RLike {
         Display::fmt(self, f)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use datafusion::physical_expr::expressions::Literal;
+
+    /// Pattern shared by every test: an upper-case `R` followed by lower-case letters.
+    const PATTERN: &str = "R[a-z]+";
+
+    /// Builds `input RLIKE PATTERN` with `input` as a literal child and evaluates it
+    /// against an empty, schema-less batch.
+    fn eval_literal(input: ScalarValue) -> Result<ColumnarValue> {
+        let expr = RLike::try_new(Arc::new(Literal::new(input)), PATTERN).unwrap();
+        expr.evaluate(&RecordBatch::new_empty(Arc::new(Schema::empty())))
+    }
+
+    /// Like `eval_literal`, but requires a successful scalar result and unwraps it.
+    fn eval_to_scalar(input: ScalarValue) -> ScalarValue {
+        match eval_literal(input).unwrap() {
+            ColumnarValue::Scalar(value) => value,
+            ColumnarValue::Array(_) => panic!("expected scalar result"),
+        }
+    }
+
+    /// Returns the same optional string wrapped in each string-typed scalar variant
+    /// accepted by the scalar branch of `RLike::evaluate`.
+    fn string_variants(value: Option<&str>) -> [ScalarValue; 3] {
+        let owned = value.map(str::to_string);
+        [
+            ScalarValue::Utf8(owned.clone()),
+            ScalarValue::LargeUtf8(owned.clone()),
+            ScalarValue::Utf8View(owned),
+        ]
+    }
+
+    #[test]
+    fn test_rlike_scalar_string_variants() {
+        // A matching literal yields `true` for every string variant.
+        for scalar in string_variants(Some("Rose")) {
+            assert_eq!(
+                eval_to_scalar(scalar.clone()),
+                ScalarValue::Boolean(Some(true)),
+                "input: {:?}",
+                scalar
+            );
+        }
+
+        // Null input should produce a null boolean result, whatever the string variant.
+        for scalar in string_variants(None) {
+            assert_eq!(
+                eval_to_scalar(scalar.clone()),
+                ScalarValue::Boolean(None),
+                "input: {:?}",
+                scalar
+            );
+        }
+    }
+
+    #[test]
+    fn test_rlike_scalar_no_match() {
+        // Neither input contains an upper-case `R`, so the pattern cannot match and the
+        // result must be a non-null `false` rather than null or an error.
+        for input in ["rose", "12345"] {
+            for scalar in string_variants(Some(input)) {
+                assert_eq!(
+                    eval_to_scalar(scalar.clone()),
+                    ScalarValue::Boolean(Some(false)),
+                    "input: {:?}",
+                    scalar
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn test_rlike_scalar_non_string_error() {
+        // A non-null, non-string scalar is rejected with an error.
+        let result = eval_literal(ScalarValue::Boolean(Some(true)));
+        assert!(result.is_err());
+    }
+}
@@ -307,6 +307,8 @@ case class CometScanRule(session: SparkSession)
         if (s.isCometEnabled && schemaSupported) {
           // When reading from Iceberg, we automatically enable type promotion
           SQLConf.get.setConfString(COMET_SCHEMA_EVOLUTION_ENABLED.key, "true")
+          // When reading from Iceberg, we automatically disable native columnar to row
+          SQLConf.get.setConfString(COMET_NATIVE_COLUMNAR_TO_ROW_ENABLED.key, "false")
           CometBatchScanExec(
             scanExec.clone().asInstanceOf[BatchScanExec],
             runtimeFilters = scanExec.runtimeFilters)
 
@@ -51,7 +51,7 @@ import org.apache.comet.parquet.CometParquetFileFormat
  *
  * This is a hybrid scan where the native plan will contain a `ScanExec` that reads batches of
  * data from the JVM via JNI. The ultimate source of data may be a JVM implementation such as
- * Spark readers, or could be the `native_comet` or `native_iceberg_compat` native scans.
+ * Spark readers, or could be the `native_iceberg_compat` native scan.
  *
  * Note that scanImpl can only be `native_datafusion` after CometScanRule runs and before
  * CometExecRule runs. It will never be set to `native_datafusion` at execution time
 
@@ -35,5 +35,5 @@ query
 SELECT s RLIKE '' FROM test_rlike_enabled
 
 -- literal arguments
-query ignore(https://github.com/apache/datafusion-comet/issues/3343)
+query
 SELECT 'hello' RLIKE '^[a-z]+$', '12345' RLIKE '^[a-z]+$', '' RLIKE '', NULL RLIKE 'a'
@@ -812,13 +812,7 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
 
   // https://github.com/apache/datafusion-comet/issues/2612
   test("array_reverse - fallback for binary array") {
-    val fallbackReason =
-      if (CometConf.COMET_NATIVE_SCAN_IMPL.key == CometConf.SCAN_NATIVE_COMET || sys.env
-          .getOrElse("COMET_PARQUET_SCAN_IMPL", "") == CometConf.SCAN_NATIVE_COMET) {
-        "Unsupported schema"
-      } else {
-        CometArrayReverse.unsupportedReason
-      }
+    val fallbackReason = CometArrayReverse.unsupportedReason
     withTable("t1") {
       sql("""create table t1 using parquet as
           select cast(null as array<binary>) c1, cast(array() as array<binary>) c2
Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@ import org.apache.comet.parquet.CometParquetFileFormat`
`51`	`51`	`*`
`52`	`52`	* This is a hybrid scan where the native plan will contain a `ScanExec` that reads batches of
`53`	`53`	`* data from the JVM via JNI. The ultimate source of data may be a JVM implementation such as`
`54`		- * Spark readers, or could be the `native_comet` or `native_iceberg_compat` native scans.
	`54`	+ * Spark readers, or could be the `native_iceberg_compat` native scan.
`55`	`55`	`*`
`56`	`56`	* Note that scanImpl can only be `native_datafusion` after CometScanRule runs and before
`57`	`57`	* CometExecRule runs. It will never be set to `native_datafusion` at execution time