From b1eaee894035d1e3eaab154cf9f5183bb55409c1 Mon Sep 17 00:00:00 2001
From: Filipp Shpomer <filipps@verily.com>
Date: Wed, 20 May 2026 11:06:36 -0700
Subject: [PATCH] fix: handle pandas 3.0 default StringDtype

---
 .../check_operators/dataframe_operators.py    | 50 +++++++++++--------
 cdisc_rules_engine/check_operators/helpers.py |  2 +-
 cdisc_rules_engine/operations/record_count.py |  2 +-
 3 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py
index 7f0c94e43..417289858 100644
--- a/cdisc_rules_engine/check_operators/dataframe_operators.py
+++ b/cdisc_rules_engine/check_operators/dataframe_operators.py
@@ -228,8 +228,8 @@ def _check_equality(
             target_val = custom_str_conversion(target_val)
             comparison_val = custom_str_conversion(comparison_val)
         if case_insensitive:
-            target_val = target_val.lower() if target_val else None
-            comparison_val = comparison_val.lower() if comparison_val else None
+            target_val = target_val.lower() if isinstance(target_val, str) and target_val else None
+            comparison_val = comparison_val.lower() if isinstance(comparison_val, str) and comparison_val else None
             return target_val == comparison_val
         return target_val == comparison_val
 
@@ -275,8 +275,8 @@ def _check_inequality(
             target_val = custom_str_conversion(target_val)
             comparison_val = custom_str_conversion(comparison_val)
         if case_insensitive:
-            target_val = target_val.lower() if target_val else None
-            comparison_val = comparison_val.lower() if comparison_val else None
+            target_val = target_val.lower() if isinstance(target_val, str) and target_val else None
+            comparison_val = comparison_val.lower() if isinstance(comparison_val, str) and comparison_val else None
             return target_val != comparison_val
         return target_val != comparison_val
 
@@ -696,6 +696,12 @@ def is_contained_by_case_insensitive(self, other_value):
     def is_not_contained_by_case_insensitive(self, other_value):
         return ~self.is_contained_by_case_insensitive(other_value)
 
+    @staticmethod
+    def _map_regex(series, func):
+        # Run func on non-missing values only, fill gaps with False, and cast to bool
+        # (pandas 3 .map() may not return numpy bool) so callers can apply ~ and &.
+        return series.map(func, na_action="ignore").fillna(False).astype(bool)
+
     @log_operator_execution
     @type_operator(FIELD_DATAFRAME)
     def prefix_matches_regex(self, other_value):
@@ -705,10 +711,10 @@ def prefix_matches_regex(self, other_value):
         converted_strings = self.value[target].map(
             lambda x: self._regex_str_conversion(x)
         )
-        results = converted_strings.notna() & converted_strings.astype(str).map(
-            lambda x: re.search(comparator, x[:prefix]) is not None
+        return converted_strings.notna() & self._map_regex(
+            converted_strings.astype(str),
+            lambda x: re.search(comparator, x[:prefix]) is not None,
         )
-        return results
 
     @log_operator_execution
     @type_operator(FIELD_DATAFRAME)
@@ -719,10 +725,10 @@ def not_prefix_matches_regex(self, other_value):
         converted_strings = self.value[target].map(
             lambda x: self._regex_str_conversion(x)
         )
-        results = converted_strings.notna() & ~converted_strings.astype(str).map(
-            lambda x: re.search(comparator, x[:prefix]) is not None
+        return converted_strings.notna() & ~self._map_regex(
+            converted_strings.astype(str),
+            lambda x: re.search(comparator, x[:prefix]) is not None,
         )
-        return results
 
     @log_operator_execution
     @type_operator(FIELD_DATAFRAME)
@@ -733,10 +739,10 @@ def suffix_matches_regex(self, other_value):
         converted_strings = self.value[target].map(
             lambda x: self._regex_str_conversion(x)
         )
-        results = converted_strings.notna() & converted_strings.astype(str).map(
-            lambda x: re.search(comparator, x[-suffix:]) is not None
+        return converted_strings.notna() & self._map_regex(
+            converted_strings.astype(str),
+            lambda x: re.search(comparator, x[-suffix:]) is not None,
         )
-        return results
 
     @log_operator_execution
     @type_operator(FIELD_DATAFRAME)
@@ -747,10 +753,10 @@ def not_suffix_matches_regex(self, other_value):
         converted_strings = self.value[target].map(
             lambda x: self._regex_str_conversion(x)
         )
-        results = converted_strings.notna() & ~converted_strings.astype(str).map(
-            lambda x: re.search(comparator, x[-suffix:]) is not None
+        return converted_strings.notna() & ~self._map_regex(
+            converted_strings.astype(str),
+            lambda x: re.search(comparator, x[-suffix:]) is not None,
         )
-        return results
 
     @log_operator_execution
     @type_operator(FIELD_DATAFRAME)
@@ -760,10 +766,10 @@ def matches_regex(self, other_value):
         converted_strings = self.value[target].map(
             lambda x: self._regex_str_conversion(x)
         )
-        results = converted_strings.notna() & converted_strings.astype(str).str.match(
-            comparator
+        return converted_strings.notna() & self._map_regex(
+            converted_strings.astype(str),
+            lambda x: re.match(comparator, x) is not None,
         )
-        return results
 
     @log_operator_execution
     @type_operator(FIELD_DATAFRAME)
@@ -773,10 +779,10 @@ def not_matches_regex(self, other_value):
         converted_strings = self.value[target].map(
             lambda x: self._regex_str_conversion(x)
         )
-        results = converted_strings.notna() & ~converted_strings.astype(str).str.match(
-            comparator
+        return converted_strings.notna() & ~self._map_regex(
+            converted_strings.astype(str),
+            lambda x: re.match(comparator, x) is not None,
         )
-        return results
 
     @log_operator_execution
     @type_operator(FIELD_DATAFRAME)
diff --git a/cdisc_rules_engine/check_operators/helpers.py b/cdisc_rules_engine/check_operators/helpers.py
index ca3e9f888..0816380b0 100644
--- a/cdisc_rules_engine/check_operators/helpers.py
+++ b/cdisc_rules_engine/check_operators/helpers.py
@@ -56,7 +56,7 @@ def default_value(self):
 
 
 def is_valid_date(date_string: str) -> bool:
-    if date_string is None or not isinstance(date_string, str):
+    if not isinstance(date_string, str):
         return False
     try:
         isoparse(date_string)
diff --git a/cdisc_rules_engine/operations/record_count.py b/cdisc_rules_engine/operations/record_count.py
index 017b260f1..aa4579d08 100644
--- a/cdisc_rules_engine/operations/record_count.py
+++ b/cdisc_rules_engine/operations/record_count.py
@@ -169,7 +169,7 @@ def _build_effective_grouping(self) -> tuple[list, dict]:
                 if self.params.dataframe[col].isna().all():
                     all_na_cols[col] = None
                 elif (
-                    self.params.dataframe[col].dtype == "object"
+                    pd.api.types.is_string_dtype(self.params.dataframe[col])
                     and self.params.dataframe[col].fillna("").str.strip().eq("").all()
                 ):
                     all_na_cols[col] = ""