cdisc-org · SFJohnson24 · May 18, 2026 · Apr 23, 2026 · Apr 23, 2026 · Apr 24, 2026
diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py
@@ -1664,12 +1664,12 @@ def _verify_neighbor_consistency(
 
         return is_valid
 
-    def check_target_ascending_in_sorted_group(
+    def check_target_ascending_in_sorted_group_with_regex(
         self, group, target, comparator, ascending, na_pos
     ):
         """
         Check if target values are in ascending order within a group
-        already sorted by comparator.
+        already sorted by comparator. Supports regex extraction.
         """
         is_valid = pd.Series(True, index=group.index)
         is_numeric_comparator = pd.api.types.is_numeric_dtype(group[comparator])
@@ -1798,12 +1798,45 @@ def _process_grouped_result(
                 grouped_result = pd.Series(result_list, index=index_list)
         return grouped_result.reindex(sorted_df.index, fill_value=True)
 
+    def _extract_regex_group(self, series: pd.Series, regex_pattern: str) -> pd.Series:
+        """
+        Extract the first capturing group from a regex pattern and convert to numeric if possible.
+        Handles zero-padded numbers by converting to numeric.
+
+        Args:
+            series: Pandas series with string values
+            regex_pattern: Regex pattern with capturing group(s)
+
+        Returns:
+            Series with extracted and converted values
+        """
+
+        def extract_and_convert(value):
+            if pd.isna(value) or value == "":
+                return np.nan
+
+            # YAML escapes backslashes, so we receive ".*\\d+$" which Python interprets as raw \
+            # We need to convert this to the actual regex pattern by replacing \\ with \
+            # However, since strings from YAML come already unescaped, we just use as-is
+            match = re.search(regex_pattern, str(value))
+            if match and match.groups():
+                extracted = match.group(1)  # First capturing group
+                # Try to convert to numeric to handle both padded and non-padded numbers
+                try:
+                    return pd.to_numeric(extracted)
+                except (ValueError, TypeError):
+                    return extracted
+            return np.nan
+
+        return series.apply(extract_and_convert)
+
     @log_operator_execution
     @type_operator(FIELD_DATAFRAME)
     def target_is_sorted_by(self, other_value: dict):
         target = other_value.get("target")
         within_columns = self._normalize_grouping_columns(other_value.get("within"))
         columns = other_value["comparator"]
+        target_regex = other_value.get("regex")  # parameter for regex extraction
 
         result = pd.Series([True] * len(self.value), index=self.value.index)
 
@@ -1816,37 +1849,55 @@ def target_is_sorted_by(self, other_value: dict):
                 dict.fromkeys([target, comparator, *within_columns])
             )
 
-            sorted_df = self.value[selected_columns].sort_values(
-                by=[*within_columns, target],
-                ascending=[True] * (len(within_columns) + 1),
-            )
+            # If regex is provided, extract and convert target values
+            if target_regex:
+                working_df = self.value[selected_columns].copy()
+                # Create a temporary column with extracted regex values
+                working_df[f"{target}_extracted"] = self._extract_regex_group(
+                    working_df[target], target_regex
+                )
+                target_for_sorting = f"{target}_extracted"
+                # Sort by within columns only, preserve original order within groups
+                sorted_df = working_df.sort_values(
+                    by=within_columns,
+                    ascending=[True] * len(within_columns),
+                )
+            else:
+                working_df = self.value[selected_columns]
+                target_for_sorting = target
+                sorted_df = working_df.sort_values(
+                    by=[*within_columns, target],
+                    ascending=[True] * (len(within_columns) + 1),
+                )
 
             grouped_df = sorted_df.groupby(within_columns, sort=False)
 
             target_check = grouped_df.apply(
-                lambda x: self.check_target_ascending_in_sorted_group(
-                    x, target, comparator, ascending, na_pos
+                lambda x: self.check_target_ascending_in_sorted_group_with_regex(
+                    x, target_for_sorting, comparator, ascending, na_pos
                 )
             )
             target_check = self._process_grouped_result(
                 target_check,
                 grouped_df,
                 within_columns,
                 sorted_df,
-                lambda group: self.check_target_ascending_in_sorted_group(
-                    group, target, comparator, ascending, na_pos
+                lambda group: self.check_target_ascending_in_sorted_group_with_regex(
+                    group, target_for_sorting, comparator, ascending, na_pos
                 ),
             )
 
             date_overlap_check = grouped_df.apply(
-                lambda x: self.check_date_overlaps(x, target, comparator)
+                lambda x: self.check_date_overlaps(x, target_for_sorting, comparator)
             )
             date_overlap_check = self._process_grouped_result(
                 date_overlap_check,
                 grouped_df,
                 within_columns,
                 sorted_df,
-                lambda group: self.check_date_overlaps(group, target, comparator),
+                lambda group: self.check_date_overlaps(
+                    group, target_for_sorting, comparator
+                ),
             )
 
             combined_check = target_check & date_overlap_check