Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 28 additions & 22 deletions cdisc_rules_engine/check_operators/dataframe_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,8 @@ def _check_equality(
target_val = custom_str_conversion(target_val)
comparison_val = custom_str_conversion(comparison_val)
if case_insensitive:
target_val = target_val.lower() if target_val else None
comparison_val = comparison_val.lower() if comparison_val else None
target_val = target_val.lower() if isinstance(target_val, str) and target_val else None
comparison_val = comparison_val.lower() if isinstance(comparison_val, str) and comparison_val else None
return target_val == comparison_val
return target_val == comparison_val

Expand Down Expand Up @@ -275,8 +275,8 @@ def _check_inequality(
target_val = custom_str_conversion(target_val)
comparison_val = custom_str_conversion(comparison_val)
if case_insensitive:
target_val = target_val.lower() if target_val else None
comparison_val = comparison_val.lower() if comparison_val else None
target_val = target_val.lower() if isinstance(target_val, str) and target_val else None
comparison_val = comparison_val.lower() if isinstance(comparison_val, str) and comparison_val else None
return target_val != comparison_val
return target_val != comparison_val

Expand Down Expand Up @@ -696,6 +696,12 @@ def is_contained_by_case_insensitive(self, other_value):
def is_not_contained_by_case_insensitive(self, other_value):
return ~self.is_contained_by_case_insensitive(other_value)

@staticmethod
def _map_regex(series, func):
# pandas 3 returns nullable BooleanDtype from .map(); normalize to numpy
# bool so ~ and & behave identically for both positive and negated callers.
return series.map(func, na_action="ignore").fillna(False).astype(bool)

@log_operator_execution
@type_operator(FIELD_DATAFRAME)
def prefix_matches_regex(self, other_value):
Expand All @@ -705,10 +711,10 @@ def prefix_matches_regex(self, other_value):
converted_strings = self.value[target].map(
lambda x: self._regex_str_conversion(x)
)
results = converted_strings.notna() & converted_strings.astype(str).map(
lambda x: re.search(comparator, x[:prefix]) is not None
return converted_strings.notna() & self._map_regex(
converted_strings.astype(str),
lambda x: re.search(comparator, x[:prefix]) is not None,
)
return results

@log_operator_execution
@type_operator(FIELD_DATAFRAME)
Expand All @@ -719,10 +725,10 @@ def not_prefix_matches_regex(self, other_value):
converted_strings = self.value[target].map(
lambda x: self._regex_str_conversion(x)
)
results = converted_strings.notna() & ~converted_strings.astype(str).map(
lambda x: re.search(comparator, x[:prefix]) is not None
return converted_strings.notna() & ~self._map_regex(
converted_strings.astype(str),
lambda x: re.search(comparator, x[:prefix]) is not None,
)
return results

@log_operator_execution
@type_operator(FIELD_DATAFRAME)
Expand All @@ -733,10 +739,10 @@ def suffix_matches_regex(self, other_value):
converted_strings = self.value[target].map(
lambda x: self._regex_str_conversion(x)
)
results = converted_strings.notna() & converted_strings.astype(str).map(
lambda x: re.search(comparator, x[-suffix:]) is not None
return converted_strings.notna() & self._map_regex(
converted_strings.astype(str),
lambda x: re.search(comparator, x[-suffix:]) is not None,
)
return results

@log_operator_execution
@type_operator(FIELD_DATAFRAME)
Expand All @@ -747,10 +753,10 @@ def not_suffix_matches_regex(self, other_value):
converted_strings = self.value[target].map(
lambda x: self._regex_str_conversion(x)
)
results = converted_strings.notna() & ~converted_strings.astype(str).map(
lambda x: re.search(comparator, x[-suffix:]) is not None
return converted_strings.notna() & ~self._map_regex(
converted_strings.astype(str),
lambda x: re.search(comparator, x[-suffix:]) is not None,
)
return results

@log_operator_execution
@type_operator(FIELD_DATAFRAME)
Expand All @@ -760,10 +766,10 @@ def matches_regex(self, other_value):
converted_strings = self.value[target].map(
lambda x: self._regex_str_conversion(x)
)
results = converted_strings.notna() & converted_strings.astype(str).str.match(
comparator
return converted_strings.notna() & self._map_regex(
converted_strings.astype(str),
lambda x: re.match(comparator, x) is not None,
)
return results

@log_operator_execution
@type_operator(FIELD_DATAFRAME)
Expand All @@ -773,10 +779,10 @@ def not_matches_regex(self, other_value):
converted_strings = self.value[target].map(
lambda x: self._regex_str_conversion(x)
)
results = converted_strings.notna() & ~converted_strings.astype(str).str.match(
comparator
return converted_strings.notna() & ~self._map_regex(
converted_strings.astype(str),
lambda x: re.match(comparator, x) is not None,
)
return results

@log_operator_execution
@type_operator(FIELD_DATAFRAME)
Expand Down
2 changes: 1 addition & 1 deletion cdisc_rules_engine/check_operators/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def default_value(self):


def is_valid_date(date_string: str) -> bool:
if date_string is None or not isinstance(date_string, str):
if not isinstance(date_string, str):
return False
try:
isoparse(date_string)
Expand Down
2 changes: 1 addition & 1 deletion cdisc_rules_engine/operations/record_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def _build_effective_grouping(self) -> tuple[list, dict]:
if self.params.dataframe[col].isna().all():
all_na_cols[col] = None
elif (
self.params.dataframe[col].dtype == "object"
pd.api.types.is_string_dtype(self.params.dataframe[col])
and self.params.dataframe[col].fillna("").str.strip().eq("").all()
):
all_na_cols[col] = ""
Expand Down