Skip to content

Commit 07dd926

Browse files
operator rework (#1276)
Co-authored-by: RamilCDISC <113539111+RamilCDISC@users.noreply.github.com>
1 parent f19e750 commit 07dd926

1 file changed

Lines changed: 43 additions & 16 deletions

File tree

cdisc_rules_engine/check_operators/dataframe_operators.py

Lines changed: 43 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1121,26 +1121,53 @@ def is_not_unique_relationship(self, other_value):
11211121
comparator = self.replace_all_prefixes(comparator)
11221122
else:
11231123
comparator = self.replace_prefix(comparator)
1124-
# remove repeating rows
1125-
df_without_duplicates: DatasetInterface = self.value[
1126-
[target, comparator]
1127-
].drop_duplicates()
1128-
# we need to check if ANY of the columns (target or comparator) is duplicated
1129-
duplicated_comparator = df_without_duplicates[comparator].duplicated(keep=False)
1130-
duplicated_target = df_without_duplicates[target].duplicated(keep=False)
1124+
df_subset = self.value[[target, comparator]].dropna(how="all")
1125+
df_without_duplicates = df_subset.drop_duplicates()
1126+
violated_targets = self._find_relationship_violations(
1127+
df_without_duplicates, target, comparator
1128+
)
11311129
result = self.value.convert_to_series([False] * len(self.value))
1132-
if duplicated_comparator.any():
1133-
duplicated_comparator_values = set(
1134-
df_without_duplicates[duplicated_comparator][comparator]
1135-
)
1136-
result += self.value[comparator].isin(duplicated_comparator_values)
1137-
if duplicated_target.any():
1138-
duplicated_target_values = set(
1139-
df_without_duplicates[duplicated_target][target]
1130+
if violated_targets:
1131+
clean_targets = {
1132+
v for v in violated_targets if pd.notna(v) and v != "" and v is not None
1133+
}
1134+
has_null_target = any(
1135+
pd.isna(v) or v == "" or v is None for v in violated_targets
11401136
)
1141-
result += self.value[target].isin(duplicated_target_values)
1137+
if clean_targets:
1138+
result = result | self.value[target].isin(clean_targets)
1139+
if has_null_target:
1140+
result = result | self.value[target].isna()
11421141
return result
11431142

1143+
def _find_relationship_violations(self, df_without_duplicates, target, comparator):
    """Find all target values that violate one-to-one relationship constraints.

    A target value is reported when either:

    * it is non-null and is paired with more than one distinct comparator
      value, where every missing comparator (NaN/None/NA or the empty
      string) counts as one and the same "missing" value; or
    * it shares a non-missing comparator value with at least one other row.
      In this case every target on those rows is reported, including
      null targets.

    Args:
        df_without_duplicates: frame holding the ``target`` and
            ``comparator`` columns. The second rule counts rows, so the
            frame is expected to contain each (target, comparator) pair
            only once, as the parameter name indicates.
        target: name of the target column.
        comparator: name of the comparator column.

    Returns:
        A set with the violating target values; it may contain null
        values coming from the second rule.
    """
    # A private object cannot collide with real data, unlike a string
    # marker such as "NULL_PLACEHOLDER", which would silently merge a
    # genuine comparator value of that text with the missing ones.
    missing_comparator = object()
    comparators_by_target = {}
    targets_by_comparator = {}

    # Single pass over the rows instead of re-filtering the whole frame
    # once per unique target value and once per unique comparator value.
    for target_val, comp_val in zip(
        df_without_duplicates[target], df_without_duplicates[comparator]
    ):
        comp_is_missing = pd.isna(comp_val) or comp_val == ""
        if not pd.isna(target_val):
            comparators_by_target.setdefault(target_val, set()).add(
                missing_comparator if comp_is_missing else comp_val
            )
        if not comp_is_missing:
            targets_by_comparator.setdefault(comp_val, []).append(target_val)

    # Rule 1: one target mapped to several distinct comparators.
    violated_targets = {
        target_val
        for target_val, comparators in comparators_by_target.items()
        if len(comparators) > 1
    }
    # Rule 2: one comparator appearing on several rows.
    for targets in targets_by_comparator.values():
        if len(targets) > 1:
            violated_targets.update(targets)
    return violated_targets
1170+
11441171
@log_operator_execution
11451172
@type_operator(FIELD_DATAFRAME)
11461173
def is_unique_relationship(self, other_value):

0 commit comments

Comments
 (0)