@@ -1121,26 +1121,53 @@ def is_not_unique_relationship(self, other_value):
11211121 comparator = self .replace_all_prefixes (comparator )
11221122 else :
11231123 comparator = self .replace_prefix (comparator )
1124- # remove repeating rows
1125- df_without_duplicates : DatasetInterface = self .value [
1126- [target , comparator ]
1127- ].drop_duplicates ()
1128- # we need to check if ANY of the columns (target or comparator) is duplicated
1129- duplicated_comparator = df_without_duplicates [comparator ].duplicated (keep = False )
1130- duplicated_target = df_without_duplicates [target ].duplicated (keep = False )
1124+ df_subset = self .value [[target , comparator ]].dropna (how = "all" )
1125+ df_without_duplicates = df_subset .drop_duplicates ()
1126+ violated_targets = self ._find_relationship_violations (
1127+ df_without_duplicates , target , comparator
1128+ )
11311129 result = self .value .convert_to_series ([False ] * len (self .value ))
1132- if duplicated_comparator .any ():
1133- duplicated_comparator_values = set (
1134- df_without_duplicates [duplicated_comparator ][comparator ]
1135- )
1136- result += self .value [comparator ].isin (duplicated_comparator_values )
1137- if duplicated_target .any ():
1138- duplicated_target_values = set (
1139- df_without_duplicates [duplicated_target ][target ]
1130+ if violated_targets :
1131+ clean_targets = {
1132+ v for v in violated_targets if pd .notna (v ) and v != "" and v is not None
1133+ }
1134+ has_null_target = any (
1135+ pd .isna (v ) or v == "" or v is None for v in violated_targets
11401136 )
1141- result += self .value [target ].isin (duplicated_target_values )
1137+ if clean_targets :
1138+ result = result | self .value [target ].isin (clean_targets )
1139+ if has_null_target :
1140+ result = result | self .value [target ].isna ()
11421141 return result
11431142
def _find_relationship_violations(self, df_without_duplicates, target, comparator) -> set:
    """Find all target values that violate one-to-one relationship constraints.

    A target value is reported when either:

    * it is paired with more than one distinct comparator value, where every
      missing (NaN/None) or empty-string comparator counts as one shared
      "null" value; or
    * it sits in a row whose non-missing, non-empty comparator value occurs
      in more than one row of ``df_without_duplicates``.

    Args:
        df_without_duplicates: Tabular data that can be indexed by column
            label and whose columns can be iterated value by value. The
            second rule counts rows, so it relies on repeated
            (target, comparator) rows having been removed already, as the
            parameter name indicates.
        target: Label of the target column.
        comparator: Label of the comparator column.

    Returns:
        A set of offending target values. Rows flagged by the second rule
        contribute their target value as-is, so the set may contain missing
        values or empty strings.
    """
    # Private marker standing in for "no comparator value". A fresh object can
    # never compare equal to real cell content, whereas a reserved string
    # would silently merge with a cell that happens to hold the same text.
    null_marker = object()

    comparators_by_target = {}
    targets_by_comparator = {}

    # One pass over the rows builds both groupings, instead of re-filtering
    # the whole frame once per unique target and once per unique comparator.
    rows = zip(df_without_duplicates[target], df_without_duplicates[comparator])
    for target_val, comp_val in rows:
        comp_is_blank = pd.isna(comp_val) or comp_val == ""

        # Missing targets are never checked for "many comparators"; empty
        # strings are kept as ordinary target values here.
        if not pd.isna(target_val):
            comparators_by_target.setdefault(target_val, set()).add(
                null_marker if comp_is_blank else comp_val
            )

        # Missing/empty comparators never count as a shared comparator.
        if not comp_is_blank:
            targets_by_comparator.setdefault(comp_val, []).append(target_val)

    # Rule 1: a target mapped to several distinct comparators.
    violated_targets = {
        target_val
        for target_val, comp_vals in comparators_by_target.items()
        if len(comp_vals) > 1
    }

    # Rule 2: every target in a row whose comparator appears in several rows.
    for target_vals in targets_by_comparator.values():
        if len(target_vals) > 1:
            violated_targets.update(target_vals)

    return violated_targets
1170+
11441171 @log_operator_execution
11451172 @type_operator (FIELD_DATAFRAME )
11461173 def is_unique_relationship (self , other_value ):
0 commit comments