1010 SOURCE_ROW_NUMBER ,
1111)
1212from cdisc_rules_engine .enums .sensitivity import Sensitivity
13- from cdisc_rules_engine .exceptions .custom_exceptions import InvalidOutputVariables
1413from cdisc_rules_engine .models .sdtm_dataset_metadata import SDTMDatasetMetadata
1514from cdisc_rules_engine .models .dataset_variable import DatasetVariable
1615from cdisc_rules_engine .models .validation_error_container import (
@@ -120,26 +119,41 @@ def generate_targeted_error_object(
120119 df_columns : set = set (data )
121120 targets_in_dataset = targets .intersection (df_columns )
122121 targets_not_in_dataset = targets .difference (df_columns )
123- errors_df = data [list (targets_in_dataset )]
122+ all_targets_missing = (
123+ len (targets_in_dataset ) == 0 and len (targets_not_in_dataset ) > 0
124+ )
125+ if targets_in_dataset :
126+ errors_df = data [list (targets_in_dataset )]
127+ else :
128+ errors_df = data
124129 if not targets :
125130 errors_df = data
126- if errors_df .empty :
127- raise InvalidOutputVariables (
128- f"Output variables: { list (targets )} not found in dataset"
129- )
131+
130132 if self .rule .get ("sensitivity" ) == Sensitivity .DATASET .value :
131133 # Only generate one error for rules with dataset sensitivity
134+ missing_vars = {
135+ target : "Not in dataset" for target in targets_not_in_dataset
136+ }
137+
138+ # Create the initial error
139+ error_value = (
140+ dict (errors_df .iloc [0 ].to_dict ()) if not all_targets_missing else {}
141+ )
142+
143+ # Add missing variables to the error value
144+ if missing_vars :
145+ error_value = {** error_value , ** missing_vars }
146+
132147 errors_list = [
133148 ValidationErrorEntity (
134- value = dict ( errors_df . iloc [ 0 ]. to_dict ()) ,
149+ value = error_value ,
135150 dataset = self ._get_dataset_name (data ),
136151 )
137152 ]
138153 elif self .rule .get ("sensitivity" ) == Sensitivity .RECORD .value :
139- errors_series : pd . Series = errors_df . apply (
140- lambda df_row : self . _create_error_object ( df_row , data ), axis = 1
154+ errors_list = self . _generate_errors_by_target_presence (
155+ data , targets_not_in_dataset , all_targets_missing , errors_df
141156 )
142- errors_list : List [ValidationErrorEntity ] = errors_series .tolist ()
143157 elif (
144158 self .rule .get ("sensitivity" ) is not None
145159 ): # rule sensitivity is incorrectly defined
@@ -163,14 +177,9 @@ def generate_targeted_error_object(
163177 errors = [error_entity ],
164178 )
165179 else : # rule sensitivity is undefined
166- errors_series : pd . Series = errors_df . apply (
167- lambda df_row : self . _create_error_object ( df_row , data ), axis = 1
180+ errors_list = self . _generate_errors_by_target_presence (
181+ data , targets_not_in_dataset , all_targets_missing , errors_df
168182 )
169- errors_list : List [ValidationErrorEntity ] = errors_series .tolist ()
170- missing_vars = {target : "Not in dataset" for target in targets_not_in_dataset }
171- if missing_vars :
172- for error in errors_list :
173- error .value = {** error .value , ** missing_vars }
174183 return ValidationErrorContainer (
175184 ** {
176185 "domain" : (
@@ -187,6 +196,69 @@ def generate_targeted_error_object(
187196 }
188197 )
189198
def _generate_errors_by_target_presence(
    self,
    data: pd.DataFrame,
    targets_not_in_dataset: Set[str],
    all_targets_missing: bool,
    errors_df: pd.DataFrame,
) -> List[ValidationErrorEntity]:
    """
    Build the per-record error list, depending on which targets exist.

    Two cases are handled:

    1. ``all_targets_missing`` is true: one ValidationErrorEntity is built
       for every row of ``data``. Its value maps each missing target to
       the string "Not in dataset"; row number, USUBJID and sequence are
       read from the row when available.
    2. Otherwise: ``self._create_error_object`` is applied to every row of
       ``errors_df`` and the missing targets (if any) are merged into each
       resulting error value.

    Args:
        data: The original dataframe.
        targets_not_in_dataset: Target variables not found in the dataset.
        all_targets_missing: Whether none of the targets are in the dataset.
        errors_df: Dataframe the per-row errors are built from in case 2.

    Returns:
        List of ValidationErrorEntity objects; empty when there are no rows.
    """
    missing_vars = {target: "Not in dataset" for target in targets_not_in_dataset}

    if all_targets_missing:
        # The sequence column name does not depend on the row: build it once.
        seq_column = f"{self.dataset_metadata.domain or ''}SEQ"
        errors_list: List[ValidationErrorEntity] = []
        for idx, row in data.iterrows():
            # Compute the positional fallback lazily: `idx + 1` raises for
            # non-numeric index labels, so it must only be evaluated when the
            # source row number column is really absent.
            if SOURCE_ROW_NUMBER in row:
                row_number = int(row[SOURCE_ROW_NUMBER])
            else:
                row_number = int(idx + 1)

            if "USUBJID" in row and not pd.isna(row["USUBJID"]):
                usubjid = str(row.get("USUBJID"))
            else:
                usubjid = None

            # _sequence_exists receives a one-element Series keyed by the
            # row label, together with that label.
            has_sequence = seq_column in row and self._sequence_exists(
                pd.Series({idx: row.get(seq_column)}), idx
            )
            sequence = int(row.get(seq_column)) if has_sequence else None

            errors_list.append(
                ValidationErrorEntity(
                    # Fresh dict per entity so errors never share state.
                    value=dict(missing_vars),
                    dataset=self._get_dataset_name(pd.DataFrame([row])),
                    row=row_number,
                    usubjid=usubjid,
                    sequence=sequence,
                )
            )
        return errors_list

    # With zero rows, DataFrame.apply(axis=1) may hand back a DataFrame
    # (which has no .tolist()); there is nothing to report, so mirror the
    # all-missing branch and return an empty list.
    if len(errors_df.index) == 0:
        return []

    errors_series: pd.Series = errors_df.apply(
        lambda df_row: self._create_error_object(df_row, data), axis=1
    )
    errors_list = errors_series.tolist()
    if missing_vars:
        for error in errors_list:
            error.value = {**error.value, **missing_vars}
    return errors_list

261+
190262 def _get_dataset_name (self , data : pd .DataFrame ) -> str :
191263 source_pathnames = data .get (SOURCE_FILENAME , [])
192264 source_filenames = [
0 commit comments