|
kwargs = {} |
|
builder = self.get_dataset_builder(rule, dataset_path, datasets, domain) |
|
dataset = builder.get_dataset() |
|
# Update rule for certain rule types |
|
# SPECIAL CASES FOR RULE TYPES ############################### |
|
# TODO: Handle these special cases better. |
|
if self.library_metadata: |
|
kwargs[ |
|
"variable_codelist_map" |
|
] = self.library_metadata.variable_codelist_map |
|
kwargs[ |
|
"codelist_term_maps" |
|
] = self.library_metadata.get_all_ct_package_metadata() |
|
if rule.get("rule_type") == RuleTypes.DEFINE_ITEM_METADATA_CHECK.value: |
|
if self.library_metadata: |
|
kwargs[ |
|
"variable_codelist_map" |
|
] = self.library_metadata.variable_codelist_map |
|
kwargs[ |
|
"codelist_term_maps" |
|
] = self.library_metadata.get_all_ct_package_metadata() |
|
|
|
elif ( |
|
rule.get("rule_type") |
|
== RuleTypes.VARIABLE_METADATA_CHECK_AGAINST_DEFINE.value |
|
): |
|
self.rule_processor.add_comparator_to_rule_conditions( |
|
rule, comparator=None, target_prefix="define_" |
|
) |
|
elif ( |
|
rule.get("rule_type") |
|
== RuleTypes.VALUE_LEVEL_METADATA_CHECK_AGAINST_DEFINE.value |
|
): |
|
value_level_metadata: List[dict] = self.get_define_xml_value_level_metadata( |
|
dataset_path, domain |
|
) |
|
kwargs["value_level_metadata"] = value_level_metadata |
|
|
|
elif ( |
|
rule.get("rule_type") |
|
== RuleTypes.DATASET_CONTENTS_CHECK_AGAINST_DEFINE_AND_LIBRARY.value |
|
): |
|
library_metadata: dict = self.library_metadata.variables_metadata.get( |
|
domain, {} |
|
) |
|
define_metadata: List[dict] = builder.get_define_xml_variables_metadata() |
|
targets: List[ |
|
str |
|
] = self.data_processor.filter_dataset_columns_by_metadata_and_rule( |
|
dataset.columns.tolist(), define_metadata, library_metadata, rule |
|
) |
|
rule_copy = deepcopy(rule) |
|
updated_conditions = RuleProcessor.duplicate_conditions_for_all_targets( |
|
rule_copy["conditions"], targets |
|
) |
|
rule_copy["conditions"].set_conditions(updated_conditions) |
|
# When duplicating conditions, |
|
# rule should be copied to prevent updates to concurrent rule executions |
|
return self.execute_rule( |
|
rule_copy, dataset, dataset_path, datasets, domain, **kwargs |
|
) |
|
|
|
kwargs["ct_packages"] = list(self.ct_packages) |
|
|
|
logger.info(f"Using dataset build by: {builder.__class__}") |
Reporting
For each of the following, we want:
type,name,number of calls,cumulative time,mean time,median time,min time,max time
Time taken for each dataset #908
Time taken to preprocess each dataset #909
cdisc-rules-engine/cdisc_rules_engine/rules_engine.py
Lines 334 to 339 in 68017ec
Time taken for each Operator #910
Refer to issue #898 ("feature: valid_codelist operator working, needs extensibility logic"): dataframe_operators.py has a new wrapper that can be used.
Time taken for each Operation #911
cdisc-rules-engine/cdisc_rules_engine/operations/base_operation.py
Line 47 in 68017ec
Time taken for each Dataset Builder #912
cdisc-rules-engine/cdisc_rules_engine/rules_engine.py
Lines 236 to 300 in 68017ec
Any remaining time unaccounted for #913
Telemetry reports (OpenTelemetry) #914
Test Data #915
Test data should include:
Output #916