Skip to content

Commit f19e750

Browse files
Define metadata (#1273)
* work * update * builder * tests * ok * error handling * update docs --------- Co-authored-by: RamilCDISC <113539111+RamilCDISC@users.noreply.github.com>
1 parent 37333d2 commit f19e750

12 files changed

Lines changed: 75 additions & 6 deletions

cdisc_rules_engine/dataset_builders/dataset_metadata_define_dataset_builder.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ def build(self):
2525
define_dataset_class - dataset class
2626
define_dataset_structure - dataset structure
2727
define_dataset_is_non_standard - whether a dataset is non-standard
28+
define_dataset_variables - dataset variables
29+
define_dataset_key_sequence - dataset key sequence
2830
2931
...,
3032
"""

cdisc_rules_engine/dataset_builders/define_variables_dataset_builder.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def build(self):
2222
"define_variable_order_number",
2323
"define_variable_has_codelist",
2424
"define_variable_codelist_coded_values",
25+
"define_variable_codelist_coded_codes",
2526
"define_variable_mandatory",
2627
"""
2728
# get Define XML metadata for domain and use it as a rule comparator

cdisc_rules_engine/dataset_builders/define_variables_with_library_metadata.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def build(self):
2121
"define_variable_order_number",
2222
"define_variable_has_codelist",
2323
"define_variable_codelist_coded_values",
24+
"define_variable_codelist_coded_codes",
2425
"define_variable_mandatory",
2526
"define_variable_has_comment",
2627
"library_variable_name",

cdisc_rules_engine/dataset_builders/variables_metadata_with_define_and_library_dataset_builder.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,9 @@ def build(self):
2626
define_variable_has_no_data,
2727
define_variable_order_number,
2828
define_variable_has_codelist,
29-
define_variable_codelist_coded_values
30-
define_variable_mandatory
29+
define_variable_codelist_coded_values,
30+
define_variable_codelist_coded_codes,
31+
define_variable_mandatory,
3132
library_variable_name,
3233
library_variable_label,
3334
library_variable_data_type,

cdisc_rules_engine/dataset_builders/variables_metadata_with_define_dataset_builder.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ def build(self):
2626
define_variable_has_no_data,
2727
define_variable_order_number,
2828
define_variable_has_codelist,
29-
define_variable_codelist_coded_values
29+
define_variable_codelist_coded_values,
30+
define_variable_codelist_coded_codes,
3031
define_variable_mandatory
3132
"""
3233
# get Define XML metadata for domain and use it as a rule comparator

cdisc_rules_engine/operations/codelist_terms.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,18 @@ def _handle_single_version(self) -> pd.Series:
8686
ct_packages = self.library_metadata._ct_package_metadata
8787
if "define_XML_merged_CT" in ct_packages:
8888
ct_package_data = ct_packages["define_XML_merged_CT"]
89+
elif not ct_packages:
90+
raise MissingDataError(
91+
"CT package data is not populated. "
92+
"A valid define.xml file or -ct command is required to execute."
93+
)
8994
else:
9095
ct_package_data = next(
91-
(pkg for name, pkg in ct_packages.items() if name != "extensible")
96+
(
97+
pkg
98+
for name, pkg in ct_packages.items()
99+
if name != "extensible" and not name.startswith("define-xml")
100+
)
92101
)
93102
except AttributeError as e:
94103
logger.warning(

cdisc_rules_engine/rules_engine.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,22 @@ def handle_validation_exceptions( # noqa
454454
message=message,
455455
status=ExecutionStatus.SKIPPED.value,
456456
)
457+
elif isinstance(
458+
exception, AttributeError
459+
) and "'NoneType' object has no attribute" in str(exception):
460+
error_obj = ValidationErrorContainer(
461+
dataset=os.path.basename(dataset_path),
462+
message="Missing field during execution, rule may not be applicable- unable to process dataset",
463+
status=ExecutionStatus.SKIPPED.value,
464+
)
465+
message = "rule evaluation skipped - missing metadata"
466+
errors = [error_obj]
467+
return ValidationErrorContainer(
468+
dataset=os.path.basename(dataset_path),
469+
errors=errors,
470+
message=message,
471+
status=ExecutionStatus.SKIPPED.value,
472+
)
457473
else:
458474
error_obj = FailedValidationEntity(
459475
dataset=os.path.basename(dataset_path),

cdisc_rules_engine/services/define_xml/base_define_xml_reader.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ def _get_item_def_representation(self, itemdef, itemref, codelists, index) -> di
349349
"define_variable_length": None,
350350
"define_variable_has_codelist": False,
351351
"define_variable_codelist_coded_values": [],
352+
"define_variable_codelist_coded_codes": [],
352353
"define_variable_mandatory": None,
353354
"define_variable_has_comment": False,
354355
}
@@ -377,6 +378,9 @@ def _get_item_def_representation(self, itemdef, itemref, codelists, index) -> di
377378
data["define_variable_codelist_coded_values"].extend(
378379
self._get_codelist_coded_values(codelist)
379380
)
381+
data["define_variable_codelist_coded_codes"].extend(
382+
self._get_codelist_coded_codes(codelist)
383+
)
380384
if itemdef.Origin:
381385
data["define_variable_origin_type"] = self._get_origin_type(itemdef)
382386
data["define_variable_has_no_data"] = getattr(itemref, "HasNoData", "")
@@ -405,6 +409,14 @@ def _get_codelist_coded_values(self, codelist):
405409
for codelist_item in codelist.CodeListItem + codelist.EnumeratedItem:
406410
yield codelist_item.CodedValue
407411

412+
def _get_codelist_coded_codes(self, codelist):
413+
if codelist:
414+
for codelist_item in codelist.CodeListItem + codelist.EnumeratedItem:
415+
if hasattr(codelist_item, "Alias") and codelist_item.Alias:
416+
for alias in codelist_item.Alias:
417+
if hasattr(alias, "Name"):
418+
yield alias.Name
419+
408420
@abstractmethod
409421
def _get_origin_type(self, itemdef):
410422
pass

resources/schema/Rule_Type.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ any:
119119
- `define_variable_order_number`
120120
- `define_variable_has_codelist`
121121
- `define_variable_codelist_coded_values`
122+
- `define_variable_codelist_coded_codes`
122123
- `define_variable_has_comment`
123124

124125
#### Rule Macro
@@ -143,6 +144,8 @@ Attach variable codelist and codelist terms
143144
- `define_variable_order_number`
144145
- `define_variable_has_codelist`
145146
- `define_variable_codelist_coded_values`
147+
- `define_variable_codelist_coded_codes`
148+
- `define_variable_mandatory`
146149
- `define_variable_has_comment`
147150
- `library_variable_name`
148151
- `library_variable_order_number`
@@ -363,7 +366,22 @@ Attach define xml metadata at value level
363366
- `variable_`...
364367
- `define_variable_name`
365368
- `define_variable_label`
366-
- `define_variable_`...
369+
- `define_variable_data_type`
370+
- `define_variable_is_collected`
371+
- `define_variable_role`
372+
- `define_variable_size`
373+
- `define_variable_ccode`
374+
- `define_variable_format`
375+
- `define_variable_allowed_terms`
376+
- `define_variable_origin_type`
377+
- `define_variable_has_no_data`
378+
- `define_variable_order_number`
379+
- `define_variable_length`
380+
- `define_variable_has_codelist`
381+
- `define_variable_codelist_coded_values`
382+
- `define_variable_codelist_coded_codes`
383+
- `define_variable_mandatory`
384+
- `define_variable_has_comment`
367385

368386
#### Rule Macro
369387

@@ -420,6 +438,7 @@ Attach define xml metadata at variable level
420438
- `define_variable_length`
421439
- `define_variable_has_codelist`
422440
- `define_variable_codelist_coded_values`
441+
- `define_variable_codelist_coded_codes`
423442
- `define_variable_mandatory`
424443
- `define_variable_has_comment`
425444
- `library_variable_name`

scripts/script_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ def load_rules_from_local(args) -> List[dict]:
367367
else:
368368
engine_logger.info(
369369
"No rules specified with -r rules flag. "
370-
"Validating with all rules in local directory"
370+
"Validating with rules in local directory"
371371
)
372372
keys = None
373373

0 commit comments

Comments
 (0)