Skip to content

Commit ba6d2c6

Browse files
alexfurmenkovSFJohnson24RamilCDISC
authored
1329: Add DatasetIsCustom operation to determine if a dataset is custom (#1580)
* Add DatasetIsCustom operation to determine if a dataset is custom * Enhance DatasetIsCustom operation to accurately identify custom datasets based on supplementary domain prefixes and library presence * Add RelatedDomainIsCustom operation to identify custom rdomains * minor changes * Add unit tests for DatasetIsCustom operation to validate custom dataset identification * Add unit tests for RelatedDomainIsCustom operation to validate custom domain identification * Require dataset_path parameter in DatasetIsCustom operation for validation * Add dataset_is_custom and related_domain_is_custom operations with documentation * Remove dataset_path validation from DatasetIsCustom operation * Add dataset_is_custom and related_domain_is_custom fields to dataset metadata examples * Remove dataset_is_custom operation and update related documentation --------- Co-authored-by: Samuel Johnson <96841389+SFJohnson24@users.noreply.github.com> Co-authored-by: RamilCDISC <113539111+RamilCDISC@users.noreply.github.com>
1 parent c7c0a62 commit ba6d2c6

6 files changed

Lines changed: 158 additions & 16 deletions

File tree

cdisc_rules_engine/models/sdtm_dataset_metadata.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,27 @@ class SDTMDatasetMetadata(DatasetMetadata):
1212

1313
"""
1414
Examples
15-
| name | unsplit_name | is_supp | domain | rdomain | is_ap | ap_suffix |
16-
| -------- | ------------ | ------- | ------ | ------- | ----- | --------- |
17-
| QS | QS | False | QS | None | False | |
18-
| QSX | QS | False | QS | None | False | |
19-
| QSXX | QS | False | QS | None | False | |
20-
| SUPPQS | SUPPQS | True | None | QS | False | |
21-
| SUPPQSX | SUPPQS | True | None | QS | False | |
22-
| SUPPQSXX | SUPPQS | True | None | QS | False | |
23-
| APQS | APQS | False | APQS | None | True | QS |
24-
| APQSX | APQS | False | APQS | None | True | QS |
25-
| APQSXX | APQS | False | APQS | None | True | QS |
26-
| SQAPQS | SQAPQS | True | None | APQS | True | |
27-
| SQAPQSX | SQAPQS | True | None | APQS | True | |
28-
| SQAPQSXX | SQAPQS | True | None | APQS | True | |
29-
| RELREC | RELREC | False | None | None | False | |
30-
"""
15+
| name | unsplit_name | is_supp | domain | rdomain | is_ap | ap_suffix | domain_is_custom | related_domain | related_domain_is_custom |
16+
| -------- | ------------ | ------- | ------ | ------- | ----- | --------- | ----------------- | -------------- | ------------------------ |
17+
| QS | QS | False | QS | None | False | | False | | |
18+
| QSX | QS | False | QS | None | False | | False | | |
19+
| QSXX | QS | False | QS | None | False | | False | | |
20+
| SUPPQS | SUPPQS | True | None | QS | False | | False | QS | |
21+
| SUPPQSX | SUPPQS | True | None | QS | False | | False | QS | |
22+
| SUPPQSXX | SUPPQS | True | None | QS | False | | False | QS | |
23+
| APQS | APQS | False | APQS | None | True | QS | False | QS | |
24+
| APQSX | APQS | False | APQS | None | True | QS | False | QS | |
25+
| APQSXX | APQS | False | APQS | None | True | QS | False | QS | |
26+
| SQAPQS | SQAPQS | True | None | APQS | True | | False | QS | |
27+
| SQAPQSX | SQAPQS | True | None | APQS | True | | False | QS | |
28+
| SQAPQSXX | SQAPQS | True | None | APQS | True | | False | QS | |
29+
| RELREC | RELREC | False | None | None | False | | False | | |
30+
| XX | XX | False | XX | None | False | | True | | |
31+
| SUPPXX | SUPPXX | True | None | XX | False | | False | XX | True |
32+
| APXX | APXX | False | APXX | None | True | XX | False | XX | True |
33+
| SQAPXX | SQAPXX | True | None | APXX | True | | False | XX | True |
34+
| FA | FA | False | FA | None | False | | False | | |
35+
""" # noqa: E501 W291
3136

3237
@property
3338
def domain(self) -> Union[str, None]:

cdisc_rules_engine/operations/operations_factory.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from cdisc_rules_engine.operations.mean import Mean
3131
from cdisc_rules_engine.operations.domain_is_custom import DomainIsCustom
3232
from cdisc_rules_engine.operations.domain_label import DomainLabel
33+
from cdisc_rules_engine.operations.related_domain_is_custom import RelatedDomainIsCustom
3334
from cdisc_rules_engine.operations.standard_domains import StandardDomains
3435
from cdisc_rules_engine.operations.meddra_code_references_validator import (
3536
MedDRACodeReferencesValidator,
@@ -123,6 +124,7 @@ class OperationsFactory(FactoryInterface):
123124
"variable_count": VariableCount,
124125
"variable_is_null": VariableIsNull,
125126
"domain_is_custom": DomainIsCustom,
127+
"related_domain_is_custom": RelatedDomainIsCustom,
126128
"domain_label": DomainLabel,
127129
"standard_domains": StandardDomains,
128130
"required_variables": RequiredVariables,
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from cdisc_rules_engine.operations.base_operation import BaseOperation
2+
3+
4+
class RelatedDomainIsCustom(BaseOperation):
    """Report whether the domain related to a supplementary dataset is custom."""

    def _execute_operation(self) -> bool:
        """
        Check whether the related domain of the current domain is custom.

        Scans ``self.params.datasets`` for the first supplementary dataset
        whose ``rdomain`` is a suffix of ``self.params.domain`` and checks
        that ``rdomain`` against the ``"domains"`` entry of the library's
        standard metadata.

        Returns:
            True if a matching supplementary dataset is found and its
            ``rdomain`` is not listed in the standard domains.
            False if the ``rdomain`` is standard or if no supplementary
            dataset matches the current domain.
        """
        standard_data: dict = self.library_metadata.standard_metadata

        for ds in self.params.datasets:
            # A supplementary dataset without a usable rdomain cannot be
            # matched: endswith(None) raises TypeError and endswith("")
            # is true for every domain, which would flag it as custom.
            if not (ds.is_supp and ds.rdomain):
                continue
            if self.params.domain.endswith(ds.rdomain):
                return ds.rdomain not in standard_data.get("domains", {})
        return False

resources/schema/rule/Operations.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,15 @@
5757
"required": ["id", "operator"],
5858
"type": "object"
5959
},
60+
{
61+
"properties": {
62+
"operator": {
63+
"const": "related_domain_is_custom"
64+
}
65+
},
66+
"required": ["id", "operator"],
67+
"type": "object"
68+
},
6069
{
6170
"properties": {
6271
"operator": {

resources/schema/rule/Operations.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,30 @@ Output
458458
true
459459
```
460460
461+
### related_domain_is_custom
462+
463+
Checks whether the related domain of a supplementary dataset (for example, the parent domain of a SUPP-- dataset) is not present in the set of standard domains for the provided standard and version. This is useful for determining whether supplemental qualifiers point to non-standard or custom domains. Returns false when no supplementary dataset matches the target domain.
464+
465+
Input
466+
467+
Target Domain: SUPPXX
468+
469+
Product: sdtmig
470+
471+
Version: 3-4
472+
473+
```yaml
474+
Operations:
475+
- operator: related_domain_is_custom
476+
id: $related_domain_is_custom
477+
```
478+
479+
Output
480+
481+
```
482+
true
483+
```
484+
461485
### domain_label
462486

463487
Returns the label for the domain the operation is executing on within the provided standard.
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import pytest
2+
3+
from cdisc_rules_engine.models.library_metadata_container import (
4+
LibraryMetadataContainer,
5+
)
6+
from cdisc_rules_engine.operations.related_domain_is_custom import (
7+
RelatedDomainIsCustom,
8+
)
9+
10+
11+
class DummyDataset:
12+
def __init__(self, name: str, is_supp: bool, rdomain: str):
13+
self.name = name
14+
self.is_supp = is_supp
15+
self.rdomain = rdomain
16+
17+
18+
class DummyParams:
19+
def __init__(self, datasets, domain: str):
20+
self.datasets = datasets
21+
self.domain = domain
22+
23+
24+
@pytest.mark.parametrize(
25+
"description, standard_domains, study_datasets, domain, expected",
26+
[
27+
(
28+
# Related SUPP domain is not custom when its referenced domain
29+
# exists in standard domains.
30+
"supp_related_domain_not_custom_when_rdomain_in_standard",
31+
{"AE"},
32+
[
33+
DummyDataset(name="SUPPAE", is_supp=True, rdomain="AE"),
34+
],
35+
"SUPPAE",
36+
False,
37+
),
38+
(
39+
# Related SUPP domain is custom when its referenced domain is not
40+
# present in standard domains.
41+
"supp_related_domain_custom_when_rdomain_not_in_standard",
42+
{"AE"},
43+
[
44+
DummyDataset(name="SUPPXX", is_supp=True, rdomain="XX"),
45+
],
46+
"SUPPXX",
47+
True,
48+
),
49+
(
50+
# If there is no matching supplementary dataset for the domain,
51+
# operation should treat it as non-custom (fallback to False).
52+
"no_matching_supp_dataset_returns_false",
53+
{"AE"},
54+
[
55+
DummyDataset(name="SUPPAE", is_supp=True, rdomain="AE"),
56+
],
57+
"DM",
58+
False,
59+
),
60+
],
61+
)
62+
def test_related_domain_is_custom(
63+
description, standard_domains, study_datasets, domain, expected
64+
):
65+
"""Verify RelatedDomainIsCustom behaviour for several scenarios.
66+
67+
Scenarios covered:
68+
- supplementary domain whose referenced domain is standard (not custom);
69+
- supplementary domain whose referenced domain is not standard (custom);
70+
- domain without matching supplementary dataset (falls back to False).
71+
"""
72+
73+
library_metadata = LibraryMetadataContainer(
74+
standard_metadata={"domains": standard_domains}
75+
)
76+
params = DummyParams(datasets=study_datasets, domain=domain)
77+
78+
op = RelatedDomainIsCustom(
79+
params=params,
80+
library_metadata=library_metadata,
81+
original_dataset=None,
82+
cache_service=None,
83+
data_service=None,
84+
)
85+
86+
assert op._execute_operation() is expected

0 commit comments

Comments
 (0)