Skip to content

Commit bd18669

Browse files
authored
Merge pull request #883 from cdisc-org/datasets
test XPT and logger
2 parents c9854f8 + 3f75cac commit bd18669

6 files changed

Lines changed: 167 additions & 26 deletions

File tree

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,10 @@ To obtain an api key, please follow the instructions found here: <https://wiki.c
206206
-ca, --cache TEXT Relative path to cache files containing pre
207207
loaded metadata and rules
208208
-dp, --dataset-path TEXT Absolute path to dataset file
209+
-d, --data TEXT Path to directory containing data files
210+
-l, --log-level [info|debug|error|critical|disabled|warn]
211+
Sets log level for engine logs, logs are
212+
disabled by default
209213
-s, --standard TEXT CDISC standard to validate against
210214
[required]
211215
-v, --version TEXT Standard version to validate against
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from cdisc_rules_engine.enums.base_enum import BaseEnum
2+
3+
4+
class TestDataFormatTypes(BaseEnum):
5+
JSON = "JSON"
6+
XPT = "XPT"

cdisc_rules_engine/models/test_args.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
"TestArgs",
55
[
66
"cache",
7-
"dataset_path",
7+
"dataset_paths",
8+
"log_level",
89
"rule",
910
"standard",
1011
"version",

core.py

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from cdisc_rules_engine.enums.default_file_paths import DefaultFilePaths
1414
from cdisc_rules_engine.enums.progress_parameter_options import ProgressParameterOptions
1515
from cdisc_rules_engine.enums.report_types import ReportTypes
16+
from cdisc_rules_engine.enums.dataformat_test_types import TestDataFormatTypes
1617
from cdisc_rules_engine.enums.dataformat_types import DataFormatTypes
1718
from cdisc_rules_engine.models.validation_args import Validation_args
1819
from cdisc_rules_engine.models.test_args import TestArgs
@@ -34,8 +35,11 @@
3435
from version import __version__
3536

3637

37-
def valid_data_file(data_path: list) -> Tuple[list, set]:
38-
allowed_formats = [format.value for format in DataFormatTypes]
38+
def valid_data_file(data_path: list, test: bool = False) -> Tuple[list, set]:
39+
if test:
40+
allowed_formats = [format.value for format in TestDataFormatTypes]
41+
else:
42+
allowed_formats = [format.value for format in DataFormatTypes]
3943
found_formats = set()
4044
file_list = []
4145
for file in data_path:
@@ -467,9 +471,22 @@ def list_rules(
467471
@click.option(
468472
"-dp",
469473
"--dataset-path",
470-
required=True,
474+
required=False,
471475
help="Absolute path to dataset file",
472476
)
477+
@click.option(
478+
"-d",
479+
"--data",
480+
required=False,
481+
help="Path to directory containing data files",
482+
)
483+
@click.option(
484+
"-l",
485+
"--log-level",
486+
default="disabled",
487+
type=click.Choice(["info", "debug", "error", "critical", "disabled", "warn"]),
488+
help="Sets log level for engine logs, logs are disabled by default",
489+
)
473490
@click.option(
474491
"-r",
475492
"--rule",
@@ -514,6 +531,8 @@ def test(
514531
ctx,
515532
cache_path: str,
516533
dataset_path: Tuple[str],
534+
data: str,
535+
log_level: str,
517536
rule: str,
518537
standard: str,
519538
version: str,
@@ -527,6 +546,34 @@ def test(
527546
validate_xml,
528547
define_xml_path: str,
529548
):
549+
logger = logging.getLogger("tester")
550+
if data:
551+
if dataset_path:
552+
logger.error(
553+
"Argument --dataset-path cannot be used together with argument --data"
554+
)
555+
ctx.exit()
556+
dataset_paths, found_formats = valid_data_file(
557+
[str(Path(data).joinpath(fn)) for fn in os.listdir(data)]
558+
)
559+
if len(found_formats) > 1:
560+
logger.error(
561+
f"Argument --data contains more than one allowed file format ({', '.join(found_formats)})." # noqa: E501
562+
)
563+
ctx.exit()
564+
elif dataset_path:
565+
dataset_paths, found_formats = valid_data_file([dataset_path])
566+
if len(found_formats) > 1:
567+
logger.error(
568+
f"Argument --dataset-path contains more than one allowed file format ({', '.join(found_formats)})." # noqa: E501
569+
)
570+
ctx.exit()
571+
else:
572+
logger.error(
573+
"You must pass one of the following arguments: --dataset-path, --data"
574+
)
575+
# no need to define dataset_paths here, the program execution will stop
576+
ctx.exit()
530577
external_dictionaries = ExternalDictionariesContainer(
531578
{
532579
DictionaryTypes.MEDDRA.value: meddra,
@@ -539,7 +586,8 @@ def test(
539586
validate_xml = True if validate_xml.lower() in ("y", "yes") else False
540587
args = TestArgs(
541588
cache_path,
542-
dataset_path,
589+
dataset_paths,
590+
log_level,
543591
rule,
544592
standard,
545593
version,

scripts/test_rule.py

Lines changed: 54 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,11 @@
2828
from cdisc_rules_engine.models.rule import Rule
2929
from cdisc_rules_engine.utilities.utils import generate_report_filename
3030
from scripts.script_utils import (
31+
get_datasets,
3132
fill_cache_with_dictionaries,
3233
get_cache_service,
3334
get_library_metadata_from_cache,
3435
)
35-
from cdisc_rules_engine.utilities.utils import get_directory_path
3636
from cdisc_rules_engine.enums.progress_parameter_options import ProgressParameterOptions
3737

3838
"""
@@ -54,7 +54,7 @@ def validate_single_rule(
5454
library_metadata: LibraryMetadataContainer,
5555
rule: dict = None,
5656
):
57-
set_log_level("ERROR")
57+
set_log_level(args)
5858
rule["conditions"] = ConditionCompositeFactory.get_condition_composite(
5959
rule["conditions"]
6060
)
@@ -68,11 +68,11 @@ def validate_single_rule(
6868
define_xml_path=args.define_xml_path,
6969
library_metadata=library_metadata,
7070
validate_xml=args.validate_xml,
71-
dataset_paths=args.dataset_path,
71+
dataset_paths=args.dataset_paths,
7272
)
7373
validated_domains = set()
7474
results = []
75-
directory = get_directory_path(args.dataset_path)
75+
directory = os.path.dirname(args.dataset_paths[0])
7676

7777
if rule.get("sensitivity").lower() == "study":
7878
results.append(
@@ -103,39 +103,78 @@ def validate_single_rule(
103103
return RuleValidationResult(rule, results)
104104

105105

106-
def set_log_level(level: str):
107-
if level == "disabled":
106+
def initialize_logger(disabled, log_level):
107+
if disabled:
108108
engine_logger.disabled = True
109109
else:
110-
engine_logger.setLevel(level)
110+
engine_logger.disabled = False
111+
engine_logger.setLevel(log_level)
112+
113+
114+
def set_log_level(args):
115+
if args.log_level.lower() == "disabled":
116+
engine_logger.disabled = True
117+
else:
118+
engine_logger.setLevel(args.log_level.lower())
111119

112120

113121
def test(args: TestArgs):
114-
set_log_level("ERROR")
122+
set_log_level(args)
115123
# fill cache
116124
CacheManager.register("RedisCacheService", RedisCacheService)
117125
CacheManager.register("InMemoryCacheService", InMemoryCacheService)
118126
manager = CacheManager()
119127
manager.start()
120128
shared_cache = get_cache_service(manager)
129+
engine_logger.info(f"Populating cache, cache path: {args.cache}")
121130
library_metadata: LibraryMetadataContainer = get_library_metadata_from_cache(args)
122131
# install dictionaries if needed
123132
dictionary_versions = fill_cache_with_dictionaries(shared_cache, args)
124133
with open(args.rule, "r", encoding="utf-8") as f:
125134
rules = [Rule.from_cdisc_metadata(json.load(f))]
126-
with open(args.dataset_path, "r") as f:
127-
data_json = json.load(f)
128-
datasets = [DummyDataset(data) for data in data_json.get("datasets", [])]
129135
data_service_factory = DataServiceFactory(
130136
config, shared_cache, args.standard, args.version
131137
)
132-
dummy_data_service = data_service_factory.get_dummy_data_service(datasets)
133138
data_service = data_service_factory.get_data_service()
134-
139+
datasets = []
140+
for dataset_path in args.dataset_paths:
141+
try:
142+
with open(dataset_path, "r") as f:
143+
data_json = json.load(f)
144+
datasets.extend(
145+
[DummyDataset(data) for data in data_json.get("datasets", [])]
146+
)
147+
except Exception as e:
148+
engine_logger.info(f"Dataset {dataset_path} could not be loaded as JSON: {e}")
149+
if not datasets:
150+
engine_logger.info(
151+
"No datasets loaded from JSON files, attempting to load using data service"
152+
)
153+
try:
154+
datasets = [
155+
DummyDataset(dataset)
156+
for dataset in get_datasets(data_service, args.dataset_paths)
157+
]
158+
for dataset_path in args.dataset_paths:
159+
filename = os.path.basename(dataset_path).lower()
160+
matching_dataset = next(
161+
dataset
162+
for dataset in datasets
163+
if dataset.filename.lower() == filename
164+
)
165+
df = data_service.get_dataset(dataset_name=dataset_path)
166+
matching_dataset.data = df.data
167+
except Exception as e:
168+
engine_logger.error(f"Data service failed to load datasets: {e}")
169+
dummy_data_service = data_service_factory.get_dummy_data_service(datasets)
135170
start = time.time()
136171
results = []
172+
# instantiate logger in each child process to maintain log level
173+
initializer = partial(
174+
initialize_logger, engine_logger.disabled, engine_logger._logger.level
175+
)
137176
# run each rule in a separate process
138-
with Pool(10) as pool:
177+
with Pool(10, initializer=initializer) as pool:
139178
with click.progressbar(
140179
length=len(rules),
141180
fill_char=click.style("\u2588", fg="green"),
@@ -163,7 +202,7 @@ def test(args: TestArgs):
163202
validation_args = Validation_args(
164203
None,
165204
None,
166-
[args.dataset_path],
205+
args.dataset_paths,
167206
None,
168207
os.path.join("resources", "templates", "report-template.xlsx"),
169208
args.standard,

tests/QARegressionTests/test_core/test_test_command.py

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import re
23
import subprocess
34
import unittest
45

@@ -19,24 +20,57 @@ def run_command(self, command):
1920
exit_code = process.returncode
2021
return exit_code, stdout.lower(), stderr.lower()
2122

23+
def test_test_command_with_all_options_one_data_source(self):
24+
command = (
25+
f"python core.py test "
26+
f"-c {os.path.join('resources', 'cache')} "
27+
f"-dp {os.path.join('tests', 'resources', 'CoreIssue164', 'Positive_Dataset.json')} "
28+
f"-r {os.path.join('tests', 'resources', 'Rule-CG0027.json')} "
29+
f"--whodrug "
30+
f"{os.path.join('tests', 'resources', 'dictionaries', 'whodrug')} "
31+
f"--meddra {os.path.join('tests', 'resources', 'dictionaries', 'meddra')} "
32+
f"-s sdtmig "
33+
f"-v 3.4 "
34+
f"-dv 2.1 "
35+
f"-dxp {os.path.join('tests', 'resources','define.xml')} "
36+
f"-l error"
37+
)
38+
exit_code, stdout, stderr = self.run_command(command)
39+
self.assertEqual(exit_code, 0)
40+
self.assertFalse(self.error_keyword in stdout)
41+
self.assertEqual(stderr, "", f"Error while executing command:\n{stderr}")
42+
2243
def test_test_command_with_all_options(self):
2344
command = (
2445
f"python core.py test "
2546
f"-c {os.path.join('resources', 'cache')} "
2647
f"-dp {os.path.join('tests', 'resources', 'CG0027-positive.json')} "
48+
f"-d {os.path.join('tests', 'resources', 'report_test_data')} "
2749
f"-r {os.path.join('tests', 'resources', 'Rule-CG0027.json')} "
2850
f"--whodrug "
2951
f"{os.path.join('tests', 'resources', 'dictionaries', 'whodrug')} "
3052
f"--meddra {os.path.join('tests', 'resources', 'dictionaries', 'meddra')} "
3153
f"-s sdtmig "
3254
f"-v 3.4 "
3355
f"-dv 2.1 "
34-
f"-dxp {os.path.join('tests', 'resources','define.xml')}"
56+
f"-dxp {os.path.join('tests', 'resources','define.xml')} "
57+
f"-l error"
3558
)
3659
exit_code, stdout, stderr = self.run_command(command)
3760
self.assertEqual(exit_code, 0)
3861
self.assertFalse(self.error_keyword in stdout)
39-
self.assertEqual(stderr, "", f"Error while executing command:\n{stderr}")
62+
self.assertFalse(self.error_keyword in stdout)
63+
expected_pattern = (
64+
r"\[error \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} - "
65+
r"core\.py:\d+\] - argument --dataset-path cannot be used together "
66+
r"with argument --data\n"
67+
)
68+
error_msg = (
69+
f"Error message format doesn't match expected pattern.\n"
70+
f"Actual: {stderr}\n"
71+
f"Expected pattern: {expected_pattern}"
72+
)
73+
self.assertTrue(re.match(expected_pattern, stderr), error_msg)
4074

4175
def test_test_command_without_dataset_path(self):
4276
command = (
@@ -45,10 +79,18 @@ def test_test_command_without_dataset_path(self):
4579
f"-r {os.path.join('tests', 'resources', 'Rule-CG0027.json')}"
4680
)
4781
exit_code, stdout, stderr = self.run_command(command)
48-
self.assertNotEqual(exit_code, 0)
49-
self.assertNotEqual(
50-
stderr, "", f"Error not raised while executing invalid command:\n{stderr}"
82+
self.assertEqual(exit_code, 0)
83+
expected_pattern = (
84+
r"\[error \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} - "
85+
r"core\.py:\d+\] - you must pass one of the following arguments: "
86+
r"--dataset-path, --data\n"
87+
)
88+
error_msg = (
89+
f"Error message format doesn't match expected pattern.\n"
90+
f"Actual: {stderr}\n"
91+
f"Expected pattern: {expected_pattern}"
5192
)
93+
self.assertTrue(re.match(expected_pattern, stderr), error_msg)
5294

5395
def test_test_command_without_rule(self):
5496
command = (
@@ -126,7 +168,8 @@ def test_test_command_with_vx_as_yes(self):
126168
f"-vx y"
127169
)
128170
exit_code, stdout, stderr = self.run_command(command)
129-
self.assertFalse(stderr == "")
171+
self.assertEqual(exit_code, 0)
172+
self.assertTrue(stderr == "")
130173

131174
def tearDown(self):
132175
for file_name in os.listdir("."):

0 commit comments

Comments
 (0)