Skip to content

Commit 6689709

Browse files
committed
change csv conversion to an engine output format
1 parent 0f3f781 commit 6689709

9 files changed

Lines changed: 89 additions & 12 deletions

File tree

.github/workflows/validate-published-rules.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ jobs:
9898
CORE_IDS_ARG="--core-ids ${{ inputs.core_ids }}"
9999
fi
100100
101-
# CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004" # TODO: Remove this before merging to main
101+
CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004 CORE-000005 CORE-000006" # TODO: Remove this before merging to main
102102
103103
./venv/bin/python engine/scripts/validate_published_rules.py \
104104
--rules-root "$(pwd)/open-rules" \
@@ -116,7 +116,7 @@ jobs:
116116
with:
117117
name: published-rules-validation-${{ github.run_id }}
118118
path: |
119-
open-rules/Published/**/results/results.json
119+
open-rules/Published/**/results/results.csv
120120
summary_table.md
121121
detail_report.md
122122
if-no-files-found: warn

cdisc_rules_engine/constants/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
NULL_FLAVORS = ["", None, {}, {None}, [], [None], np.nan]
77

8-
KNOWN_REPORT_EXTENSIONS = [".json", ".xlsx", ".xls"]
8+
KNOWN_REPORT_EXTENSIONS = [".json", ".xlsx", ".xls", ".csv"]
99

1010
VALIDATION_FORMATS_MESSAGE = (
1111
"SAS V5 XPT, Dataset-JSON (JSON or NDJSON), or Excel (XLSX)"

cdisc_rules_engine/enums/report_types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
class ReportTypes(BaseEnum):
55
XLSX = "XLSX"
66
JSON = "JSON"
7+
CSV = "CSV"

cdisc_rules_engine/services/reporting/base_report_data.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from abc import ABC
1+
from abc import ABC, abstractmethod
22
from io import IOBase
33
from typing import Iterable
44

@@ -53,3 +53,11 @@ def process_values(values: list[str]) -> list[str]:
5353
else:
5454
processed_values.append(value)
5555
return processed_values
56+
57+
@abstractmethod
def get_csv_rows(self) -> tuple[list[str], list[list[str]]]:
    """
    Build the tabular content of the CSV report.

    Returns a (header, rows) pair; every row is a list of string values
    with one entry per header column.
    """
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import csv
2+
import os
3+
from io import IOBase
4+
from typing import override
5+
6+
from cdisc_rules_engine.enums.report_types import ReportTypes
7+
from cdisc_rules_engine.models.validation_args import Validation_args
8+
from cdisc_rules_engine.services.reporting.base_report_data import BaseReportData
9+
10+
from .base_report import BaseReport
11+
12+
13+
class CsvReport(BaseReport):
    """
    Report writer that emits a results.csv file.

    The header and rows come from the report standard's get_csv_rows(),
    so the column layout is decided by the report standard, not here.
    The output is meant to be compatible with the cdisc-open-rules test
    harness baselines.
    """

    def __init__(
        self,
        report_standard: BaseReportData,
        args: Validation_args,
        template: IOBase | None = None,
    ):
        super().__init__(report_standard, args, template)

    @property
    @override
    def _file_ext(self) -> str:
        # Lower-cased enum value, i.e. "csv".
        return ReportTypes.CSV.value.lower()

    @override
    def write_report(self) -> None:
        # Create the parent directory first; a bare file name has no parent.
        parent_dir = os.path.dirname(self._output_name)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        columns, records = self._report_standard.get_csv_rows()

        # newline="" lets the csv module control line endings itself.
        with open(self._output_name, "w", newline="", encoding="utf-8") as out:
            csv.writer(out).writerows([columns, *records])

cdisc_rules_engine/services/reporting/report_factory.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .base_report import BaseReport
1414
from .excel_report import ExcelReport
1515
from .json_report import JsonReport
16+
from .csv_report import CsvReport
1617

1718

1819
class ReportFactory:
@@ -46,6 +47,7 @@ def __init__(
4647
self._output_type_service_map: dict[str, Type[BaseReport]] = {
4748
ReportTypes.XLSX.value: ExcelReport,
4849
ReportTypes.JSON.value: JsonReport,
50+
ReportTypes.CSV.value: CsvReport,
4951
}
5052
self._standard_type_map: dict[str, Type[BaseReportData]] = {
5153
"usdm": USDMReportData,

cdisc_rules_engine/services/reporting/sdtm_report_data.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,18 @@ def _generate_error_details(
347347
)
348348
return errors
349349

350+
def get_csv_rows(self) -> tuple[list[str], list[list[str]]]:
    """
    Return (header, rows) for the CSV output format.

    One row is produced for each (variable, value) pair of every entry in
    the "Issue Details" data sheet. A trailing ".csv" is stripped from the
    dataset name. A missing or None row number is written as an empty
    string rather than the literal text "None".
    """
    header = ["Dataset", "Record", "Variable", "Value"]
    rows = []
    for issue in self.data_sheets.get("Issue Details", []):
        dataset = (issue.get("dataset") or "").removesuffix(".csv")
        # Only None maps to ""; a legitimate row number of 0 must be kept,
        # so a plain `or ""` would be wrong here.
        row_number = issue.get("row")
        record = "" if row_number is None else str(row_number)
        # zip stops at the shorter list, so unpaired variables or values
        # are not written.
        pairs = zip(issue.get("variables") or [], issue.get("values") or [])
        rows.extend(
            [dataset, record, variable, str(value)] for variable, value in pairs
        )
    return header, rows
361+
350362
def get_rules_report_data(self) -> list[dict]:
351363
"""
352364
Generates the rules report data that goes into the excel export.

cdisc_rules_engine/services/reporting/usdm_report_data.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,17 @@ def _generate_error_details(
245245
)
246246
return errors
247247

248+
def get_csv_rows(self) -> tuple[list[str], list[list[str]]]:
    """
    Return (header, rows) for the CSV output format.

    Every entry of the "Issue Details" data sheet contributes one row per
    (attribute, value) pair, each prefixed with the entry's path.
    """
    columns = ["path", "attribute", "value"]
    records = []
    for entry in self.data_sheets.get("Issue Details", []):
        location = entry.get("path") or ""
        # zip stops at the shorter of the two lists.
        pairs = zip(entry.get("attributes") or [], entry.get("values") or [])
        records.extend([location, name, str(raw)] for name, raw in pairs)
    return columns, records
258+
248259
def get_rules_report_data(self) -> list[dict]:
249260
"""
250261
Generates the rules report data that goes into the excel export.

docs/cli-reference.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,14 @@ python core.py validate --help
6969

7070
### Output
7171

72-
| Flag | Description |
73-
| -------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- |
74-
| `-o, --output TEXT` | Output file path (without extension). Extension is added automatically based on format. |
75-
| `-of, --output-format [JSON\|XLSX]` | Output format. |
76-
| `-rr, --raw-report` | Raw output format (JSON only). |
77-
| `-mr, --max-report-rows INTEGER` | Max rows in the Issue Details tab of Excel output (default: 1000; 0 = unlimited). Also via `MAX_REPORT_ROWS` env var. |
78-
| `-me, --max-errors-per-rule INTEGER BOOLEAN` | Limit errors per rule. Format: `-me <limit> <per_dataset_flag>`. See below. |
79-
| `-rt, --report-template TEXT` | Path to a custom Excel report template. |
72+
| Flag | Description |
73+
| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
74+
| `-o, --output TEXT` | Output file path (without extension). Extension is added automatically based on format. |
75+
| `-of, --output-format [JSON\|XLSX\|CSV]`     | Output format. `CSV` writes issue rows directly (columns `Dataset, Record, Variable, Value`; USDM reports use `path, attribute, value`), compatible with the open-rules test harness. |
76+
| `-rr, --raw-report` | Raw output format (JSON only). |
77+
| `-mr, --max-report-rows INTEGER` | Max rows in the Issue Details tab of Excel output (default: 1000; 0 = unlimited). Also via `MAX_REPORT_ROWS` env var. |
78+
| `-me, --max-errors-per-rule INTEGER BOOLEAN` | Limit errors per rule. Format: `-me <limit> <per_dataset_flag>`. See below. |
79+
| `-rt, --report-template TEXT` | Path to a custom Excel report template. |
8080

8181
#### `--max-errors-per-rule` Detail
8282

0 commit comments

Comments
 (0)