From 2eb99be7670e26403d3c8aa0144234fd57f4a721 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Mon, 1 Jun 2026 15:59:04 -0400 Subject: [PATCH] linting changes --- .flake8 | 1 + .github/ISSUE_TEMPLATE/1-bug.yml | 4 +- .github/workflows/build-binary.yml | 39 ++++++- .github/workflows/check-schema-markdown.yml | 7 +- .github/workflows/deploy-rule-tester.yml | 9 +- .github/workflows/lint-format.yml | 61 ++++++++--- .github/workflows/prerelease-update-cache.yml | 3 +- .../workflows/prerelease-update-version.yml | 6 +- .github/workflows/test-suite.yml | 101 ++++++++++++++---- .markdownlint-cli2.yaml | 13 +++ .pre-commit-config.yaml | 17 ++- .yamllint.yaml | 13 +++ README.md | 22 ++-- cdisc_rules_engine/__init__.py | 1 - .../check_operators/dataframe_operators.py | 6 +- cdisc_rules_engine/interfaces/__init__.py | 1 - .../models/define/value_level_metadata.py | 12 +-- .../models/external_dictionaries_container.py | 1 - cdisc_rules_engine/rules_engine.py | 6 +- .../services/data_readers/__init__.py | 1 - .../services/logging/__init__.py | 1 - .../utilities/jsonata_processor.py | 2 +- cdisc_rules_engine/utilities/utils.py | 12 +-- docs/PYPI.md | 83 ++++++++++---- docs/README.md | 7 +- docs/build_executable.md | 71 +++++++----- docs/cli-reference.md | 34 ++++-- docs/contributing.md | 42 +++++--- docs/development.md | 39 ++++--- docs/faq.md | 99 ++++++++++++----- docs/index.md | 26 +++-- docs/quick-start.md | 34 ++++-- requirements-dev.txt | 6 +- scripts/merge_schema_markdown.md | 46 +++++--- .../test_Issues/test_CoreIssue208.py | 1 - tests/unit/test_adam_variable_reader.py | 2 +- tests/unit/test_csv_reader.py | 24 ++--- ...dataset_metadata_define_dataset_builder.py | 1 - .../test_domain_presence_define_builder.py | 1 - .../test_meddra/test_meddra_term.py | 2 +- .../test_meddra/test_meddra_terms_factory.py | 61 ++++++----- ...test_label_referenced_variable_metadata.py | 2 +- .../test_name_referenced_variable_metadata.py | 2 +- 43 files changed, 634 insertions(+), 288 
deletions(-) create mode 100644 .markdownlint-cli2.yaml create mode 100644 .yamllint.yaml diff --git a/.flake8 b/.flake8 index 069c56121..894dcd5de 100644 --- a/.flake8 +++ b/.flake8 @@ -6,5 +6,6 @@ exclude = .github, .pytest_cache, cdisc_rules_engine/resources, venv, + .venv, build, dist diff --git a/.github/ISSUE_TEMPLATE/1-bug.yml b/.github/ISSUE_TEMPLATE/1-bug.yml index 82ba66361..98979f8f5 100644 --- a/.github/ISSUE_TEMPLATE/1-bug.yml +++ b/.github/ISSUE_TEMPLATE/1-bug.yml @@ -39,7 +39,9 @@ body: value: https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- - type: markdown attributes: - value: "\nIn the next fields, please provide a [Minimal Reproducible Example](https://stackoverflow.com/help/minimal-reproducible-example)." + value: | + In the next fields, please provide a + [Minimal Reproducible Example](https://stackoverflow.com/help/minimal-reproducible-example). - type: input id: cli_command validations: diff --git a/.github/workflows/build-binary.yml b/.github/workflows/build-binary.yml index 8a257da1d..773d0d649 100644 --- a/.github/workflows/build-binary.yml +++ b/.github/workflows/build-binary.yml @@ -27,13 +27,46 @@ jobs: - name: Build Binary (Linux) if: runner.os == 'Linux' - run: pyinstaller --onedir --contents-directory "." core.py --icon=resources/assets/CORE_logo_sm.ico --dist ./dist/output/${{ inputs.name }} --collect-submodules pyreadstat --add-data=$pythonLocation/lib/python3.12/site-packages/xmlschema/schemas:xmlschema/schemas --add-data=resources/cache:resources/cache --add-data=resources/templates:resources/templates --add-data=resources/schema:resources/schema --add-data=resources/datasets:resources/datasets --add-data=resources/jsonata:resources/jsonata + run: >- + pyinstaller --onedir --contents-directory "." 
core.py + --icon=resources/assets/CORE_logo_sm.ico + --dist ./dist/output/${{ inputs.name }} + --collect-submodules pyreadstat + --add-data=$pythonLocation/lib/python3.12/site-packages/xmlschema/schemas:xmlschema/schemas + --add-data=resources/cache:resources/cache + --add-data=resources/templates:resources/templates + --add-data=resources/schema:resources/schema + --add-data=resources/datasets:resources/datasets + --add-data=resources/jsonata:resources/jsonata - name: Build Binary (Mac) if: runner.os == 'macOS' - run: pyinstaller --onedir --contents-directory "." core.py --icon=resources/assets/CORE_logo_sm.icns --dist ./dist/output/${{ inputs.name }} --collect-submodules pyreadstat --add-data=$pythonLocation/lib/python3.12/site-packages/xmlschema/schemas:xmlschema/schemas --add-data=resources/cache:resources/cache --add-data=resources/templates:resources/templates --add-data=resources/schema:resources/schema --add-data=resources/datasets:resources/datasets --add-data=resources/jsonata:resources/jsonata + run: >- + pyinstaller --onedir --contents-directory "." core.py + --icon=resources/assets/CORE_logo_sm.icns + --dist ./dist/output/${{ inputs.name }} + --collect-submodules pyreadstat + --add-data=$pythonLocation/lib/python3.12/site-packages/xmlschema/schemas:xmlschema/schemas + --add-data=resources/cache:resources/cache + --add-data=resources/templates:resources/templates + --add-data=resources/schema:resources/schema + --add-data=resources/datasets:resources/datasets + --add-data=resources/jsonata:resources/jsonata - name: Build Binary (Windows) if: runner.os == 'Windows' - run: pyinstaller --onedir --contents-directory "." 
core.py --icon=resources/assets/CORE_logo_sm.ico --dist ./dist/output/${{ inputs.name }} --collect-submodules pyreadstat --add-data="$env:pythonLocation\Lib\site-packages\xmlschema\schemas;xmlschema/schemas" --hidden-import numpy --hidden-import numpy.core._methods --hidden-import numpy.lib.format --add-data="resources/cache;resources/cache" --add-data="resources/templates;resources/templates" --add-data="resources/schema;resources/schema" --add-data="resources/datasets;resources/datasets" --add-data="resources/jsonata;resources/jsonata" + run: >- + pyinstaller --onedir --contents-directory "." core.py + --icon=resources/assets/CORE_logo_sm.ico + --dist ./dist/output/${{ inputs.name }} + --collect-submodules pyreadstat + --add-data="$env:pythonLocation\Lib\site-packages\xmlschema\schemas;xmlschema/schemas" + --hidden-import numpy + --hidden-import numpy.core._methods + --hidden-import numpy.lib.format + --add-data="resources/cache;resources/cache" + --add-data="resources/templates;resources/templates" + --add-data="resources/schema;resources/schema" + --add-data="resources/datasets;resources/datasets" + --add-data="resources/jsonata;resources/jsonata" - name: Archive Binary uses: actions/upload-artifact@v6 with: diff --git a/.github/workflows/check-schema-markdown.yml b/.github/workflows/check-schema-markdown.yml index 04e684864..d8b0098e7 100644 --- a/.github/workflows/check-schema-markdown.yml +++ b/.github/workflows/check-schema-markdown.yml @@ -29,9 +29,12 @@ jobs: run: | npm i prettier npx prettier resources/schema/rule-merged/*.json --write - - uses: CatChen/check-git-status-action@7b45cb4ce3e00a8bce4910dc2d5f2785235a6d7e # v2.1.2 + - uses: CatChen/check-git-status-action@7b45cb4ce3e00a8bce4910dc2d5f2785235a6d7e # v2.1.2 with: fail-if-not-clean: true request-changes-if-not-clean: ${{ github.event_name == 'pull_request' }} - request-changes-comment: Updated schema has not been merged with markdown descriptions. 
Please run the "Merge Schema with Markdown Descriptions" workflow to update the merged schema files. + request-changes-comment: >- + Updated schema has not been merged with markdown descriptions. + Please run the "Merge Schema with Markdown Descriptions" workflow + to update the merged schema files. targets: resources/schema/rule-merged diff --git a/.github/workflows/deploy-rule-tester.yml b/.github/workflows/deploy-rule-tester.yml index 908c45f3c..7267cfc9a 100644 --- a/.github/workflows/deploy-rule-tester.yml +++ b/.github/workflows/deploy-rule-tester.yml @@ -16,7 +16,11 @@ permissions: contents: read env: - creds: '{"clientId":"${{ vars.AZURE_CLIENT_ID }}","clientSecret":"${{ secrets.AZURE_CLIENT_SECRET }}","subscriptionId":"${{ vars.AZURE_SUBSCRIPTION_ID }}","tenantId":"${{ vars.AZURE_TENANT_ID }}"}' + creds: >- + {"clientId":"${{ vars.AZURE_CLIENT_ID }}", + "clientSecret":"${{ secrets.AZURE_CLIENT_SECRET }}", + "subscriptionId":"${{ vars.AZURE_SUBSCRIPTION_ID }}", + "tenantId":"${{ vars.AZURE_TENANT_ID }}"} functionAppName: cdisc-library-conformance-rules-generator-dev PYTHON_VERSION: "3.12" @@ -34,7 +38,8 @@ jobs: with: app-name: ${{ env.functionAppName }} mask-inputs: false - app-settings-json: '{"WEBSITE_ENABLE_SYNC_UPDATE_SITE": "1"}' # wait for this step to complete before the webapps-deploy step + # wait for this step to complete before the webapps-deploy step + app-settings-json: '{"WEBSITE_ENABLE_SYNC_UPDATE_SITE": "1"}' general-settings-json: '{"linuxFxVersion": "PYTHON|${{ env.PYTHON_VERSION }}"}' slot-name: ${{ vars.AZURE_WEBAPP_SLOT }} diff --git a/.github/workflows/lint-format.yml b/.github/workflows/lint-format.yml index 4c7f70fe0..f9494f89e 100644 --- a/.github/workflows/lint-format.yml +++ b/.github/workflows/lint-format.yml @@ -7,13 +7,14 @@ on: - main permissions: contents: read - jobs: get_changed_files: runs-on: ubuntu-latest outputs: py: ${{ steps.changes.outputs.py_all_changed_files }} - pretty: ${{ 
steps.changes.outputs.pretty_all_changed_files }} + yaml: ${{ steps.changes.outputs.yaml_all_changed_files }} + md: ${{ steps.changes.outputs.md_all_changed_files }} + json: ${{ steps.changes.outputs.json_all_changed_files }} steps: - name: Checkout repository uses: actions/checkout@v6 @@ -21,18 +22,19 @@ jobs: fetch-depth: 0 - name: Get changed files id: changes - uses: tj-actions/changed-files@7dee1b0c1557f278e5c7dc244927139d78c0e22a # v47.0.4 + uses: tj-actions/changed-files@7dee1b0c1557f278e5c7dc244927139d78c0e22a # v47.0.4 with: files_yaml: | py: - '**.py' - pretty: - - '**.json' - - '**.md' + yaml: - '**.yaml' - '**.yml' + md: + - '**.md' + json: + - '**.json' separator: " " - check_python_files: runs-on: ubuntu-latest needs: get_changed_files @@ -57,19 +59,50 @@ jobs: - name: Run black run: | black --check ${{needs.get_changed_files.outputs.py}} - - check_prettier_files: + check_json_files: runs-on: ubuntu-latest needs: get_changed_files - # only run if there are changed files - if: ${{needs.get_changed_files.outputs.pretty}} + if: ${{needs.get_changed_files.outputs.json}} + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + fetch-depth: 0 + - name: Run json lint + run: | + for f in ${{needs.get_changed_files.outputs.json}}; do + python -m json.tool $f > /dev/null && echo "$f OK" || exit 1 + done + check_yaml_files: + runs-on: ubuntu-latest + needs: get_changed_files + if: ${{needs.get_changed_files.outputs.yaml}} + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.12" + - name: Install yamllint + run: | + pip install yamllint -c requirements-dev.txt + - name: Run yamllint + run: | + yamllint ${{needs.get_changed_files.outputs.yaml}} + check_markdown_files: + runs-on: ubuntu-latest + needs: get_changed_files + if: ${{needs.get_changed_files.outputs.md}} steps: - name: Checkout repository uses: 
actions/checkout@v6 with: fetch-depth: 0 - uses: actions/setup-node@v6 - - name: Run prettier + - name: Run markdownlint run: | - npm i prettier - npx prettier --check ${{needs.get_changed_files.outputs.pretty}} + npm i markdownlint-cli2 + npx markdownlint-cli2 ${{needs.get_changed_files.outputs.md}} diff --git a/.github/workflows/prerelease-update-cache.yml b/.github/workflows/prerelease-update-cache.yml index ba225f5d1..3812afc94 100644 --- a/.github/workflows/prerelease-update-cache.yml +++ b/.github/workflows/prerelease-update-cache.yml @@ -13,7 +13,8 @@ jobs: with: # https://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent#generating-a-new-ssh-key # git bash: ssh-keygen -t ed25519 -C "github-actions@cdisc.org" - # Add public key (.pub one) as a deploy key at Your repo -> Settings -> Security -> Deploy keys, check "Allow write access". + # Add public key (.pub one) as a deploy key at + # Your repo -> Settings -> Security -> Deploy keys, check "Allow write access". # Add private key as a secret at Your repo -> Settings -> Security -> Secrets and variables -> Actions ssh-key: ${{ secrets.GH_ACTION_PRIVATE_KEY }} diff --git a/.github/workflows/prerelease-update-version.yml b/.github/workflows/prerelease-update-version.yml index f0a9a07fb..6cefc9d30 100644 --- a/.github/workflows/prerelease-update-version.yml +++ b/.github/workflows/prerelease-update-version.yml @@ -21,9 +21,9 @@ jobs: uses: actions/checkout@v6 with: # https://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent#generating-a-new-ssh-key - # git bash: ssh-keygen -t ed25519 -C "github-actions@cdisc.org" - # Add public key (.pub one) as a deploy key at Your repo -> Settings -> Security -> Deploy keys, check "Allow write access". 
- # Add private key as a secret at Your repo -> Settings -> Security -> Secrets and variables -> Actions + # Generate SSH key: ssh-keygen -t ed25519 -C "github-actions@cdisc.org" + # Add public key as deploy key (Settings -> Security -> Deploy keys, allow write access) + # Add private key as secret (Settings -> Security -> Secrets and variables -> Actions) ssh-key: ${{ secrets.GH_ACTION_PRIVATE_KEY }} - name: Set up Python diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 0e33d67bd..fb3b479e2 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -51,13 +51,17 @@ jobs: CDISC_LIBRARY_API_KEY: ${{ secrets.CDISC_LIBRARY_API_KEY }} continue-on-error: true run: | - python core.py validate -s sdtmig -v 3-3 ${{ env.RULE_LIST }} -d CORE_Test_Suite/data -dxp CORE_Test_Suite/data/Define.xml -of json -o CORE_Test_Suite/pandas-results -l info || true + python core.py validate -s sdtmig -v 3-3 ${{ env.RULE_LIST }} \ + -d CORE_Test_Suite/data \ + -dxp CORE_Test_Suite/data/Define.xml \ + -of json -o CORE_Test_Suite/pandas-results -l info || true if [ -f "CORE_Test_Suite/pandas-results.json" ]; then echo "pandas_success=true" >> $GITHUB_OUTPUT echo "## Pandas Validation" >> $GITHUB_STEP_SUMMARY echo "✅ **Success**: Validation completed successfully" >> $GITHUB_STEP_SUMMARY - python CORE_Test_Suite/scripts/validation_summary.py CORE_Test_Suite/pandas-results.json >> $GITHUB_STEP_SUMMARY + python CORE_Test_Suite/scripts/validation_summary.py \ + CORE_Test_Suite/pandas-results.json >> $GITHUB_STEP_SUMMARY else echo "Failed to generate pandas-results.json" echo "pandas_success=false" >> $GITHUB_OUTPUT @@ -69,7 +73,12 @@ jobs: if: steps.pandas_run.outputs.pandas_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/comparison.py CORE_Test_Suite/pandas-results.json CORE_Test_Suite/CORE-Report.json CORE_Test_Suite/pandas_comparison.xlsx --mode test --json-output 
CORE_Test_Suite/pandas_comparison.json + python CORE_Test_Suite/scripts/comparison.py \ + CORE_Test_Suite/pandas-results.json \ + CORE_Test_Suite/CORE-Report.json \ + CORE_Test_Suite/pandas_comparison.xlsx \ + --mode test \ + --json-output CORE_Test_Suite/pandas_comparison.json echo "pandas_diff=$?" >> $GITHUB_ENV PANDAS_EXIT_CODE=$? @@ -84,7 +93,13 @@ jobs: if: steps.pandas_run.outputs.pandas_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/compare_implementations.py CORE_Test_Suite/pandas-results.json CORE_Test_Suite/CORE-Report.json CORE_Test_Suite/pandas_comparison.json --github-step-summary $GITHUB_STEP_SUMMARY --mode test + python CORE_Test_Suite/scripts/compare_implementations.py \ + CORE_Test_Suite/pandas-results.json \ + CORE_Test_Suite/CORE-Report.json \ + CORE_Test_Suite/pandas_comparison.json \ + --github-step-summary $GITHUB_STEP_SUMMARY \ + --mode test + - name: Run validation with Dask id: dask_run continue-on-error: true @@ -92,36 +107,52 @@ jobs: DATASET_SIZE_THRESHOLD: 0 CDISC_LIBRARY_API_KEY: ${{ secrets.CDISC_LIBRARY_API_KEY }} run: | - python core.py validate -s sdtmig -v 3-3 ${{ env.RULE_LIST }} -d CORE_Test_Suite/data -dxp CORE_Test_Suite/data/Define.xml -of json -o CORE_Test_Suite/dask-results -l info || true + python core.py validate -s sdtmig -v 3-3 ${{ env.RULE_LIST }} \ + -d CORE_Test_Suite/data \ + -dxp CORE_Test_Suite/data/Define.xml \ + -of json -o CORE_Test_Suite/dask-results -l info || true if [ -f "CORE_Test_Suite/dask-results.json" ]; then echo "dask_success=true" >> $GITHUB_OUTPUT echo "## Dask Validation" >> $GITHUB_STEP_SUMMARY echo "✅ **Success**: Validation completed successfully" >> $GITHUB_STEP_SUMMARY - python CORE_Test_Suite/scripts/validation_summary.py dask-results.json >> $GITHUB_STEP_SUMMARY + python CORE_Test_Suite/scripts/validation_summary.py \ + dask-results.json >> $GITHUB_STEP_SUMMARY else echo "Failed to generate dask-results.json" echo "dask_success=false" >> $GITHUB_OUTPUT 
echo "## Dask Validation" >> $GITHUB_STEP_SUMMARY - echo "❌ **Failed**: No results file was generated" >> $GITHUB_STEP_SUMMARY + echo "❌ **Failed**: No results file was generated" >> $GITHUB_STEP_SUMMARY fi + - name: Dask comparison script continue-on-error: true if: steps.dask_run.outputs.dask_success == 'true' run: | - python CORE_Test_Suite/scripts/comparison.py CORE_Test_Suite/dask-results.json CORE_Test_Suite/CORE-Report.json CORE_Test_Suite/dask_comparison.xlsx --mode test --json-output CORE_Test_Suite/dask_comparison.json + python CORE_Test_Suite/scripts/comparison.py \ + CORE_Test_Suite/dask-results.json \ + CORE_Test_Suite/CORE-Report.json \ + CORE_Test_Suite/dask_comparison.xlsx \ + --mode test \ + --json-output CORE_Test_Suite/dask_comparison.json DASK_EXIT_CODE=$? echo "dask_diff=$DASK_EXIT_CODE" >> $GITHUB_ENV if [ $DASK_EXIT_CODE -eq 0 ]; then echo "Dask comparison completed successfully (no differences)" else echo "Dask comparison found differences" + fi - name: Generate dask comparison summary if: steps.dask_run.outputs.dask_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/compare_implementations.py CORE_Test_Suite/dask-results.json CORE_Test_Suite/CORE-Report.json CORE_Test_Suite/dask_comparison.json --github-step-summary $GITHUB_STEP_SUMMARY --mode test + python CORE_Test_Suite/scripts/compare_implementations.py \ + CORE_Test_Suite/dask-results.json \ + CORE_Test_Suite/CORE-Report.json \ + CORE_Test_Suite/dask_comparison.json \ + --github-step-summary $GITHUB_STEP_SUMMARY \ + --mode test ################################# # USDM TEST SUITE VALIDATION @@ -129,7 +160,8 @@ jobs: - name: Parse USDM rule list run: | - usdm_rules=$(cat CORE_Test_Suite/rulelist/USDM_Test_Suite_Rules.txt | sed 's/\r$//' | sed 's/^/-r /' | tr '\n' ' ') + usdm_rules=$(cat CORE_Test_Suite/rulelist/USDM_Test_Suite_Rules.txt \ + | sed 's/\r$//' | sed 's/^/-r /' | tr '\n' ' ') echo "USDM_RULE_LIST=$usdm_rules" >> $GITHUB_ENV echo "USDM rules: 
$usdm_rules" @@ -137,13 +169,16 @@ jobs: id: usdm_neg continue-on-error: true run: | - python core.py validate -s usdm -v 4-0 ${{ env.USDM_RULE_LIST }} -dp CORE_Test_Suite/usdm_data/USDM_Test_Suite_negative.json -of json -o CORE_Test_Suite/usdm_negative_report -l error || true + python core.py validate -s usdm -v 4-0 ${{ env.USDM_RULE_LIST }} \ + -dp CORE_Test_Suite/usdm_data/USDM_Test_Suite_negative.json \ + -of json -o CORE_Test_Suite/usdm_negative_report -l error || true if [ -f "CORE_Test_Suite/usdm_negative_report.json" ]; then echo "usdm_neg_success=true" >> $GITHUB_OUTPUT echo "## USDM Negative" >> $GITHUB_STEP_SUMMARY echo "**Success**: Negative test passed" >> $GITHUB_STEP_SUMMARY - python CORE_Test_Suite/scripts/validation_summary.py CORE_Test_Suite/usdm_negative_report.json >> $GITHUB_STEP_SUMMARY + python CORE_Test_Suite/scripts/validation_summary.py \ + CORE_Test_Suite/usdm_negative_report.json >> $GITHUB_STEP_SUMMARY else echo "usdm_neg_success=false" >> $GITHUB_OUTPUT echo "**Failed**: No results for negative test" >> $GITHUB_STEP_SUMMARY @@ -153,7 +188,12 @@ jobs: if: steps.usdm_neg.outputs.usdm_neg_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/comparison.py CORE_Test_Suite/usdm_negative_report.json CORE_Test_Suite/USDM_Negative_Result.json CORE_Test_Suite/usdm_negative_comparison.xlsx --mode test --json-output CORE_Test_Suite/usdm_negative_comparison.json + python CORE_Test_Suite/scripts/comparison.py \ + CORE_Test_Suite/usdm_negative_report.json \ + CORE_Test_Suite/USDM_Negative_Result.json \ + CORE_Test_Suite/usdm_negative_comparison.xlsx \ + --mode test \ + --json-output CORE_Test_Suite/usdm_negative_comparison.json USDM_NEG_EXIT_CODE=$? 
echo "usdm_neg_diff=$USDM_NEG_EXIT_CODE" >> $GITHUB_ENV if [ $USDM_NEG_EXIT_CODE -eq 0 ]; then @@ -166,19 +206,27 @@ jobs: if: steps.usdm_neg.outputs.usdm_neg_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/compare_implementations.py CORE_Test_Suite/usdm_negative_report.json CORE_Test_Suite/USDM_Negative_Result.json CORE_Test_Suite/usdm_negative_comparison.json --github-step-summary $GITHUB_STEP_SUMMARY --mode test + python CORE_Test_Suite/scripts/compare_implementations.py \ + CORE_Test_Suite/usdm_negative_report.json \ + CORE_Test_Suite/USDM_Negative_Result.json \ + CORE_Test_Suite/usdm_negative_comparison.json \ + --github-step-summary $GITHUB_STEP_SUMMARY \ + --mode test - name: Run USDM validation (Positive) id: usdm_pos continue-on-error: true run: | - python core.py validate -s usdm -v 4-0 ${{ env.USDM_RULE_LIST }} -dp CORE_Test_Suite/usdm_data/USDM_Test_Suite_positive.json -of json -o CORE_Test_Suite/usdm_positive_report -l error || true + python core.py validate -s usdm -v 4-0 ${{ env.USDM_RULE_LIST }} \ + -dp CORE_Test_Suite/usdm_data/USDM_Test_Suite_positive.json \ + -of json -o CORE_Test_Suite/usdm_positive_report -l error || true if [ -f "CORE_Test_Suite/usdm_positive_report.json" ]; then echo "usdm_pos_success=true" >> $GITHUB_OUTPUT echo "## USDM Positive" >> $GITHUB_STEP_SUMMARY echo "**Success**: Positive test passed" >> $GITHUB_STEP_SUMMARY - python CORE_Test_Suite/scripts/validation_summary.py CORE_Test_Suite/usdm_positive_report.json >> $GITHUB_STEP_SUMMARY + python CORE_Test_Suite/scripts/validation_summary.py \ + CORE_Test_Suite/usdm_positive_report.json >> $GITHUB_STEP_SUMMARY else echo "usdm_pos_success=false" >> $GITHUB_OUTPUT echo "**Failed**: No results for positive test" >> $GITHUB_STEP_SUMMARY @@ -188,7 +236,12 @@ jobs: if: steps.usdm_pos.outputs.usdm_pos_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/comparison.py CORE_Test_Suite/usdm_positive_report.json 
CORE_Test_Suite/USDM_Positive_Result.json CORE_Test_Suite/usdm_positive_comparison.xlsx --mode test --json-output CORE_Test_Suite/usdm_positive_comparison.json + python CORE_Test_Suite/scripts/comparison.py \ + CORE_Test_Suite/usdm_positive_report.json \ + CORE_Test_Suite/USDM_Positive_Result.json \ + CORE_Test_Suite/usdm_positive_comparison.xlsx \ + --mode test \ + --json-output CORE_Test_Suite/usdm_positive_comparison.json USDM_POS_EXIT_CODE=$? echo "usdm_pos_diff=$USDM_POS_EXIT_CODE" >> $GITHUB_ENV if [ $USDM_POS_EXIT_CODE -eq 0 ]; then @@ -201,7 +254,12 @@ jobs: if: steps.usdm_pos.outputs.usdm_pos_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/compare_implementations.py CORE_Test_Suite/usdm_positive_report.json CORE_Test_Suite/USDM_Positive_Result.json CORE_Test_Suite/usdm_positive_comparison.json --github-step-summary $GITHUB_STEP_SUMMARY --mode test + python CORE_Test_Suite/scripts/compare_implementations.py \ + CORE_Test_Suite/usdm_positive_report.json \ + CORE_Test_Suite/USDM_Positive_Result.json \ + CORE_Test_Suite/usdm_positive_comparison.json \ + --github-step-summary $GITHUB_STEP_SUMMARY \ + --mode test ####################### # UPLOAD ALL RESULTS @@ -238,11 +296,12 @@ jobs: USDM_NEG_DIFF="${{ env.usdm_neg_diff }}" USDM_POS_DIFF="${{ env.usdm_pos_diff }}" - - if [[ "$PANDAS_DIFF" == "1" || "$DASK_DIFF" == "1" || "$USDM_NEG_DIFF" == "1" || "$USDM_POS_DIFF" == "1" ]]; then + if [[ "$PANDAS_DIFF" == "1" || "$DASK_DIFF" == "1" \ + || "$USDM_NEG_DIFF" == "1" || "$USDM_POS_DIFF" == "1" ]]; then echo "Differences found in one or more comparisons" exit 1 - elif [[ "$PANDAS_DIFF" == "0" && "$DASK_DIFF" == "0" && "$USDM_NEG_DIFF" == "0" && "$USDM_POS_DIFF" == "0" ]]; then + elif [[ "$PANDAS_DIFF" == "0" && "$DASK_DIFF" == "0" \ + && "$USDM_NEG_DIFF" == "0" && "$USDM_POS_DIFF" == "0" ]]; then echo "No differences found in any comparison" exit 0 else diff --git a/.markdownlint-cli2.yaml b/.markdownlint-cli2.yaml new file mode 
100644 index 000000000..03bc2806d --- /dev/null +++ b/.markdownlint-cli2.yaml @@ -0,0 +1,13 @@ +config: + MD013: + line_length: 120 + tables: false + code_blocks: false + headings: false + MD024: + siblings_only: true + MD029: false + MD033: false + MD041: false +ignores: + - "resources/**" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e3d7cf0c4..1b387343a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,11 +1,24 @@ repos: - repo: https://github.com/ambv/black - rev: 24.10.0 + rev: 26.5.1 hooks: - id: black language_version: python3 - repo: https://github.com/pycqa/flake8 - rev: 6.1.0 + rev: 7.3.0 hooks: - id: flake8 language_version: python3 + - repo: https://github.com/adrienverge/yamllint + rev: v1.38.0 + hooks: + - id: yamllint + args: [--strict] + - repo: https://github.com/DavidAnson/markdownlint-cli2 + rev: v0.22.1 + hooks: + - id: markdownlint-cli2 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: check-json diff --git a/.yamllint.yaml b/.yamllint.yaml new file mode 100644 index 000000000..e3debde96 --- /dev/null +++ b/.yamllint.yaml @@ -0,0 +1,13 @@ +--- +extends: default +rules: + line-length: + max: 120 + document-start: disable + truthy: + allowed-values: ["true", "false", "on"] + new-lines: disable + trailing-spaces: disable +ignore: | + resources/** + .github/workflows/build-version.yml diff --git a/README.md b/README.md index ae529c53a..87bf240e1 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,17 @@ +# cdisc-rules-engine +

- + CDISC CORE Logo

-[![](https://img.shields.io/badge/python-3.12-blue.svg)](https://www.python.org/downloads/release/python-3120) [![](https://img.shields.io/pypi/v/cdisc-rules-engine.svg)](https://pypi.org/project/cdisc-rules-engine) [![](https://img.shields.io/docker/v/cdiscdocker/cdisc-rules-engine?label=docker)](https://hub.docker.com/r/cdiscdocker/cdisc-rules-engine) - -# cdisc-rules-engine +[![Python 3.12](https://img.shields.io/badge/python-3.12-blue.svg)](https://www.python.org/downloads/release/python-3120) +[![PyPI](https://img.shields.io/pypi/v/cdisc-rules-engine.svg)](https://pypi.org/project/cdisc-rules-engine) +[![Docker](https://img.shields.io/docker/v/cdiscdocker/cdisc-rules-engine?label=docker)](https://hub.docker.com/r/cdiscdocker/cdisc-rules-engine) -Open source offering of the CDISC Rules Engine, a tool designed for validating clinical trial data against data standards. +Open source offering of the CDISC Rules Engine, a tool designed for validating clinical +trial data against data standards. ## Quick Start Documentation @@ -28,12 +31,15 @@ Full documentation lives in the [`docs/`](docs/index.md) directory and is hosted ### Questions or Need Help? -If you need any assistance or encounter errors during setup, check or reach out via our [Q&A](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a?discussions_q=) board +If you need any assistance or encounter errors during setup, check or reach out via our +[Q&A](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a?discussions_q=) +board. 
### Submit an Issue -If you encounter any bugs or have feature requests please submit an issue on our GitHub repository: -[https://github.com/cdisc-org/cdisc-rules-engine/issues](https://github.com/cdisc-org/cdisc-rules-engine/issues) +If you encounter any bugs or have feature requests please submit an issue on our +GitHub repository: +[GitHub Issues](https://github.com/cdisc-org/cdisc-rules-engine/issues) When submitting an issue, please include: diff --git a/cdisc_rules_engine/__init__.py b/cdisc_rules_engine/__init__.py index 33a5c99ad..68aae553f 100644 --- a/cdisc_rules_engine/__init__.py +++ b/cdisc_rules_engine/__init__.py @@ -1,5 +1,4 @@ from .plugin_loader import PluginLoader - loader = PluginLoader() loader.load() diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py index 5625b5a81..7564de1e4 100644 --- a/cdisc_rules_engine/check_operators/dataframe_operators.py +++ b/cdisc_rules_engine/check_operators/dataframe_operators.py @@ -476,10 +476,8 @@ def _get_string_part_series(self, part_to_validate: str, length: int, target: st elif part_to_validate == "prefix": series_to_validate = self.value[target].str.slice(stop=length) else: - raise ValueError( - f"Invalid part to validate: {part_to_validate}. \ - Valid values are: suffix, prefix" - ) + raise ValueError(f"Invalid part to validate: {part_to_validate}. 
\ + Valid values are: suffix, prefix") series_to_validate = series_to_validate.mask(pd.isna(self.value[target])) return series_to_validate diff --git a/cdisc_rules_engine/interfaces/__init__.py b/cdisc_rules_engine/interfaces/__init__.py index 5febc42d5..11c9e77f7 100644 --- a/cdisc_rules_engine/interfaces/__init__.py +++ b/cdisc_rules_engine/interfaces/__init__.py @@ -9,7 +9,6 @@ from .dictionary_term_interface import DictionaryTermInterface from .terms_factory_interface import TermsFactoryInterface - __all__ = [ "CacheServiceInterface", "ConditionInterface", diff --git a/cdisc_rules_engine/models/define/value_level_metadata.py b/cdisc_rules_engine/models/define/value_level_metadata.py index 7d0e2baff..530663147 100644 --- a/cdisc_rules_engine/models/define/value_level_metadata.py +++ b/cdisc_rules_engine/models/define/value_level_metadata.py @@ -164,12 +164,12 @@ def isdatetime(dataframe): return True try: datetime.fromisoformat(dataframe[self.item.Name]) - except: + except Exception: try: datetime.fromisoformat( dataframe[self.item.Name].replace("Z", "+00:00") ) - except: + except Exception: return False return True return True @@ -182,7 +182,7 @@ def isdate(dataframe): return True try: datetime.date.fromisoformat(dataframe[self.item.Name]) - except: + except Exception: return False return True @@ -194,7 +194,7 @@ def istime(dataframe): return True try: datetime.time.fromisoformat(dataframe[self.item.Name]) - except: + except Exception: return False return True @@ -206,12 +206,12 @@ def is_incomplete(dataframe): return True try: datetime.fromisoformat(dataframe[self.item.Name]) - except: + except Exception: try: datetime.fromisoformat( dataframe[self.item.Name].replace("Z", "+00:00") ) - except: + except Exception: return True return False return False diff --git a/cdisc_rules_engine/models/external_dictionaries_container.py b/cdisc_rules_engine/models/external_dictionaries_container.py index c434b7f28..3fa14bad0 100644 --- 
a/cdisc_rules_engine/models/external_dictionaries_container.py +++ b/cdisc_rules_engine/models/external_dictionaries_container.py @@ -14,7 +14,6 @@ ) from cdisc_rules_engine.exceptions.custom_exceptions import UnsupportedDictionaryType - DICTIONARY_VALIDATORS = { DictionaryTypes.MEDDRA.value: MedDRAValidator, DictionaryTypes.LOINC.value: LoincValidator, diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index 8860ff8d5..e524707d7 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -275,16 +275,14 @@ def validate_single_dataset( ] except Exception as e: logger.trace(e) - logger.error( - f"""Error occurred during validation. + logger.error(f"""Error occurred during validation. Error: {e} Error Type: {type(e)} Error Message: {str(e)} Dataset Name: {dataset_metadata.name} Rule ID: {rule.get("core_id", "unknown")} Full traceback: {traceback.format_exc()} - """ - ) + """) error_obj: ValidationErrorContainer = self.handle_validation_exceptions( e, dataset_metadata.name ) diff --git a/cdisc_rules_engine/services/data_readers/__init__.py b/cdisc_rules_engine/services/data_readers/__init__.py index 9fe2b3c11..d397c72ea 100644 --- a/cdisc_rules_engine/services/data_readers/__init__.py +++ b/cdisc_rules_engine/services/data_readers/__init__.py @@ -3,5 +3,4 @@ from .parquet_reader import ParquetReader from .dataset_json_reader import DatasetJSONReader - __all__ = ["DataReaderFactory", "XPTReader", "DatasetJSONReader", "ParquetReader"] diff --git a/cdisc_rules_engine/services/logging/__init__.py b/cdisc_rules_engine/services/logging/__init__.py index 13bba08bb..a62550e62 100644 --- a/cdisc_rules_engine/services/logging/__init__.py +++ b/cdisc_rules_engine/services/logging/__init__.py @@ -1,7 +1,6 @@ from .console_logger import ConsoleLogger from .logging_service_factory import LoggingServiceFactory - __all__ = [ "ConsoleLogger", "LoggingServiceFactory", diff --git 
a/cdisc_rules_engine/utilities/jsonata_processor.py b/cdisc_rules_engine/utilities/jsonata_processor.py index ad550909b..8af903c85 100644 --- a/cdisc_rules_engine/utilities/jsonata_processor.py +++ b/cdisc_rules_engine/utilities/jsonata_processor.py @@ -94,7 +94,7 @@ def execute_jsonata_rule( @staticmethod @cache def get_all_custom_functions( - jsonata_custom_functions: tuple[()] | tuple[tuple[str, str], ...] + jsonata_custom_functions: tuple[()] | tuple[tuple[str, str], ...], ): builtins_and_customs = [ ("utils", DefaultFilePaths.JSONATA_UTILS.value), diff --git a/cdisc_rules_engine/utilities/utils.py b/cdisc_rules_engine/utilities/utils.py index 02a69a1fe..9b014d33a 100644 --- a/cdisc_rules_engine/utilities/utils.py +++ b/cdisc_rules_engine/utilities/utils.py @@ -250,9 +250,9 @@ def get_meddra_code_term_pairs_cache_key(meddra_path: str) -> str: return f"meddra_valid_code_term_pairs_{meddra_path}" -def get_item_index_by_condition[ - T -](list_of_dicts: List[T], condition: Callable[[T], bool]) -> Optional[int]: +def get_item_index_by_condition[T]( + list_of_dicts: List[T], condition: Callable[[T], bool] +) -> Optional[int]: """ Uses linear search to return index of element in unsorted list which applies to the condition. @@ -262,9 +262,9 @@ def get_item_index_by_condition[ return index -def search_in_list[ - T -](list_of_dicts: List[T], condition: Callable[[T], bool]) -> Optional[T]: +def search_in_list[T]( + list_of_dicts: List[T], condition: Callable[[T], bool] +) -> Optional[T]: """ Returns an element of unsorted list that applies to the condition. """ diff --git a/docs/PYPI.md b/docs/PYPI.md index d0d0ffd16..58d648d90 100644 --- a/docs/PYPI.md +++ b/docs/PYPI.md @@ -1,12 +1,15 @@ # PyPI Integration -CORE is available as a Python package for direct integration into your own pipelines and tooling. +CORE is available as a Python package for direct integration into your own pipelines +and tooling. 
```bash pip install cdisc-rules-engine ``` -This installs the engine underlying the CLI and executable, but **does not include `core.py`** or the CLI entrypoints. If you need the full CLI, use the [executable or source code](quick-start.md) instead. +This installs the engine underlying the CLI and executable, but **does not include +`core.py`** or the CLI entrypoints. If you need the full CLI, use the +[executable or source code](quick-start.md) instead. --- @@ -14,10 +17,16 @@ This installs the engine underlying the CLI and executable, but **does not inclu Installing the package alone is not enough to run validations. You also need: -1. **The rules cache** — download the contents of `resources/cache/` from the [repository](https://github.com/cdisc-org/cdisc-rules-engine) and store them somewhere in your project. Keep this in sync with the package version you're using. -2. **A CDISC Library API key** — required for controlled terminology and library metadata. See [update-cache](cli-reference.md#updating-the-cache-update-cache) for how to obtain one. +1. **The rules cache** — download the contents of `resources/cache/` from the + [repository](https://github.com/cdisc-org/cdisc-rules-engine) and store them + somewhere in your project. Keep this in sync with the package version you're using. +2. **A CDISC Library API key** — required for controlled terminology and library + metadata. See [update-cache](cli-reference.md#updating-the-cache-update-cache) + for how to obtain one. -The package also includes the USDM and Dataset-JSON schemas, available if you use the dataset reader classes in `cdisc_rules_engine/services/data_readers` or the metadata readers in `cdisc_rules_engine/services`. +The package also includes the USDM and Dataset-JSON schemas, available if you use the +dataset reader classes in `cdisc_rules_engine/services/data_readers` or the metadata +readers in `cdisc_rules_engine/services`. 
--- @@ -72,7 +81,9 @@ cache = load_rules_cache("path/to/rules/cache") rules = cache.get_all_by_prefix(get_rules_cache_key("sdtmig", "3-4")) ``` -Each rule is a dict with keys: `core_id`, `domains`, `author`, `reference`, `sensitivity`, `executability`, `description`, `authorities`, `standards`, `classes`, `rule_type`, `conditions`, `actions`, `datasets`, `output_variables`. +Each rule is a dict with keys: `core_id`, `domains`, `author`, `reference`, +`sensitivity`, `executability`, `description`, `authorities`, `standards`, `classes`, +`rule_type`, `conditions`, `actions`, `datasets`, `output_variables`. If you have rules in raw CDISC metadata format, convert them first: @@ -143,7 +154,11 @@ for rule in ae_rules: value_level_metadata=None, ) try: - was_triggered = run(rule=rule, defined_variables=dataset_variable, defined_actions=core_actions) + was_triggered = run( + rule=rule, + defined_variables=dataset_variable, + defined_actions=core_actions + ) if was_triggered: all_results.extend(results) except Exception as e: @@ -166,7 +181,11 @@ for rule in ae_rules: ## Option B: RulesEngine Class -More setup, but handles dataset reading, preprocessing, and multi-domain validation. The source code in `cdisc_rules_engine/rules_engine.py` and the existing CLI implementation in `core.py` are the best reference for wiring this together — the initializer arguments map closely to the CLI flags documented in the [CLI Reference](cli-reference.md). +More setup, but handles dataset reading, preprocessing, and multi-domain validation. +The source code in `cdisc_rules_engine/rules_engine.py` and the existing CLI +implementation in `core.py` are the best reference for wiring this together — the +initializer arguments map closely to the CLI flags documented in the +[CLI Reference](cli-reference.md). 
### Step 1: Prepare Dataset Metadata @@ -195,7 +214,8 @@ datasets = [ ] ``` -You don't need to manually create `PandasDataset` or `DatasetVariable` objects for Option B — the engine handles this internally. +You don't need to manually create `PandasDataset` or `DatasetVariable` objects for +Option B — the engine handles this internally. ### Step 2: Initialize Library Metadata @@ -212,8 +232,13 @@ standard = "sdtmig" standard_version = "3-4" standard_substandard = None -standard_metadata = cache.get(get_standard_details_cache_key(standard, standard_version, standard_substandard)) -model_metadata = cache.get(get_model_details_cache_key_from_ig(standard_metadata)) if standard_metadata else {} +standard_metadata = cache.get( + get_standard_details_cache_key(standard, standard_version, standard_substandard) +) +model_metadata = ( + cache.get(get_model_details_cache_key_from_ig(standard_metadata)) + if standard_metadata else {} +) ct_packages = ["sdtmct-2021-12-17"] # replace with your CT package versions ct_package_metadata = {pkg: cache.get(pkg) for pkg in ct_packages} @@ -221,8 +246,16 @@ ct_package_metadata = {pkg: cache.get(pkg) for pkg in ct_packages} library_metadata = LibraryMetadataContainer( standard_metadata=standard_metadata, model_metadata=model_metadata, - variables_metadata=cache.get(get_library_variables_metadata_cache_key(standard, standard_version, standard_substandard)), - variable_codelist_map=cache.get(get_variable_codelist_map_cache_key(standard, standard_version, standard_substandard)), + variables_metadata=cache.get( + get_library_variables_metadata_cache_key( + standard, standard_version, standard_substandard + ) + ), + variable_codelist_map=cache.get( + get_variable_codelist_map_cache_key( + standard, standard_version, standard_substandard + ) + ), ct_package_metadata=ct_package_metadata, ) ``` @@ -273,7 +306,8 @@ rules_engine = RulesEngine( ### Step 5: Run Validation -Note the `ConditionCompositeFactory` conversion step — this is required 
before passing rules to `validate_single_rule`: +Note the `ConditionCompositeFactory` conversion step — this is required before passing +rules to `validate_single_rule`: ```python import time @@ -286,7 +320,9 @@ validation_results = [] for rule in rules: try: if isinstance(rule["conditions"], dict): - rule["conditions"] = ConditionCompositeFactory.get_condition_composite(rule["conditions"]) + rule["conditions"] = ConditionCompositeFactory.get_condition_composite( + rule["conditions"] + ) results = rules_engine.validate_single_rule(rule, datasets) flattened = [r for domain_results in results.values() for r in domain_results] validation_results.append(RuleValidationResult(rule, flattened)) @@ -335,11 +371,16 @@ reporting_services = reporting_factory.get_report_services() **Cache key format** — always use dashes in version strings (`3-4`, not `3.4`). -**`column_prefix_map`** — maps the `--` variable prefix to the dataset domain (e.g. `{"--": "AE"}`), resolving placeholders like `--SEQ` → `AESEQ`. +**`column_prefix_map`** — maps the `--` variable prefix to the dataset domain +(e.g. `{"--": "AE"}`), resolving placeholders like `--SEQ` → `AESEQ`. -**External dictionaries** — pass an `ExternalDictionariesContainer` to `RulesEngine` if validating rules that require MedDRA, WHODrug, LOINC, UNII, MedRT, or SNOMED. See the [External Dictionary Reference](https://cdisc-org.github.io/conformance-rules-editor/#/exdictionary). +**External dictionaries** — pass an `ExternalDictionariesContainer` to `RulesEngine` +if validating rules that require MedDRA, WHODrug, LOINC, UNII, MedRT, or SNOMED. +See the +[External Dictionary Reference](https://cdisc-org.github.io/cdisc-open-rules/#/exdictionary). -**Dask** — set `max_dataset_size=0` when initializing `DataServiceFactory` to force Dask processing for all datasets. +**Dask** — set `max_dataset_size=0` when initializing `DataServiceFactory` to force +Dask processing for all datasets. 
**Windows compatibility** — add `freeze_support()` for multiprocessing: @@ -361,7 +402,9 @@ if __name__ == "__main__": - `full_path` must be set in `SDTMDatasetMetadata` when using the `RulesEngine` approach - The rule's `domains.Include` must match your dataset's domain - `standard_version` format must be consistent throughout (`3-4`, not `3.4`) -- CT package metadata must be present in the cache if validating against controlled terminology +- CT package metadata must be present in the cache if validating against controlled + terminology - When using `define.xml`, the file must be named `define.xml` and the path must be valid - If using external dictionaries, verify all file paths are correct and accessible -- Don't forget the `ConditionCompositeFactory` conversion before calling `validate_single_rule` (Option B) +- Don't forget the `ConditionCompositeFactory` conversion before calling + `validate_single_rule` (Option B) diff --git a/docs/README.md b/docs/README.md index 0dcf0e37c..487bd1770 100644 --- a/docs/README.md +++ b/docs/README.md @@ -8,6 +8,7 @@ [![PyPI](https://img.shields.io/pypi/v/cdisc-rules-engine.svg)](https://pypi.org/project/cdisc-rules-engine) [![Docker](https://img.shields.io/docker/v/cdiscdocker/cdisc-rules-engine?label=docker)](https://hub.docker.com/r/cdiscdocker/cdisc-rules-engine) -# CDISC Rules Engine (CORE) - -Open source offering of the CDISC Conformance Rules Engine — a tool for validating clinical trial data against CDISC data standards. CORE validates study data structure and conformance against both published CDISC conformance rules for the various CDISC standards and custom rules authored in the CORE rule format. +Open source offering of the CDISC Conformance Rules Engine — a tool for validating +clinical trial data against CDISC data standards. CORE validates study data structure +and conformance against both published CDISC conformance rules for the various CDISC +standards and custom rules authored in the CORE rule format. 
diff --git a/docs/build_executable.md b/docs/build_executable.md index ccea09cce..070c63e9c 100644 --- a/docs/build_executable.md +++ b/docs/build_executable.md @@ -1,22 +1,26 @@ # Building CDISC Rules Engine Executable -Pre-built executables for each release are available on the Releases page. If you need to build your own there are two approaches. +Pre-built executables for each release are available on the Releases page. +If you need to build your own there are two approaches. ## Option 1: Using GitHub Actions (Recommended) ### Step 1: Fork the Repository and Setup -1. Fork the repository: https://github.com/cdisc-org/cdisc-rules-engine -2. The workflow file `.github/workflows/build-version.yml` is already included in the main repository. It is contained within our .gitignore so you can customize it as you see fit. +1. Fork the repository: [cdisc-rules-engine](https://github.com/cdisc-org/cdisc-rules-engine) +2. The workflow file `.github/workflows/build-version.yml` is already included in the main + repository. It is contained within our .gitignore so you can customize it as you see fit. ### Step 2: Add your API Key 1. Go to the top bar of the fork, click Settings > Security > Secrets and Variables > Actions -2. Click **New Repository Secret** and set an action secret named CDISC_LIBRARY_API_KEY and secret as your API key +2. Click **New Repository Secret** and set an action secret named CDISC_LIBRARY_API_KEY + and secret as your API key ### Step 3: Run the Build -Go to the **Actions** tab → **Build Custom Executable** → **Run workflow**. Download the artifact when complete. +Go to the **Actions** tab → **Build Custom Executable** → **Run workflow**. +Download the artifact when complete. 
### Step 3: Automated Builds (Optional) @@ -37,12 +41,14 @@ schedule: - Docker Desktop installed and running - Git -- **Note**: There is no official support for a macOS docker runner; Windows also requires some additional setup -- **Note**: You will need to run Windows Command Prompt / Windows Powershell as administrator. This can be done by right clicking and the application and selecting 'Run as Administrator' +- **Note**: There is no official support for a macOS docker runner; Windows also requires + some additional setup +- **Note**: You will need to run Windows Command Prompt / Windows Powershell as administrator. + This can be done by right-clicking the application and selecting 'Run as Administrator' ### Step 1: Clone Repository -#### Linux/macOS/WSL/Windows Command Prompt/Powershell: +#### Linux/macOS/WSL/Windows Command Prompt/Powershell ```bash git clone https://github.com/cdisc-org/cdisc-rules-engine.git @@ -51,9 +57,12 @@ cd cdisc-rules-engine ### Step 1.5: Update cache and code -When you clone the repo initially, it will come with an updated cache and main branch. Before subsequent local docker builds, you will want to follow the README to install the compatible python version of engine, create the virtual environment, and then update the cache as well as pulling down changes from main in cdisc-rules-engine root directory. +When you clone the repo initially, it will come with an updated cache and main branch. +Before subsequent local docker builds, you will want to follow the README to install the +compatible python version of engine, create the virtual environment, and then update the +cache as well as pulling down changes from main in cdisc-rules-engine root directory. 
-#### Linux/macOS/WSL/Git Bash/Windows Command Prompt & PowerShell: +#### Linux/macOS/WSL/Git Bash/Windows Command Prompt & PowerShell ```bash # Set up upstream remote (only done once) @@ -65,7 +74,7 @@ git pull upstream main ### Step 2: Build with Docker -#### Linux/macOS/WSL/Git Bash: +#### Linux/macOS/WSL/Git Bash ```bash # Build the executable @@ -83,7 +92,7 @@ chmod +x ./build-output/core echo "Executable ready: ./build-output/core" ``` -#### Windows Command Prompt: +#### Windows Command Prompt ```cmd REM Build the executable @@ -104,7 +113,7 @@ del temp_id.txt echo Executable ready: ./build-output/core ``` -#### Windows PowerShell: +#### Windows PowerShell ```powershell # Build the executable @@ -119,15 +128,17 @@ docker rm $CONTAINER_ID ## Customizing the Build for Your Environment -The default Dockerfile builds for Ubuntu 22.04 on AMD64 architecture. To customize for your specific environment, modify these sections in Dockerfile.build: +The default Dockerfile builds for Ubuntu 22.04 on AMD64 architecture. 
To customize for +your specific environment, modify these sections in Dockerfile.build: ### Change Target Operating System -To change what underlying OS the executable is built on to match your implementation needs, you will need to edit the dockerfile +To change what underlying OS the executable is built on to match your implementation +needs, you will need to edit the dockerfile: -- https://docs.docker.com/reference/dockerfile/#from -- **Windows**: https://hub.docker.com/r/microsoft/windows -- **macOS**: https://hub.docker.com/search - you can explore DockerHub to find a macOS image to utilize +- [Dockerfile FROM reference](https://docs.docker.com/reference/dockerfile/#from) +- **Windows**: [microsoft/windows on Docker Hub](https://hub.docker.com/r/microsoft/windows) +- **macOS**: [Search Docker Hub](https://hub.docker.com/search) for a macOS image to utilize You will need to edit these areas of the dockerfile: @@ -140,7 +151,8 @@ FROM --platform=linux/amd64 ubuntu:22.04 ### Update PyInstaller Output Path -If you change the base OS, update the PyInstaller dist path. this is for clarity and organization, but it's not technically required for functionality: +If you change the base OS, update the PyInstaller dist path. This is for clarity and +organization, but it's not technically required for functionality: ```dockerfile # Change the --dist path in the pyinstaller command (around line 20) @@ -183,7 +195,8 @@ docker cp "${CONTAINER_ID}:/app/dist/output/core-ubuntu-22.04/core" ./build-outp ### Executability -Currently the Dockerfile.build is ubuntu and we give the file executable permissions. This may not be required depending on your OS. +Currently the Dockerfile.build is ubuntu and we give the file executable permissions. +This may not be required depending on your OS. 
```bash RUN chmod +x /app/dist/output/core-ubuntu-22.04/core/core && \ @@ -193,7 +206,8 @@ RUN chmod +x /app/dist/output/core-ubuntu-22.04/core/core && \ ### Windows Users -- **Recommended**: Use WSL (Windows Subsystem for Linux) or Git Bash for the best experience with the bash commands +- **Recommended**: Use WSL (Windows Subsystem for Linux) or Git Bash for the best + experience with the bash commands - The `chmod +x` command is not needed on Windows as executable permissions work differently - If using Command Prompt, some syntax differs from bash (variable assignment, echo commands) @@ -205,15 +219,20 @@ RUN chmod +x /app/dist/output/core-ubuntu-22.04/core/core && \ ### Cross-Platform Alternative -For the most consistent experience across all platforms, consider using the **GitHub Actions approach (Option 1)**, which handles platform differences automatically and doesn't require local Docker setup. +For the most consistent experience across all platforms, consider using the +**GitHub Actions approach (Option 1)**, which handles platform differences automatically +and doesn't require local Docker setup. ## Troubleshooting ### Architecture Issues -You can build executables for different operating systems using GitHub's hosted runners. This creates platform-specific executables that work on different environments. See: +You can build executables for different operating systems using GitHub's hosted runners. +This creates platform-specific executables that work on different environments. 
See: -- https://docs.github.com/en/actions/concepts/runners/github-hosted-runners -- https://github.com/actions/runner-images +- [GitHub-hosted runners](https://docs.github.com/en/actions/concepts/runners/github-hosted-runners) +- [Runner images](https://github.com/actions/runner-images) -The runner in our workflow currently builds for ubuntu-22.04 but this can be changed to your particular OS, as well as CPU architectures (This will be different for Apple M chips that use ARM architecture versus Intel chips) +The runner in our workflow currently builds for ubuntu-22.04 but this can be changed to +your particular OS, as well as CPU architectures (This will be different for Apple M chips +that use ARM architecture versus Intel chips) diff --git a/docs/cli-reference.md b/docs/cli-reference.md index dabe4eb17..52c190c7d 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -1,6 +1,7 @@ # CLI Reference -> Throughout this reference, examples use `python core.py`. If you're using the pre-built executable, replace this with `.\core.exe` (Windows) or `./core` (Linux/Mac). +> Throughout this reference, examples use `python core.py`. If you're using the pre-built +> executable, replace this with `.\core.exe` (Windows) or `./core` (Linux/Mac). --- @@ -85,9 +86,12 @@ python core.py validate --help -me 100 True # Hard limit per dataset per rule ``` -- `False` (default): After each dataset, if cumulative errors for a rule meet the limit, that rule stops processing further datasets. -- `True`: Limits reported issues to `` per dataset per rule. The rule still executes on all datasets. -- Can also be set via `MAX_ERRORS_PER_RULE` env var. The larger of the env var and CLI values is used. If either sets `per_dataset_flag` to `True`, it will be `True`. +- `False` (default): After each dataset, if cumulative errors for a rule meet the limit, + that rule stops processing further datasets. +- `True`: Limits reported issues to `` per dataset per rule. 
+ The rule still executes on all datasets. +- Can also be set via `MAX_ERRORS_PER_RULE` env var. The larger of the env var and CLI + values is used. If either sets `per_dataset_flag` to `True`, it will be `True`. ### Performance & Behavior @@ -115,7 +119,8 @@ The **Rules Report** tab in the output summarizes the outcome for each rule: ### Large Dataset Processing (Dask) -CORE uses Dask instead of pandas for datasets exceeding 1/4 of available RAM. To force Dask for all datasets: +CORE uses Dask instead of pandas for datasets exceeding 1/4 of available RAM. +To force Dask for all datasets: **Linux/Mac:** @@ -131,7 +136,7 @@ $env:DATASET_SIZE_THRESHOLD=0; .\core.exe validate -s sdtmig -v 3-4 -d C:\path\t Or create a `.env` file in the root directory (See `.env.example`): -``` +```text DATASET_SIZE_THRESHOLD=0 ``` @@ -145,11 +150,16 @@ Download and refresh locally cached rules, controlled terminology, and metadata. python core.py update-cache ``` -An API key is required for metadata and CT. Rules are accessible without a key. Set your key via the `CDISC_LIBRARY_API_KEY` environment variable or in a `.env` file in the root directory (no quotes needed around the value). +An API key is required for metadata and CT. Rules are accessible without a key. Set your +key via the `CDISC_LIBRARY_API_KEY` environment variable or in a `.env` file in the root +directory (no quotes needed around the value). -To obtain an API key: [wiki.cdisc.org — Getting Started](https://wiki.cdisc.org/display/LIBSUPRT/Getting+Started%3A+Access+to+CDISC+Library+API+using+API+Key+Authentication) +To obtain an API key: +[wiki.cdisc.org — Getting Started](https://wiki.cdisc.org/display/LIBSUPRT/Getting+Started%3A+Access+to+CDISC+Library+API+using+API+Key+Authentication) -> **Firewall note:** CORE connects to `api.library.cdisc.org` on port 443. If you see SSL certificate verification errors, contact your IT department to obtain the corporate CA bundle or request whitelisting for this hostname. 
+> **Firewall note:** CORE connects to `api.library.cdisc.org` on port 443. If you see SSL +> certificate verification errors, contact your IT department to obtain the corporate CA +> bundle or request whitelisting for this hostname. ### Options @@ -187,7 +197,8 @@ python core.py update-cache --remove-custom-rules ALL ### Custom Standards -Custom standards map a standard identifier to a list of applicable rule IDs. Add rules to the cache first, then create a standard that references them. +Custom standards map a standard identifier to a list of applicable rule IDs. Add rules +to the cache first, then create a standard that references them. **Standard JSON format:** @@ -197,7 +208,8 @@ Custom standards map a standard identifier to a list of applicable rule IDs. Add } ``` -Custom standards can also reference CDISC standard names to inherit library metadata while using custom rules: +Custom standards can also reference CDISC standard names to inherit library metadata +while using custom rules: ```json { diff --git a/docs/contributing.md b/docs/contributing.md index d53a3315e..f85f341f9 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -1,20 +1,25 @@ # Contributing -Thank you for your interest in contributing to CORE! There are two main ways to contribute: **rule contributions** (via `cdisc-open-rules`) and **engine contributions** (code, tests, documentation in this repository). +Thank you for your interest in contributing to CORE! There are two main ways to +contribute: **rule contributions** (via `cdisc-open-rules`) and **engine contributions** +(code, tests, documentation in this repository). --- ## Rule Contributions -Conformance rules are maintained separately in [`cdisc-open-rules`](https://github.com/cdisc-org/cdisc-open-rules). If you want to: +Conformance rules are maintained separately in +[`cdisc-open-rules`](https://github.com/cdisc-org/cdisc-open-rules). 
If you want to: - Propose a new conformance rule - Report an issue with an existing rule's logic - Contribute a rule implementation -Please open an issue or pull request in that repository. Rule authoring can also be done through the hosted [CORE Rule Editor](https://cdisc-org.github.io/conformance-rules-editor). +Please open an issue or pull request in that repository. Rule authoring can also be done +through the hosted [CORE Rule Editor](https://cdisc-org.github.io/conformance-rules-editor). -For questions about rule contribution workflows, post in [GitHub Discussions](https://github.com/cdisc-org/cdisc-rules-engine/discussions). +For questions about rule contribution workflows, post in +[GitHub Discussions](https://github.com/cdisc-org/cdisc-rules-engine/discussions). --- @@ -22,7 +27,8 @@ For questions about rule contribution workflows, post in [GitHub Discussions](ht ### Setting Up the Development Environment -Follow the [Development → Environment Setup](development.md#environment-setup) guide to clone the repository and install dependencies. +Follow the [Development → Environment Setup](development.md#environment-setup) guide to +clone the repository and install dependencies. ### Code Style @@ -32,15 +38,19 @@ This project enforces consistent formatting and linting via pre-commit hooks. - [`black`](https://black.readthedocs.io/) — Python code formatter - [`flake8`](https://flake8.pycqa.org/) — Python linter -- [`prettier`](https://prettier.io/) — JSON, YAML, and Markdown formatter +- [`yamllint`](https://yamllint.readthedocs.io/) — YAML linter +- [`markdownlint-cli2`](https://github.com/DavidAnson/markdownlint-cli2) — Markdown linter +- [`check-json`](https://github.com/pre-commit/pre-commit-hooks) — JSON syntax validator -Both `black` and `flake8` are included in `requirements-dev.txt`. After installing dependencies, install the pre-commit hooks: +All tools except `markdownlint-cli2` are included in `requirements-dev.txt`. 
+After installing dependencies, install the pre-commit hooks: ```bash pre-commit install ``` -This installs the hooks into `.git/hooks/` so formatting and linting run automatically on each commit. +This installs the hooks into `.git/hooks/` so formatting and linting run automatically +on each commit. To run the checks manually: @@ -54,7 +64,8 @@ pre-commit run --all-files python -m pytest tests ``` -This runs both unit and regression tests. All tests must pass before submitting a pull request. +This runs both unit and regression tests. All tests must pass before submitting a pull +request. ### Submitting a Pull Request @@ -62,15 +73,18 @@ This runs both unit and regression tests. All tests must pass before submitting 2. Make your changes, following the code style guidelines above. 3. Add or update tests for any changed behavior. 4. Ensure all tests pass locally. -5. Open a pull request with a clear description of the change and the motivation behind it. +5. Open a pull request with a clear description of the change and the motivation behind + it. -For larger changes or new features, consider opening a GitHub Discussion or issue first to align on the approach. +For larger changes or new features, consider opening a GitHub Discussion or issue first +to align on the approach. --- ## Reporting Bugs & Requesting Features -Use [GitHub Issues](https://github.com/cdisc-org/cdisc-rules-engine/issues) to report bugs or request features. +Use [GitHub Issues](https://github.com/cdisc-org/cdisc-rules-engine/issues) to report +bugs or request features. When reporting a bug, please include: @@ -83,4 +97,6 @@ When reporting a bug, please include: ## Questions & Discussion -For general questions, use the [Q&A discussion board](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a). Please search existing discussions before opening a new one. 
+For general questions, use the +[Q&A discussion board](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a). +Please search existing discussions before opening a new one. diff --git a/docs/development.md b/docs/development.md index bb4c27427..1b3b9430a 100644 --- a/docs/development.md +++ b/docs/development.md @@ -1,6 +1,7 @@ # Development -This page covers integrating CORE as a library, building from source, running tests, creating executables, and packaging. +This page covers integrating CORE as a library, building from source, running tests, +creating executables, and packaging. --- @@ -24,7 +25,8 @@ For implementation details, see [PYPI.md](./PYPI.md). ## Environment Setup -**Python 3.12 is required.** Other versions are not supported and may produce unexpected errors or incorrect validation results. +**Python 3.12 is required.** Other versions are not supported and may produce unexpected +errors or incorrect validation results. ```bash # Check your Python version @@ -64,7 +66,10 @@ python -m pytest tests ## Creating an Executable -Pre-built executables are available on the [Releases page](https://github.com/cdisc-org/cdisc-rules-engine/releases). If you need to build your own, see [README_Build_Executable.md](./build_executable.md) in the repository root. +Pre-built executables are available on the +[Releases page](https://github.com/cdisc-org/cdisc-rules-engine/releases). +If you need to build your own, see [build_executable.md](./build_executable.md) +in this `docs/` directory. For reference, the PyInstaller commands are: @@ -88,7 +93,8 @@ pyinstaller core.py ^ --add-data="resources/jsonata;resources/jsonata" ``` -The executable is created in the `dist/` folder and does not require Python to be installed on the target machine. +The executable is created in the `dist/` folder and does not require Python to be +installed on the target machine. 
--- @@ -140,32 +146,36 @@ py -m twine upload --repository {repository_name} dist/* ## Updating the USDM JSON Schema -CORE validates against USDM JSON Schema versions 3.0 and 4.0. Schema definitions are stored as `.pkl` files in `resources/cache/`: +CORE validates against USDM JSON Schema versions 3.0 and 4.0. Schema definitions are +stored as `.pkl` files in `resources/cache/`: - `resources/cache/usdm-3-0-schema.pkl` - `resources/cache/usdm-4-0-schema.pkl` -These are derived from the OpenAPI specs in [`cdisc-org/DDF-RA`](https://github.com/cdisc-org/DDF-RA). To update or add a schema version: +These are derived from the OpenAPI specs in +[`cdisc-org/DDF-RA`](https://github.com/cdisc-org/DDF-RA). +To update or add a schema version: 1. Extract the OpenAPI spec for the target tag: - ```bash - git --no-pager --git-dir DDF-RA.git show --format=format:"%B" {tag}:Deliverables/API/USDM_API.json > USDM_API_{version}.json - ``` +```bash + git --no-pager --git-dir DDF-RA.git show --format=format:"%B" \ + {tag}:Deliverables/API/USDM_API.json > USDM_API_{version}.json +``` Example tag: `v3.0.0` 2. Convert the OpenAPI spec to JSON Schema: - ```bash +```bash python scripts/openapi-to-json.py - ``` +``` 3. Convert the JSON Schema to `.pkl`: - ```bash +```bash python scripts/json_pkl_converter.py - ``` +``` 4. Place the resulting `.pkl` file in `resources/cache/`. @@ -173,7 +183,8 @@ These are derived from the OpenAPI specs in [`cdisc-org/DDF-RA`](https://github. ## Dataset Format Reference (JSON) -When validating a single rule with `--local-rules`, JSON datasets must match the Dataset-JSON format used by the rule editor: +When validating a single rule with `--local-rules`, JSON datasets must match the +Dataset-JSON format used by the rule editor: ```json { diff --git a/docs/faq.md b/docs/faq.md index 1227743e8..fbae1ff54 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -1,6 +1,8 @@ # FAQ & Troubleshooting -> Still stuck? 
Post in the [Q&A discussion board](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a) or [open an issue](https://github.com/cdisc-org/cdisc-rules-engine/issues). +> Still stuck? Post in the +> [Q&A discussion board](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a) +> or [open an issue](https://github.com/cdisc-org/cdisc-rules-engine/issues). --- @@ -8,7 +10,8 @@ ### Which Python version does CORE require? -**Python 3.12 is required.** Other versions are not supported and may cause unexpected errors or incorrect validation results. +**Python 3.12 is required.** Other versions are not supported and may cause unexpected +errors or incorrect validation results. ```bash python --version @@ -26,7 +29,9 @@ xattr -rd com.apple.quarantine . ### I get `[SSL: CERTIFICATE_VERIFY_FAILED]` when running `update-cache` -This is typically caused by a corporate firewall performing SSL inspection. CORE connects to `api.library.cdisc.org` on port 443. Contact your IT department to either obtain the corporate CA certificate bundle or request whitelisting for that hostname. +This is typically caused by a corporate firewall performing SSL inspection. CORE connects +to `api.library.cdisc.org` on port 443. Contact your IT department to either obtain the +corporate CA certificate bundle or request whitelisting for that hostname. --- @@ -34,27 +39,40 @@ This is typically caused by a corporate firewall performing SSL inspection. CORE ### Do I need an API key? -An API key is required for controlled terminology and library metadata. **Rules are accessible without a key.** Running `update-cache` without a key will still populate conformance rules. +An API key is required for controlled terminology and library metadata. **Rules are +accessible without a key.** Running `update-cache` without a key will still populate +conformance rules. 
-To obtain a key: [wiki.cdisc.org — Getting Started](https://wiki.cdisc.org/display/LIBSUPRT/Getting+Started%3A+Access+to+CDISC+Library+API+using+API+Key+Authentication) +To obtain a key: +[wiki.cdisc.org — Getting Started](https://wiki.cdisc.org/display/LIBSUPRT/Getting+Started%3A+Access+to+CDISC+Library+API+using+API+Key+Authentication) > Note: It can take up to an hour after sign-up for a key to be issued. ### Where do I put my API key? -Set it as the `CDISC_LIBRARY_API_KEY` environment variable, or add it (without quotes) to a `.env` file in the project root directory: +Set it as the `CDISC_LIBRARY_API_KEY` environment variable, or add it (without quotes) +to a `.env` file in the project root directory: -``` +```text CDISC_LIBRARY_API_KEY=your_key_here ``` ### My validation returned no results or unexpected rules -- **Console output / logs:** By default, engine logs are disabled. Use `-l` / `--log-level` to enable them. Available levels: `info`, `debug`, `warn`, `error`, `critical`. -- **The output report:** Open the results file and review the **Rule Report** tab (XLSX) or the top-level `Rules_Report` array (JSON). Rules with a status of `SKIPPED` will include a reason in the Issue Details — this is often the cause of unexpectedly absent results. -- **Scope flags:** Confirm that your `-s`, `-v`, and for TIG `-ss` arguments match the standard, version, and substandard you intended to validate against. A mismatch will cause rules to be silently out of scope. - -If you're still not seeing expected results after checking the above, post in the [Q&A discussion board](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a) and include the relevant log output and the rule IDs you expected to run. +- **Console output / logs:** By default, engine logs are disabled. Use `-l` / + `--log-level` to enable them. Available levels: `info`, `debug`, `warn`, `error`, + `critical`. 
+- **The output report:** Open the results file and review the **Rule Report** tab (XLSX) + or the top-level `Rules_Report` array (JSON). Rules with a status of `SKIPPED` will + include a reason in the Issue Details — this is often the cause of unexpectedly absent + results. +- **Scope flags:** Confirm that your `-s`, `-v`, and for TIG `-ss` arguments match the + standard, version, and substandard you intended to validate against. A mismatch will + cause rules to be silently out of scope. + +If you're still not seeing expected results after checking the above, post in the +[Q&A discussion board](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a) +and include the relevant log output and the rule IDs you expected to run. --- @@ -77,7 +95,12 @@ CORE defaults to `utf-8`. If your files use a different encoding, specify it: python core.py validate -s sdtmig -v 3-4 -dp path/to/dataset.xpt -e cp1252 ``` -> NOTE: you may notice a `'utf-9' codec can't decode byte` error in the logs. This is usually due to Windows Smart Quotes, produced in excel, which are CP1252 encoded, not utf-8. Unfortunately, Windows Smart Quotes produce a file that is mostly utf-8 with some CP1252 for the smart quotes so the -e command will not work to resolve this. You will need to locate these quotes and manually change them before being able to rerun this data. +> NOTE: you may notice a `'utf-8' codec can't decode byte` error in the logs. This is +> usually due to Windows Smart Quotes, produced in Excel, which are CP1252 encoded, not +> utf-8. Unfortunately, Windows Smart Quotes produce a file that is mostly utf-8 with +> some CP1252 for the smart quotes so the `-e` command will not work to resolve this. +> You will need to locate these quotes and manually change them before being able to +> rerun this data. ### Will using -d pointed at my data directory cause CORE to include my Define-XML file in the validation? @@ -85,7 +108,8 @@ No.
Define-XML must be provided separately via `--define-xml-path` (`-dxp`). ### Validation is very slow on large files -Set `DATASET_SIZE_THRESHOLD=0` to force Dask processing for all datasets regardless of size: +Set `DATASET_SIZE_THRESHOLD=0` to force Dask processing for all datasets regardless +of size: ```bash # Linux/Mac @@ -97,15 +121,17 @@ $env:DATASET_SIZE_THRESHOLD=0; .\core.exe validate -s sdtmig -v 3-4 -d C:\path\t Or add to a `.env` file in the root directory: -``` +```text DATASET_SIZE_THRESHOLD=0 ``` -By default the engine uses Dask automatically for datasets exceeding 1/4 of available RAM. +By default the engine uses Dask automatically for datasets exceeding 1/4 of available +RAM. ### How do I validate against TIG? -TIG requires `--substandard` and, in the case of custom domains, `--use-case` to identify what use case the custom domains are applicable to. +TIG requires `--substandard` and, in the case of custom domains, `--use-case` to +identify what use case the custom domains are applicable to. ```bash python core.py validate -s tig -v 1-0 -ss SDTM -uc INDH -d /path/to/datasets @@ -126,7 +152,8 @@ python core.py validate -s sdtmig -v 3-4 -d /data -r CORE-000001 -r CORE-000002 python core.py validate -s sdtmig -v 3-4 -d /data -er CORE-000001 ``` -You can view and clone the CDISC CORE rules at [cdisc-open-rules](https://github.com/cdisc-org/cdisc-open-rules) +You can view and clone the CDISC CORE rules at +[cdisc-open-rules](https://github.com/cdisc-org/cdisc-open-rules). ## Privacy & Data Protection @@ -134,12 +161,19 @@ You can view and clone the CDISC CORE rules at [cdisc-open-rules](https://github **No. All input data remains local to the machine where CORE is executed.** Specifically: -- Study files are read directly from the local filesystem (or a specified input path) and are never uploaded or transmitted anywhere. 
-- Validation runs entirely in-process on the local machine (or whatever environment CORE is deployed to — on-premises server, cloud VM, container, etc.). -- The output report is written locally upon completion (or to a specified output directory). -- All metadata required for rule execution (controlled terminology packages, standard metadata, etc.) is pre-fetched via `update-cache` and bundled at release time. Rule Validation execution itself requires no outbound network calls carrying study data. +- Study files are read directly from the local filesystem (or a specified input path) + and are never uploaded or transmitted anywhere. +- Validation runs entirely in-process on the local machine (or whatever environment + CORE is deployed to — on-premises server, cloud VM, container, etc.). +- The output report is written locally upon completion (or to a specified output + directory). +- All metadata required for rule execution (controlled terminology packages, standard + metadata, etc.) is pre-fetched via `update-cache` and bundled at release time. Rule + Validation execution itself requires no outbound network calls carrying study data. -**No patient or personal data ever leaves the environment where CORE is installed**, supporting compliance with data protection requirements such as HIPAA, GDPR, and sponsor data governance policies. +**No patient or personal data ever leaves the environment where CORE is installed**, +supporting compliance with data protection requirements such as HIPAA, GDPR, and sponsor +data governance policies. --- @@ -148,18 +182,27 @@ You can view and clone the CDISC CORE rules at [cdisc-open-rules](https://github ### What's the difference between custom rules and custom standards? - **Custom rules** are individual rule definitions stored in the cache by CORE ID. -- **Custom standards** map a standard identifier to a list of rule IDs, acting as a lookup for which rules apply. 
+- **Custom standards** map a standard identifier to a list of rule IDs, acting as a + lookup for which rules apply. -Add your custom rules first, then create a standard that references them. See [CLI Reference → update-cache](cli-reference.md#custom-rules) for details. Custom Rules & Standards continue to be a work in progress, there are tickets within CORE's Issues to full implement further support for them in the future. +Add your custom rules first, then create a standard that references them. See +[CLI Reference → update-cache](cli-reference.md#custom-rules) for details. Custom Rules +& Standards continue to be a work in progress; there are tickets within CORE's Issues +to fully implement further support for them in the future. ### Can a custom standard use CDISC library metadata? -Yes. If you name your custom standard after an existing CDISC standard (e.g. `sdtmig/3-4`), CORE will fetch library metadata for that standard while applying your custom rules. +Yes. If you name your custom standard after an existing CDISC standard +(e.g. `sdtmig/3-4`), CORE will fetch library metadata for that standard while applying +your custom rules. --- ## Still Need Help?
-- **Search existing Q&A:** [GitHub Discussions](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a) +- **Search existing Q&A:** + [GitHub Discussions](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a) - **Open a new discussion:** For questions or usage help -- **Open an issue:** [GitHub Issues](https://github.com/cdisc-org/cdisc-rules-engine/issues) for bugs or feature requests +- **Open an issue:** + [GitHub Issues](https://github.com/cdisc-org/cdisc-rules-engine/issues) + for bugs or feature requests diff --git a/docs/index.md b/docs/index.md index a6f428cab..a9b20c652 100644 --- a/docs/index.md +++ b/docs/index.md @@ -8,13 +8,15 @@ [![PyPI](https://img.shields.io/pypi/v/cdisc-rules-engine.svg)](https://pypi.org/project/cdisc-rules-engine) [![Docker](https://img.shields.io/docker/v/cdiscdocker/cdisc-rules-engine?label=docker)](https://hub.docker.com/r/cdiscdocker/cdisc-rules-engine) -CORE is the open-source offering of the CDISC Conformance Rules Engine — a tool for validating clinical trial data against CDISC data standards. +CORE is the open-source offering of the CDISC Conformance Rules Engine — a tool for +validating clinical trial data against CDISC data standards. --- ## Scope -CORE validates study datasets against published CDISC conformance rules for the following standards: +CORE validates study datasets against published CDISC conformance rules for the following +standards: | Standard | Description | | ---------------------- | ------------------------------------------------ | @@ -25,7 +27,9 @@ CORE validates study datasets against published CDISC conformance rules for the | **FDA Business Rules** | FDA submission conformance rules | | **USDM** | Unified Study Definitions Model | -CORE validates data _structure and conformance_ against published rules. It is not a replacement for clinical review, statistical analysis, or submission readiness assessment. 
Rule logic is defined in [`cdisc-open-rules`](https://github.com/cdisc-org/cdisc-open-rules). +CORE validates data _structure and conformance_ against published rules. It is not a +replacement for clinical review, statistical analysis, or submission readiness assessment. +Rule logic is defined in [`cdisc-open-rules`](https://github.com/cdisc-org/cdisc-open-rules). --- @@ -45,8 +49,14 @@ CORE validates data _structure and conformance_ against published rules. It is n ## Community & Support -- **Questions & Discussions:** [GitHub Discussions — Q&A](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a) -- **Bug Reports & Feature Requests:** [GitHub Issues](https://github.com/cdisc-org/cdisc-rules-engine/issues) -- **Rule Contributions:** [cdisc-open-rules](https://github.com/cdisc-org/cdisc-open-rules) -- **CDISC Library API:** [wiki.cdisc.org — Getting Started](https://wiki.cdisc.org/display/LIBSUPRT/Getting+Started%3A+Access+to+CDISC+Library+API+using+API+Key+Authentication) -- **Published CDISC Conformance Rules Github**[cdisc-open-rules](https://github.com/cdisc-org/cdisc-open-rules) +- **Questions & Discussions:** + [GitHub Discussions — Q&A](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a) +- **Bug Reports & Feature Requests:** + [GitHub Issues](https://github.com/cdisc-org/cdisc-rules-engine/issues) +- **Rule Contributions:** + [cdisc-open-rules](https://github.com/cdisc-org/cdisc-open-rules) +- **CDISC Library API:** + [wiki.cdisc.org — Getting Started](https://wiki.cdisc.org/display/LIBSUPRT/Getting+Started%3A+Access+to+CDISC+Library+API+using+API+Key+Authentication) +- **Published CDISC Conformance Rules GitHub:** + [cdisc-open-rules](https://github.com/cdisc-org/cdisc-open-rules) + \ No newline at end of file diff --git a/docs/quick-start.md b/docs/quick-start.md index 0b810ae20..c05838521 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -1,16 +1,20 @@ # Quick Start -> **Need help?** See [FAQ & 
Troubleshooting](faq.md) or post in [GitHub Discussions](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a). +> **Need help?** See [FAQ & Troubleshooting](faq.md) or post in +> [GitHub Discussions](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a). --- ## Option 1: Pre-Built Executable -**Best for:** Users who want to run CORE without installing Python or managing dependencies. +**Best for:** Users who want to run CORE without installing Python or managing +dependencies. ### 1. Download -Download the latest executable for your operating system from the [Releases page](https://github.com/cdisc-org/cdisc-rules-engine/releases) and unzip the downloaded file. +Download the latest executable for your operating system from the +[Releases page](https://github.com/cdisc-org/cdisc-rules-engine/releases) and unzip +the downloaded file. ### 2. Verify the Installation @@ -39,9 +43,14 @@ chmod +x ./core ### 3. (Optional) Update the Cache -Executable releases ship with a pre-populated cache, so you can skip this step and go straight to validation. If you want the latest published rules, see [CLI Reference → update-cache](cli-reference.md#updating-the-cache-update-cache) for API key setup and options. +Executable releases ship with a pre-populated cache, so you can skip this step and go +straight to validation. If you want the latest published rules, see +[CLI Reference → update-cache](cli-reference.md#updating-the-cache-update-cache) for +API key setup and options. -> **Note:** Rules published after a release may depend on engine features not present in that executable. When in doubt, wait for the next release rather than updating the cache manually. +> **Note:** Rules published after a release may depend on engine features not present +> in that executable. When in doubt, wait for the next release rather than updating the +> cache manually. ### 4. 
Run a Validation @@ -83,7 +92,7 @@ Test files are cleaned up automatically after completion. Check your version: ```bash - python --version +python --version ``` Install Python 3.12 from [python.org](https://www.python.org/downloads/) if needed. @@ -92,7 +101,7 @@ Install Python 3.12 from [python.org](https://www.python.org/downloads/) if need Check your version: ```bash - git --version +git --version ``` Install Git from [git-scm.com](https://git-scm.com/downloads) if needed. @@ -154,8 +163,12 @@ CORE supports the following input formats: | **XLSX** | Microsoft Excel | | **CSV** | CORE Comma Separated Value Test Data | -> **Note:** See [here](https://github.com/cdisc-org/cdisc-open-rules/blob/main/README.md#rule-authoring-and-test-data-creation-process) for a description of CSV formatting -> **Note:** Define-XML files must be provided via `--define-xml-path` (`-dxp`), not through the dataset directory. +> **Note:** See the +> [CSV formatting description](https://github.com/cdisc-org/cdisc-open-rules/blob/main/README.md#rule-authoring-and-test-data-creation-process) +> for details on CSV formatting. +> +> **Note:** Define-XML files must be provided via `--define-xml-path` (`-dxp`), not +> through the dataset directory. --- @@ -173,4 +186,5 @@ Command summary: | `list-rule-sets` | List standards and versions in the cache | | `list-ct` | List controlled terminology packages in the cache | -> Throughout these docs, examples use `python core.py`. If you're using the executable, replace this with `.\core.exe` (Windows) or `./core` (Linux/Mac). +> Throughout these docs, examples use `python core.py`. If you're using the executable, +> replace this with `.\core.exe` (Windows) or `./core` (Linux/Mac). 
diff --git a/requirements-dev.txt b/requirements-dev.txt index ac709f651..5033a2404 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,7 @@ -r requirements.txt -black==24.10.0 -flake8==6.1.0 -pre-commit==2.20.0 +black==26.5.1 +flake8==7.3.0 +pre-commit==3.8.0 pytest==7.4.0 pytest-asyncio==0.21.0 pytest-cov==6.0.0 \ No newline at end of file diff --git a/scripts/merge_schema_markdown.md b/scripts/merge_schema_markdown.md index c4540d4f3..c748e3004 100644 --- a/scripts/merge_schema_markdown.md +++ b/scripts/merge_schema_markdown.md @@ -1,9 +1,12 @@ # Schema Markdown Merge Tool -This directory contains a script to merge JSON schema files with their corresponding markdown descriptions. This allows us to: +This directory contains a script to merge JSON schema files with their corresponding +markdown descriptions. This allows us to: -- maintain human-readable markdown documentation outside of the JSON schema files for use by the documentation generator -- generate vscode-readable JSON schema files with markdown descriptions to provide tooltips for rule authors +- maintain human-readable markdown documentation outside of the JSON schema files + for use by the documentation generator +- generate vscode-readable JSON schema files with markdown descriptions to provide + tooltips for rule authors ## Overview @@ -11,10 +14,14 @@ The `merge_schema_markdown.py` script: - Reads JSON schema files from `resources/schema/rule/` - Finds matching markdown files (e.g., `Operator.json` → `Operator.md`) -- Parses markdown sections at all header levels (`#`, `##`, `###`, etc.) 
into separate dictionaries -- For each header level, includes all nested subsections until reaching a header at the same or higher level -- Adds `markdownDescription` properties to schema items where `const` values match section names -- When looking up descriptions, searches through all header levels starting from the top level +- Parses markdown sections at all header levels (`#`, `##`, `###`, etc.) into + separate dictionaries +- For each header level, includes all nested subsections until reaching a header + at the same or higher level +- Adds `markdownDescription` properties to schema items where `const` values match + section names +- When looking up descriptions, searches through all header levels starting from + the top level - Outputs merged schemas to `resources/schema/rule-merged/` ## Usage @@ -27,11 +34,13 @@ Run the script locally: python scripts/merge_schema_markdown.py ``` -This will process all schema files and output merged versions to `resources/schema/rule-merged/`. +This will process all schema files and output merged versions to +`resources/schema/rule-merged/`. ### Automatic Execution -The GitHub Action workflow (`.github/workflows/merge-schema-markdown.yml`) automatically runs on pushes where files in `resources/schema/rule/` are changed +The GitHub Action workflow (`.github/workflows/merge-schema-markdown.yml`) automatically +runs on pushes where files in `resources/schema/rule/` are changed. The workflow will: @@ -40,13 +49,17 @@ The workflow will: ## How It Works -1. **Parse Markdown**: Extract sections from `.md` files at all header levels (`#`, `##`, `###`, etc.) +1. **Parse Markdown**: Extract sections from `.md` files at all header levels + (`#`, `##`, `###`, etc.) 
- Each header level is stored in a separate dictionary - Nested subsections are included in parent section content - - For example, a `## Section` will include all `###` and `####` headers and their content until the next `##` or `#` header + - For example, a `## Section` will include all `###` and `####` headers and their + content until the next `##` or `#` header 2. **Traverse JSON**: Recursively search for `const` properties in schema files -3. **Add Descriptions**: When a `const` value matches a section name at any level, add a `markdownDescription` property - - Searches through header levels starting from top level (e.g., `#` level first, then `##`, then `###`) +3. **Add Descriptions**: When a `const` value matches a section name at any level, + add a `markdownDescription` property + - Searches through header levels starting from top level + (e.g., `#` level first, then `##`, then `###`) 4. **Preserve Structure**: Maintain all original schema properties and formatting ## Example @@ -81,11 +94,9 @@ Columns are the columns within the original dataset ``` ```` -```` - The output `rule-merged/Rule_Type.json` will be: -```json +````json { "anyOf": [ { @@ -97,4 +108,5 @@ The output `rule-merged/Rule_Type.json` will be: } ```` -Note: The `markdownDescription` for "Record Data" includes all nested `####` headers and their content because they are subsections under the `## Record Data` header. +Note: The `markdownDescription` for "Record Data" includes all nested `####` headers +and their content because they are subsections under the `## Record Data` header. 
diff --git a/tests/QARegressionTests/test_Issues/test_CoreIssue208.py b/tests/QARegressionTests/test_Issues/test_CoreIssue208.py index acd8e243a..039a51810 100644 --- a/tests/QARegressionTests/test_Issues/test_CoreIssue208.py +++ b/tests/QARegressionTests/test_Issues/test_CoreIssue208.py @@ -5,7 +5,6 @@ import pytest from conftest import get_python_executable - """This regression test is for automating the validation of acceptancce criteria which is "For any variables that come from datasets and appear in the results, the variables should have the same case as the variable names in the dataset". diff --git a/tests/unit/test_adam_variable_reader.py b/tests/unit/test_adam_variable_reader.py index 2888ba6b4..09a93eaca 100644 --- a/tests/unit/test_adam_variable_reader.py +++ b/tests/unit/test_adam_variable_reader.py @@ -32,7 +32,7 @@ def test_check_xx_zz_rule(column_name): @pytest.mark.parametrize("column_name", ["ANL23FL"]) -def test_check_xx_zz_rule(column_name): +def test_check_xx_zz_rule_2(column_name): ad = AdamVariableReader() ad.check_xx_zz(column_name) assert ad.selection_algorithm["ANL23FL"] == 23 diff --git a/tests/unit/test_csv_reader.py b/tests/unit/test_csv_reader.py index 5cb7f6cdc..8fe68111a 100644 --- a/tests/unit/test_csv_reader.py +++ b/tests/unit/test_csv_reader.py @@ -176,30 +176,24 @@ def test_duplicate_paths_removed(self, tmp_path): assert result.count(str(dm)) == 1 -VARIABLES_CSV = textwrap.dedent( - """\ +VARIABLES_CSV = textwrap.dedent("""\ dataset,variable,label,type,length patients.csv,id,Patient ID,integer,10 patients.csv,name,Patient Name,string,50 patients.csv,age,Patient Age,integer,3 -""" -) +""") -DATA_CSV = textwrap.dedent( - """\ +DATA_CSV = textwrap.dedent("""\ id,name,age 1,Alice,30 2,Bob,25 3,Carol,40 -""" -) +""") -DATASETS_CSV = textwrap.dedent( - """\ +DATASETS_CSV = textwrap.dedent("""\ Filename,Label patients.csv,Patient Dataset -""" -) +""") def _write(path: Path, content: str) -> None: @@ -303,12 +297,10 @@ def 
test_variable_name_to_size_map_with_values(self): assert sizes == {"id": 10, "name": 50, "age": 3} def test_variable_name_to_size_map_with_nan_length(self): - variables_with_nan = textwrap.dedent( - """\ + variables_with_nan = textwrap.dedent("""\ dataset,variable,label,type,length patients.csv,id,Patient ID,integer, - """ - ) + """) _write(self._variables_path(), variables_with_nan) reader = DatasetCSVMetadataReader(str(self.data_path), "patients.csv") sizes = reader.read()["variable_name_to_size_map"] diff --git a/tests/unit/test_dataset_builders/test_dataset_metadata_define_dataset_builder.py b/tests/unit/test_dataset_builders/test_dataset_metadata_define_dataset_builder.py index aff6c25e8..72bf4a6fa 100644 --- a/tests/unit/test_dataset_builders/test_dataset_metadata_define_dataset_builder.py +++ b/tests/unit/test_dataset_builders/test_dataset_metadata_define_dataset_builder.py @@ -11,7 +11,6 @@ ) from cdisc_rules_engine.models.dataset.pandas_dataset import PandasDataset - define_metadata = [ { "define_dataset_name": "TS", diff --git a/tests/unit/test_dataset_builders/test_domain_presence_define_builder.py b/tests/unit/test_dataset_builders/test_domain_presence_define_builder.py index 28278d9b8..891d4a135 100644 --- a/tests/unit/test_dataset_builders/test_domain_presence_define_builder.py +++ b/tests/unit/test_dataset_builders/test_domain_presence_define_builder.py @@ -10,7 +10,6 @@ LibraryMetadataContainer, ) - define_metadata = [ { "define_dataset_name": "AE", diff --git a/tests/unit/test_dictionaries/test_meddra/test_meddra_term.py b/tests/unit/test_dictionaries/test_meddra/test_meddra_term.py index 99180b2e8..fc4514f3d 100644 --- a/tests/unit/test_dictionaries/test_meddra/test_meddra_term.py +++ b/tests/unit/test_dictionaries/test_meddra/test_meddra_term.py @@ -32,7 +32,7 @@ def test_get_term_hierarchies(): terms = factory.install_terms(dictionary_path) expected_term_hierarchies = set( [ - f"TESTSOC{i+1}/TESTHLGT{i+1}/TESTHLT{i+1}/TESTPT{i+1}/TESTLLT{i+1}" + 
f"TESTSOC{i + 1}/TESTHLGT{i + 1}/TESTHLT{i + 1}/TESTPT{i + 1}/TESTLLT{i + 1}" for i in range(len(terms[TermTypes.LLT.value])) ] ) diff --git a/tests/unit/test_dictionaries/test_meddra/test_meddra_terms_factory.py b/tests/unit/test_dictionaries/test_meddra/test_meddra_terms_factory.py index 0907c2fb7..f9ec89bfe 100644 --- a/tests/unit/test_dictionaries/test_meddra/test_meddra_terms_factory.py +++ b/tests/unit/test_dictionaries/test_meddra/test_meddra_terms_factory.py @@ -22,50 +22,53 @@ def test_install(): # Validate soc for i, term in enumerate(dictionary[TermTypes.SOC.value].values()): - assert term.term == f"TESTSOC{i+1}" - assert term.code == f"SOC{i+1}" - assert term.abbreviation == f"TS{i+1}" - assert term.code_hierarchy == f"SOC{i+1}" - assert term.term_hierarchy == f"TESTSOC{i+1}" + assert term.term == f"TESTSOC{i + 1}" + assert term.code == f"SOC{i + 1}" + assert term.abbreviation == f"TS{i + 1}" + assert term.code_hierarchy == f"SOC{i + 1}" + assert term.term_hierarchy == f"TESTSOC{i + 1}" # Validate HLGT for i, term in enumerate(dictionary[TermTypes.HLGT.value].values()): - assert term.term == f"TESTHLGT{i+1}" - assert term.code == f"HLGT{i+1}" - assert term.code_hierarchy == f"SOC{i+1}/HLGT{i+1}" - assert term.term_hierarchy == f"TESTSOC{i+1}/TESTHLGT{i+1}" - assert term.parent_code == f"SOC{i+1}" - assert term.parent_term == f"TESTSOC{i+1}" + assert term.term == f"TESTHLGT{i + 1}" + assert term.code == f"HLGT{i + 1}" + assert term.code_hierarchy == f"SOC{i + 1}/HLGT{i + 1}" + assert term.term_hierarchy == f"TESTSOC{i + 1}/TESTHLGT{i + 1}" + assert term.parent_code == f"SOC{i + 1}" + assert term.parent_term == f"TESTSOC{i + 1}" # Validate HLT for i, term in enumerate(dictionary[TermTypes.HLT.value].values()): - assert term.term == f"TESTHLT{i+1}" - assert term.code == f"HLT{i+1}" - assert term.code_hierarchy == f"SOC{i+1}/HLGT{i+1}/HLT{i+1}" - assert term.term_hierarchy == f"TESTSOC{i+1}/TESTHLGT{i+1}/TESTHLT{i+1}" - assert term.parent_code == 
f"HLGT{i+1}" - assert term.parent_term == f"TESTHLGT{i+1}" + assert term.term == f"TESTHLT{i + 1}" + assert term.code == f"HLT{i + 1}" + assert term.code_hierarchy == f"SOC{i + 1}/HLGT{i + 1}/HLT{i + 1}" + assert term.term_hierarchy == f"TESTSOC{i + 1}/TESTHLGT{i + 1}/TESTHLT{i + 1}" + assert term.parent_code == f"HLGT{i + 1}" + assert term.parent_term == f"TESTHLGT{i + 1}" # Validate PT for i, term in enumerate(dictionary[TermTypes.PT.value].values()): - assert term.term == f"TESTPT{i+1}" - assert term.code == f"PT{i+1}" - assert term.code_hierarchy == f"SOC{i+1}/HLGT{i+1}/HLT{i+1}/PT{i+1}" + assert term.term == f"TESTPT{i + 1}" + assert term.code == f"PT{i + 1}" + assert term.code_hierarchy == f"SOC{i + 1}/HLGT{i + 1}/HLT{i + 1}/PT{i + 1}" assert ( term.term_hierarchy - == f"TESTSOC{i+1}/TESTHLGT{i+1}/TESTHLT{i+1}/TESTPT{i+1}" + == f"TESTSOC{i + 1}/TESTHLGT{i + 1}/TESTHLT{i + 1}/TESTPT{i + 1}" ) - assert term.parent_code == f"HLT{i+1}" - assert term.parent_term == f"TESTHLT{i+1}" + assert term.parent_code == f"HLT{i + 1}" + assert term.parent_term == f"TESTHLT{i + 1}" # Validate LLT for i, term in enumerate(dictionary[TermTypes.LLT.value].values()): - assert term.term == f"TESTLLT{i+1}" - assert term.code == f"LLT{i+1}" - assert term.code_hierarchy == f"SOC{i+1}/HLGT{i+1}/HLT{i+1}/PT{i+1}/LLT{i+1}" + assert term.term == f"TESTLLT{i + 1}" + assert term.code == f"LLT{i + 1}" + assert ( + term.code_hierarchy + == f"SOC{i + 1}/HLGT{i + 1}/HLT{i + 1}/PT{i + 1}/LLT{i + 1}" + ) assert ( term.term_hierarchy - == f"TESTSOC{i+1}/TESTHLGT{i+1}/TESTHLT{i+1}/TESTPT{i+1}/TESTLLT{i+1}" + == f"TESTSOC{i + 1}/TESTHLGT{i + 1}/TESTHLT{i + 1}/TESTPT{i + 1}/TESTLLT{i + 1}" ) - assert term.parent_code == f"PT{i+1}" - assert term.parent_term == f"TESTPT{i+1}" + assert term.parent_code == f"PT{i + 1}" + assert term.parent_term == f"TESTPT{i + 1}" diff --git a/tests/unit/test_operations/test_label_referenced_variable_metadata.py 
b/tests/unit/test_operations/test_label_referenced_variable_metadata.py index c96570043..217727a82 100644 --- a/tests/unit/test_operations/test_label_referenced_variable_metadata.py +++ b/tests/unit/test_operations/test_label_referenced_variable_metadata.py @@ -17,7 +17,7 @@ from unittest.mock import Mock -@pytest.mark.parametrize("dataset_type", [(PandasDataset)]) +@pytest.mark.parametrize("dataset_type", [PandasDataset]) def test_get_label_referenced_variable_metadata( operation_params: OperationParams, dataset_type ): diff --git a/tests/unit/test_operations/test_name_referenced_variable_metadata.py b/tests/unit/test_operations/test_name_referenced_variable_metadata.py index 7c45ee0ee..4d6b247d6 100644 --- a/tests/unit/test_operations/test_name_referenced_variable_metadata.py +++ b/tests/unit/test_operations/test_name_referenced_variable_metadata.py @@ -16,7 +16,7 @@ from unittest.mock import Mock -@pytest.mark.parametrize("dataset_type", [(PandasDataset)]) +@pytest.mark.parametrize("dataset_type", [PandasDataset]) def test_get_name_referenced_variable_metadata( operation_params: OperationParams, dataset_type ):