diff --git a/.flake8 b/.flake8 index 069c56121..894dcd5de 100644 --- a/.flake8 +++ b/.flake8 @@ -6,5 +6,6 @@ exclude = .github, .pytest_cache, cdisc_rules_engine/resources, venv, + .venv, build, dist diff --git a/.github/ISSUE_TEMPLATE/1-bug.yml b/.github/ISSUE_TEMPLATE/1-bug.yml index 82ba66361..98979f8f5 100644 --- a/.github/ISSUE_TEMPLATE/1-bug.yml +++ b/.github/ISSUE_TEMPLATE/1-bug.yml @@ -39,7 +39,9 @@ body: value: https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- - type: markdown attributes: - value: "\nIn the next fields, please provide a [Minimal Reproducible Example](https://stackoverflow.com/help/minimal-reproducible-example)." + value: | + In the next fields, please provide a + [Minimal Reproducible Example](https://stackoverflow.com/help/minimal-reproducible-example). - type: input id: cli_command validations: diff --git a/.github/workflows/build-binary.yml b/.github/workflows/build-binary.yml index 8a257da1d..773d0d649 100644 --- a/.github/workflows/build-binary.yml +++ b/.github/workflows/build-binary.yml @@ -27,13 +27,46 @@ jobs: - name: Build Binary (Linux) if: runner.os == 'Linux' - run: pyinstaller --onedir --contents-directory "." core.py --icon=resources/assets/CORE_logo_sm.ico --dist ./dist/output/${{ inputs.name }} --collect-submodules pyreadstat --add-data=$pythonLocation/lib/python3.12/site-packages/xmlschema/schemas:xmlschema/schemas --add-data=resources/cache:resources/cache --add-data=resources/templates:resources/templates --add-data=resources/schema:resources/schema --add-data=resources/datasets:resources/datasets --add-data=resources/jsonata:resources/jsonata + run: >- + pyinstaller --onedir --contents-directory "." 
core.py + --icon=resources/assets/CORE_logo_sm.ico + --dist ./dist/output/${{ inputs.name }} + --collect-submodules pyreadstat + --add-data=$pythonLocation/lib/python3.12/site-packages/xmlschema/schemas:xmlschema/schemas + --add-data=resources/cache:resources/cache + --add-data=resources/templates:resources/templates + --add-data=resources/schema:resources/schema + --add-data=resources/datasets:resources/datasets + --add-data=resources/jsonata:resources/jsonata - name: Build Binary (Mac) if: runner.os == 'macOS' - run: pyinstaller --onedir --contents-directory "." core.py --icon=resources/assets/CORE_logo_sm.icns --dist ./dist/output/${{ inputs.name }} --collect-submodules pyreadstat --add-data=$pythonLocation/lib/python3.12/site-packages/xmlschema/schemas:xmlschema/schemas --add-data=resources/cache:resources/cache --add-data=resources/templates:resources/templates --add-data=resources/schema:resources/schema --add-data=resources/datasets:resources/datasets --add-data=resources/jsonata:resources/jsonata + run: >- + pyinstaller --onedir --contents-directory "." core.py + --icon=resources/assets/CORE_logo_sm.icns + --dist ./dist/output/${{ inputs.name }} + --collect-submodules pyreadstat + --add-data=$pythonLocation/lib/python3.12/site-packages/xmlschema/schemas:xmlschema/schemas + --add-data=resources/cache:resources/cache + --add-data=resources/templates:resources/templates + --add-data=resources/schema:resources/schema + --add-data=resources/datasets:resources/datasets + --add-data=resources/jsonata:resources/jsonata - name: Build Binary (Windows) if: runner.os == 'Windows' - run: pyinstaller --onedir --contents-directory "." 
core.py --icon=resources/assets/CORE_logo_sm.ico --dist ./dist/output/${{ inputs.name }} --collect-submodules pyreadstat --add-data="$env:pythonLocation\Lib\site-packages\xmlschema\schemas;xmlschema/schemas" --hidden-import numpy --hidden-import numpy.core._methods --hidden-import numpy.lib.format --add-data="resources/cache;resources/cache" --add-data="resources/templates;resources/templates" --add-data="resources/schema;resources/schema" --add-data="resources/datasets;resources/datasets" --add-data="resources/jsonata;resources/jsonata" + run: >- + pyinstaller --onedir --contents-directory "." core.py + --icon=resources/assets/CORE_logo_sm.ico + --dist ./dist/output/${{ inputs.name }} + --collect-submodules pyreadstat + --add-data="$env:pythonLocation\Lib\site-packages\xmlschema\schemas;xmlschema/schemas" + --hidden-import numpy + --hidden-import numpy.core._methods + --hidden-import numpy.lib.format + --add-data="resources/cache;resources/cache" + --add-data="resources/templates;resources/templates" + --add-data="resources/schema;resources/schema" + --add-data="resources/datasets;resources/datasets" + --add-data="resources/jsonata;resources/jsonata" - name: Archive Binary uses: actions/upload-artifact@v6 with: diff --git a/.github/workflows/check-schema-markdown.yml b/.github/workflows/check-schema-markdown.yml index 04e684864..d8b0098e7 100644 --- a/.github/workflows/check-schema-markdown.yml +++ b/.github/workflows/check-schema-markdown.yml @@ -29,9 +29,12 @@ jobs: run: | npm i prettier npx prettier resources/schema/rule-merged/*.json --write - - uses: CatChen/check-git-status-action@7b45cb4ce3e00a8bce4910dc2d5f2785235a6d7e # v2.1.2 + - uses: CatChen/check-git-status-action@7b45cb4ce3e00a8bce4910dc2d5f2785235a6d7e # v2.1.2 with: fail-if-not-clean: true request-changes-if-not-clean: ${{ github.event_name == 'pull_request' }} - request-changes-comment: Updated schema has not been merged with markdown descriptions. 
Please run the "Merge Schema with Markdown Descriptions" workflow to update the merged schema files. + request-changes-comment: >- + Updated schema has not been merged with markdown descriptions. + Please run the "Merge Schema with Markdown Descriptions" workflow + to update the merged schema files. targets: resources/schema/rule-merged diff --git a/.github/workflows/deploy-rule-tester.yml b/.github/workflows/deploy-rule-tester.yml index 908c45f3c..7267cfc9a 100644 --- a/.github/workflows/deploy-rule-tester.yml +++ b/.github/workflows/deploy-rule-tester.yml @@ -16,7 +16,11 @@ permissions: contents: read env: - creds: '{"clientId":"${{ vars.AZURE_CLIENT_ID }}","clientSecret":"${{ secrets.AZURE_CLIENT_SECRET }}","subscriptionId":"${{ vars.AZURE_SUBSCRIPTION_ID }}","tenantId":"${{ vars.AZURE_TENANT_ID }}"}' + creds: >- + {"clientId":"${{ vars.AZURE_CLIENT_ID }}", + "clientSecret":"${{ secrets.AZURE_CLIENT_SECRET }}", + "subscriptionId":"${{ vars.AZURE_SUBSCRIPTION_ID }}", + "tenantId":"${{ vars.AZURE_TENANT_ID }}"} functionAppName: cdisc-library-conformance-rules-generator-dev PYTHON_VERSION: "3.12" @@ -34,7 +38,8 @@ jobs: with: app-name: ${{ env.functionAppName }} mask-inputs: false - app-settings-json: '{"WEBSITE_ENABLE_SYNC_UPDATE_SITE": "1"}' # wait for this step to complete before the webapps-deploy step + # wait for this step to complete before the webapps-deploy step + app-settings-json: '{"WEBSITE_ENABLE_SYNC_UPDATE_SITE": "1"}' general-settings-json: '{"linuxFxVersion": "PYTHON|${{ env.PYTHON_VERSION }}"}' slot-name: ${{ vars.AZURE_WEBAPP_SLOT }} diff --git a/.github/workflows/lint-format.yml b/.github/workflows/lint-format.yml index 4c7f70fe0..f9494f89e 100644 --- a/.github/workflows/lint-format.yml +++ b/.github/workflows/lint-format.yml @@ -7,13 +7,14 @@ on: - main permissions: contents: read - jobs: get_changed_files: runs-on: ubuntu-latest outputs: py: ${{ steps.changes.outputs.py_all_changed_files }} - pretty: ${{ 
steps.changes.outputs.pretty_all_changed_files }} + yaml: ${{ steps.changes.outputs.yaml_all_changed_files }} + md: ${{ steps.changes.outputs.md_all_changed_files }} + json: ${{ steps.changes.outputs.json_all_changed_files }} steps: - name: Checkout repository uses: actions/checkout@v6 @@ -21,18 +22,19 @@ jobs: fetch-depth: 0 - name: Get changed files id: changes - uses: tj-actions/changed-files@7dee1b0c1557f278e5c7dc244927139d78c0e22a # v47.0.4 + uses: tj-actions/changed-files@7dee1b0c1557f278e5c7dc244927139d78c0e22a # v47.0.4 with: files_yaml: | py: - '**.py' - pretty: - - '**.json' - - '**.md' + yaml: - '**.yaml' - '**.yml' + md: + - '**.md' + json: + - '**.json' separator: " " - check_python_files: runs-on: ubuntu-latest needs: get_changed_files @@ -57,19 +59,50 @@ jobs: - name: Run black run: | black --check ${{needs.get_changed_files.outputs.py}} - - check_prettier_files: + check_json_files: runs-on: ubuntu-latest needs: get_changed_files - # only run if there are changed files - if: ${{needs.get_changed_files.outputs.pretty}} + if: ${{needs.get_changed_files.outputs.json}} + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + fetch-depth: 0 + - name: Run json lint + run: | + for f in ${{needs.get_changed_files.outputs.json}}; do + python -m json.tool $f > /dev/null && echo "$f OK" || exit 1 + done + check_yaml_files: + runs-on: ubuntu-latest + needs: get_changed_files + if: ${{needs.get_changed_files.outputs.yaml}} + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.12" + - name: Install yamllint + run: | + pip install yamllint -c requirements-dev.txt + - name: Run yamllint + run: | + yamllint ${{needs.get_changed_files.outputs.yaml}} + check_markdown_files: + runs-on: ubuntu-latest + needs: get_changed_files + if: ${{needs.get_changed_files.outputs.md}} steps: - name: Checkout repository uses: 
actions/checkout@v6 with: fetch-depth: 0 - uses: actions/setup-node@v6 - - name: Run prettier + - name: Run markdownlint run: | - npm i prettier - npx prettier --check ${{needs.get_changed_files.outputs.pretty}} + npm i markdownlint-cli2 + npx markdownlint-cli2 ${{needs.get_changed_files.outputs.md}} diff --git a/.github/workflows/prerelease-update-cache.yml b/.github/workflows/prerelease-update-cache.yml index ba225f5d1..3812afc94 100644 --- a/.github/workflows/prerelease-update-cache.yml +++ b/.github/workflows/prerelease-update-cache.yml @@ -13,7 +13,8 @@ jobs: with: # https://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent#generating-a-new-ssh-key # git bash: ssh-keygen -t ed25519 -C "github-actions@cdisc.org" - # Add public key (.pub one) as a deploy key at Your repo -> Settings -> Security -> Deploy keys, check "Allow write access". + # Add public key (.pub one) as a deploy key at + # Your repo -> Settings -> Security -> Deploy keys, check "Allow write access". # Add private key as a secret at Your repo -> Settings -> Security -> Secrets and variables -> Actions ssh-key: ${{ secrets.GH_ACTION_PRIVATE_KEY }} diff --git a/.github/workflows/prerelease-update-version.yml b/.github/workflows/prerelease-update-version.yml index f0a9a07fb..6cefc9d30 100644 --- a/.github/workflows/prerelease-update-version.yml +++ b/.github/workflows/prerelease-update-version.yml @@ -21,9 +21,9 @@ jobs: uses: actions/checkout@v6 with: # https://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent#generating-a-new-ssh-key - # git bash: ssh-keygen -t ed25519 -C "github-actions@cdisc.org" - # Add public key (.pub one) as a deploy key at Your repo -> Settings -> Security -> Deploy keys, check "Allow write access". 
- # Add private key as a secret at Your repo -> Settings -> Security -> Secrets and variables -> Actions + # Generate SSH key: ssh-keygen -t ed25519 -C "github-actions@cdisc.org" + # Add public key as deploy key (Settings -> Security -> Deploy keys, allow write access) + # Add private key as secret (Settings -> Security -> Secrets and variables -> Actions) ssh-key: ${{ secrets.GH_ACTION_PRIVATE_KEY }} - name: Set up Python diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 0e33d67bd..fb3b479e2 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -51,13 +51,17 @@ jobs: CDISC_LIBRARY_API_KEY: ${{ secrets.CDISC_LIBRARY_API_KEY }} continue-on-error: true run: | - python core.py validate -s sdtmig -v 3-3 ${{ env.RULE_LIST }} -d CORE_Test_Suite/data -dxp CORE_Test_Suite/data/Define.xml -of json -o CORE_Test_Suite/pandas-results -l info || true + python core.py validate -s sdtmig -v 3-3 ${{ env.RULE_LIST }} \ + -d CORE_Test_Suite/data \ + -dxp CORE_Test_Suite/data/Define.xml \ + -of json -o CORE_Test_Suite/pandas-results -l info || true if [ -f "CORE_Test_Suite/pandas-results.json" ]; then echo "pandas_success=true" >> $GITHUB_OUTPUT echo "## Pandas Validation" >> $GITHUB_STEP_SUMMARY echo "✅ **Success**: Validation completed successfully" >> $GITHUB_STEP_SUMMARY - python CORE_Test_Suite/scripts/validation_summary.py CORE_Test_Suite/pandas-results.json >> $GITHUB_STEP_SUMMARY + python CORE_Test_Suite/scripts/validation_summary.py \ + CORE_Test_Suite/pandas-results.json >> $GITHUB_STEP_SUMMARY else echo "Failed to generate pandas-results.json" echo "pandas_success=false" >> $GITHUB_OUTPUT @@ -69,7 +73,12 @@ jobs: if: steps.pandas_run.outputs.pandas_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/comparison.py CORE_Test_Suite/pandas-results.json CORE_Test_Suite/CORE-Report.json CORE_Test_Suite/pandas_comparison.xlsx --mode test --json-output 
CORE_Test_Suite/pandas_comparison.json + python CORE_Test_Suite/scripts/comparison.py \ + CORE_Test_Suite/pandas-results.json \ + CORE_Test_Suite/CORE-Report.json \ + CORE_Test_Suite/pandas_comparison.xlsx \ + --mode test \ + --json-output CORE_Test_Suite/pandas_comparison.json echo "pandas_diff=$?" >> $GITHUB_ENV PANDAS_EXIT_CODE=$? @@ -84,7 +93,13 @@ jobs: if: steps.pandas_run.outputs.pandas_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/compare_implementations.py CORE_Test_Suite/pandas-results.json CORE_Test_Suite/CORE-Report.json CORE_Test_Suite/pandas_comparison.json --github-step-summary $GITHUB_STEP_SUMMARY --mode test + python CORE_Test_Suite/scripts/compare_implementations.py \ + CORE_Test_Suite/pandas-results.json \ + CORE_Test_Suite/CORE-Report.json \ + CORE_Test_Suite/pandas_comparison.json \ + --github-step-summary $GITHUB_STEP_SUMMARY \ + --mode test + - name: Run validation with Dask id: dask_run continue-on-error: true @@ -92,36 +107,52 @@ jobs: DATASET_SIZE_THRESHOLD: 0 CDISC_LIBRARY_API_KEY: ${{ secrets.CDISC_LIBRARY_API_KEY }} run: | - python core.py validate -s sdtmig -v 3-3 ${{ env.RULE_LIST }} -d CORE_Test_Suite/data -dxp CORE_Test_Suite/data/Define.xml -of json -o CORE_Test_Suite/dask-results -l info || true + python core.py validate -s sdtmig -v 3-3 ${{ env.RULE_LIST }} \ + -d CORE_Test_Suite/data \ + -dxp CORE_Test_Suite/data/Define.xml \ + -of json -o CORE_Test_Suite/dask-results -l info || true if [ -f "CORE_Test_Suite/dask-results.json" ]; then echo "dask_success=true" >> $GITHUB_OUTPUT echo "## Dask Validation" >> $GITHUB_STEP_SUMMARY echo "✅ **Success**: Validation completed successfully" >> $GITHUB_STEP_SUMMARY - python CORE_Test_Suite/scripts/validation_summary.py dask-results.json >> $GITHUB_STEP_SUMMARY + python CORE_Test_Suite/scripts/validation_summary.py \ + dask-results.json >> $GITHUB_STEP_SUMMARY else echo "Failed to generate dask-results.json" echo "dask_success=false" >> $GITHUB_OUTPUT 
echo "## Dask Validation" >> $GITHUB_STEP_SUMMARY - echo "❌ **Failed**: No results file was generated" >> $GITHUB_STEP_SUMMARY + echo "❌ **Failed**: No results file was generated" >> $GITHUB_STEP_SUMMARY fi + - name: Dask comparison script continue-on-error: true if: steps.dask_run.outputs.dask_success == 'true' run: | - python CORE_Test_Suite/scripts/comparison.py CORE_Test_Suite/dask-results.json CORE_Test_Suite/CORE-Report.json CORE_Test_Suite/dask_comparison.xlsx --mode test --json-output CORE_Test_Suite/dask_comparison.json + python CORE_Test_Suite/scripts/comparison.py \ + CORE_Test_Suite/dask-results.json \ + CORE_Test_Suite/CORE-Report.json \ + CORE_Test_Suite/dask_comparison.xlsx \ + --mode test \ + --json-output CORE_Test_Suite/dask_comparison.json DASK_EXIT_CODE=$? echo "dask_diff=$DASK_EXIT_CODE" >> $GITHUB_ENV if [ $DASK_EXIT_CODE -eq 0 ]; then echo "Dask comparison completed successfully (no differences)" else echo "Dask comparison found differences" + fi - name: Generate dask comparison summary if: steps.dask_run.outputs.dask_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/compare_implementations.py CORE_Test_Suite/dask-results.json CORE_Test_Suite/CORE-Report.json CORE_Test_Suite/dask_comparison.json --github-step-summary $GITHUB_STEP_SUMMARY --mode test + python CORE_Test_Suite/scripts/compare_implementations.py \ + CORE_Test_Suite/dask-results.json \ + CORE_Test_Suite/CORE-Report.json \ + CORE_Test_Suite/dask_comparison.json \ + --github-step-summary $GITHUB_STEP_SUMMARY \ + --mode test ################################# # USDM TEST SUITE VALIDATION @@ -129,7 +160,8 @@ jobs: - name: Parse USDM rule list run: | - usdm_rules=$(cat CORE_Test_Suite/rulelist/USDM_Test_Suite_Rules.txt | sed 's/\r$//' | sed 's/^/-r /' | tr '\n' ' ') + usdm_rules=$(cat CORE_Test_Suite/rulelist/USDM_Test_Suite_Rules.txt \ + | sed 's/\r$//' | sed 's/^/-r /' | tr '\n' ' ') echo "USDM_RULE_LIST=$usdm_rules" >> $GITHUB_ENV echo "USDM rules: 
$usdm_rules" @@ -137,13 +169,16 @@ jobs: id: usdm_neg continue-on-error: true run: | - python core.py validate -s usdm -v 4-0 ${{ env.USDM_RULE_LIST }} -dp CORE_Test_Suite/usdm_data/USDM_Test_Suite_negative.json -of json -o CORE_Test_Suite/usdm_negative_report -l error || true + python core.py validate -s usdm -v 4-0 ${{ env.USDM_RULE_LIST }} \ + -dp CORE_Test_Suite/usdm_data/USDM_Test_Suite_negative.json \ + -of json -o CORE_Test_Suite/usdm_negative_report -l error || true if [ -f "CORE_Test_Suite/usdm_negative_report.json" ]; then echo "usdm_neg_success=true" >> $GITHUB_OUTPUT echo "## USDM Negative" >> $GITHUB_STEP_SUMMARY echo "**Success**: Negative test passed" >> $GITHUB_STEP_SUMMARY - python CORE_Test_Suite/scripts/validation_summary.py CORE_Test_Suite/usdm_negative_report.json >> $GITHUB_STEP_SUMMARY + python CORE_Test_Suite/scripts/validation_summary.py \ + CORE_Test_Suite/usdm_negative_report.json >> $GITHUB_STEP_SUMMARY else echo "usdm_neg_success=false" >> $GITHUB_OUTPUT echo "**Failed**: No results for negative test" >> $GITHUB_STEP_SUMMARY @@ -153,7 +188,12 @@ jobs: if: steps.usdm_neg.outputs.usdm_neg_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/comparison.py CORE_Test_Suite/usdm_negative_report.json CORE_Test_Suite/USDM_Negative_Result.json CORE_Test_Suite/usdm_negative_comparison.xlsx --mode test --json-output CORE_Test_Suite/usdm_negative_comparison.json + python CORE_Test_Suite/scripts/comparison.py \ + CORE_Test_Suite/usdm_negative_report.json \ + CORE_Test_Suite/USDM_Negative_Result.json \ + CORE_Test_Suite/usdm_negative_comparison.xlsx \ + --mode test \ + --json-output CORE_Test_Suite/usdm_negative_comparison.json USDM_NEG_EXIT_CODE=$? 
echo "usdm_neg_diff=$USDM_NEG_EXIT_CODE" >> $GITHUB_ENV if [ $USDM_NEG_EXIT_CODE -eq 0 ]; then @@ -166,19 +206,27 @@ jobs: if: steps.usdm_neg.outputs.usdm_neg_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/compare_implementations.py CORE_Test_Suite/usdm_negative_report.json CORE_Test_Suite/USDM_Negative_Result.json CORE_Test_Suite/usdm_negative_comparison.json --github-step-summary $GITHUB_STEP_SUMMARY --mode test + python CORE_Test_Suite/scripts/compare_implementations.py \ + CORE_Test_Suite/usdm_negative_report.json \ + CORE_Test_Suite/USDM_Negative_Result.json \ + CORE_Test_Suite/usdm_negative_comparison.json \ + --github-step-summary $GITHUB_STEP_SUMMARY \ + --mode test - name: Run USDM validation (Positive) id: usdm_pos continue-on-error: true run: | - python core.py validate -s usdm -v 4-0 ${{ env.USDM_RULE_LIST }} -dp CORE_Test_Suite/usdm_data/USDM_Test_Suite_positive.json -of json -o CORE_Test_Suite/usdm_positive_report -l error || true + python core.py validate -s usdm -v 4-0 ${{ env.USDM_RULE_LIST }} \ + -dp CORE_Test_Suite/usdm_data/USDM_Test_Suite_positive.json \ + -of json -o CORE_Test_Suite/usdm_positive_report -l error || true if [ -f "CORE_Test_Suite/usdm_positive_report.json" ]; then echo "usdm_pos_success=true" >> $GITHUB_OUTPUT echo "## USDM Positive" >> $GITHUB_STEP_SUMMARY echo "**Success**: Positive test passed" >> $GITHUB_STEP_SUMMARY - python CORE_Test_Suite/scripts/validation_summary.py CORE_Test_Suite/usdm_positive_report.json >> $GITHUB_STEP_SUMMARY + python CORE_Test_Suite/scripts/validation_summary.py \ + CORE_Test_Suite/usdm_positive_report.json >> $GITHUB_STEP_SUMMARY else echo "usdm_pos_success=false" >> $GITHUB_OUTPUT echo "**Failed**: No results for positive test" >> $GITHUB_STEP_SUMMARY @@ -188,7 +236,12 @@ jobs: if: steps.usdm_pos.outputs.usdm_pos_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/comparison.py CORE_Test_Suite/usdm_positive_report.json 
CORE_Test_Suite/USDM_Positive_Result.json CORE_Test_Suite/usdm_positive_comparison.xlsx --mode test --json-output CORE_Test_Suite/usdm_positive_comparison.json + python CORE_Test_Suite/scripts/comparison.py \ + CORE_Test_Suite/usdm_positive_report.json \ + CORE_Test_Suite/USDM_Positive_Result.json \ + CORE_Test_Suite/usdm_positive_comparison.xlsx \ + --mode test \ + --json-output CORE_Test_Suite/usdm_positive_comparison.json USDM_POS_EXIT_CODE=$? echo "usdm_pos_diff=$USDM_POS_EXIT_CODE" >> $GITHUB_ENV if [ $USDM_POS_EXIT_CODE -eq 0 ]; then @@ -201,7 +254,12 @@ jobs: if: steps.usdm_pos.outputs.usdm_pos_success == 'true' continue-on-error: true run: | - python CORE_Test_Suite/scripts/compare_implementations.py CORE_Test_Suite/usdm_positive_report.json CORE_Test_Suite/USDM_Positive_Result.json CORE_Test_Suite/usdm_positive_comparison.json --github-step-summary $GITHUB_STEP_SUMMARY --mode test + python CORE_Test_Suite/scripts/compare_implementations.py \ + CORE_Test_Suite/usdm_positive_report.json \ + CORE_Test_Suite/USDM_Positive_Result.json \ + CORE_Test_Suite/usdm_positive_comparison.json \ + --github-step-summary $GITHUB_STEP_SUMMARY \ + --mode test ####################### # UPLOAD ALL RESULTS @@ -238,11 +296,12 @@ jobs: USDM_NEG_DIFF="${{ env.usdm_neg_diff }}" USDM_POS_DIFF="${{ env.usdm_pos_diff }}" - - if [[ "$PANDAS_DIFF" == "1" || "$DASK_DIFF" == "1" || "$USDM_NEG_DIFF" == "1" || "$USDM_POS_DIFF" == "1" ]]; then + if [[ "$PANDAS_DIFF" == "1" || "$DASK_DIFF" == "1" \ + || "$USDM_NEG_DIFF" == "1" || "$USDM_POS_DIFF" == "1" ]]; then echo "Differences found in one or more comparisons" exit 1 - elif [[ "$PANDAS_DIFF" == "0" && "$DASK_DIFF" == "0" && "$USDM_NEG_DIFF" == "0" && "$USDM_POS_DIFF" == "0" ]]; then + elif [[ "$PANDAS_DIFF" == "0" && "$DASK_DIFF" == "0" \ + && "$USDM_NEG_DIFF" == "0" && "$USDM_POS_DIFF" == "0" ]]; then echo "No differences found in any comparison" exit 0 else diff --git a/.markdownlint-cli2.yaml b/.markdownlint-cli2.yaml new file mode 
100644 index 000000000..03bc2806d --- /dev/null +++ b/.markdownlint-cli2.yaml @@ -0,0 +1,13 @@ +config: + MD013: + line_length: 120 + tables: false + code_blocks: false + headings: false + MD024: + siblings_only: true + MD029: false + MD033: false + MD041: false +ignores: + - "resources/**" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e3d7cf0c4..1b387343a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,11 +1,24 @@ repos: - repo: https://github.com/ambv/black - rev: 24.10.0 + rev: 26.5.1 hooks: - id: black language_version: python3 - repo: https://github.com/pycqa/flake8 - rev: 6.1.0 + rev: 7.3.0 hooks: - id: flake8 language_version: python3 + - repo: https://github.com/adrienverge/yamllint + rev: v1.38.0 + hooks: + - id: yamllint + args: [--strict] + - repo: https://github.com/DavidAnson/markdownlint-cli2 + rev: v0.22.1 + hooks: + - id: markdownlint-cli2 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: check-json diff --git a/.yamllint.yaml b/.yamllint.yaml new file mode 100644 index 000000000..e3debde96 --- /dev/null +++ b/.yamllint.yaml @@ -0,0 +1,13 @@ +--- +extends: default +rules: + line-length: + max: 120 + document-start: disable + truthy: + allowed-values: ["true", "false", "on"] + new-lines: disable + trailing-spaces: disable +ignore: | + resources/** + .github/workflows/build-version.yml diff --git a/README.md b/README.md index ae529c53a..87bf240e1 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,17 @@ +# cdisc-rules-engine +
-[](https://www.python.org/downloads/release/python-3120) [](https://pypi.org/project/cdisc-rules-engine) [](https://hub.docker.com/r/cdiscdocker/cdisc-rules-engine) - -# cdisc-rules-engine +[](https://www.python.org/downloads/release/python-3120) +[](https://pypi.org/project/cdisc-rules-engine) +[](https://hub.docker.com/r/cdiscdocker/cdisc-rules-engine) -Open source offering of the CDISC Rules Engine, a tool designed for validating clinical trial data against data standards. +Open source offering of the CDISC Rules Engine, a tool designed for validating clinical +trial data against data standards. ## Quick Start Documentation @@ -28,12 +31,15 @@ Full documentation lives in the [`docs/`](docs/index.md) directory and is hosted ### Questions or Need Help? -If you need any assistance or encounter errors during setup, check or reach out via our [Q&A](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a?discussions_q=) board +If you need any assistance or encounter errors during setup, check or reach out via our +[Q&A](https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/q-a?discussions_q=) +board. 
### Submit an Issue -If you encounter any bugs or have feature requests please submit an issue on our GitHub repository: -[https://github.com/cdisc-org/cdisc-rules-engine/issues](https://github.com/cdisc-org/cdisc-rules-engine/issues) +If you encounter any bugs or have feature requests please submit an issue on our +GitHub repository: +[GitHub Issues](https://github.com/cdisc-org/cdisc-rules-engine/issues) When submitting an issue, please include: diff --git a/cdisc_rules_engine/__init__.py b/cdisc_rules_engine/__init__.py index 33a5c99ad..68aae553f 100644 --- a/cdisc_rules_engine/__init__.py +++ b/cdisc_rules_engine/__init__.py @@ -1,5 +1,4 @@ from .plugin_loader import PluginLoader - loader = PluginLoader() loader.load() diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py index 5625b5a81..7564de1e4 100644 --- a/cdisc_rules_engine/check_operators/dataframe_operators.py +++ b/cdisc_rules_engine/check_operators/dataframe_operators.py @@ -476,10 +476,8 @@ def _get_string_part_series(self, part_to_validate: str, length: int, target: st elif part_to_validate == "prefix": series_to_validate = self.value[target].str.slice(stop=length) else: - raise ValueError( - f"Invalid part to validate: {part_to_validate}. \ - Valid values are: suffix, prefix" - ) + raise ValueError(f"Invalid part to validate: {part_to_validate}. 
\ + Valid values are: suffix, prefix") series_to_validate = series_to_validate.mask(pd.isna(self.value[target])) return series_to_validate diff --git a/cdisc_rules_engine/interfaces/__init__.py b/cdisc_rules_engine/interfaces/__init__.py index 5febc42d5..11c9e77f7 100644 --- a/cdisc_rules_engine/interfaces/__init__.py +++ b/cdisc_rules_engine/interfaces/__init__.py @@ -9,7 +9,6 @@ from .dictionary_term_interface import DictionaryTermInterface from .terms_factory_interface import TermsFactoryInterface - __all__ = [ "CacheServiceInterface", "ConditionInterface", diff --git a/cdisc_rules_engine/models/define/value_level_metadata.py b/cdisc_rules_engine/models/define/value_level_metadata.py index 7d0e2baff..530663147 100644 --- a/cdisc_rules_engine/models/define/value_level_metadata.py +++ b/cdisc_rules_engine/models/define/value_level_metadata.py @@ -164,12 +164,12 @@ def isdatetime(dataframe): return True try: datetime.fromisoformat(dataframe[self.item.Name]) - except: + except Exception: try: datetime.fromisoformat( dataframe[self.item.Name].replace("Z", "+00:00") ) - except: + except Exception: return False return True return True @@ -182,7 +182,7 @@ def isdate(dataframe): return True try: datetime.date.fromisoformat(dataframe[self.item.Name]) - except: + except Exception: return False return True @@ -194,7 +194,7 @@ def istime(dataframe): return True try: datetime.time.fromisoformat(dataframe[self.item.Name]) - except: + except Exception: return False return True @@ -206,12 +206,12 @@ def is_incomplete(dataframe): return True try: datetime.fromisoformat(dataframe[self.item.Name]) - except: + except Exception: try: datetime.fromisoformat( dataframe[self.item.Name].replace("Z", "+00:00") ) - except: + except Exception: return True return False return False diff --git a/cdisc_rules_engine/models/external_dictionaries_container.py b/cdisc_rules_engine/models/external_dictionaries_container.py index c434b7f28..3fa14bad0 100644 --- 
a/cdisc_rules_engine/models/external_dictionaries_container.py +++ b/cdisc_rules_engine/models/external_dictionaries_container.py @@ -14,7 +14,6 @@ ) from cdisc_rules_engine.exceptions.custom_exceptions import UnsupportedDictionaryType - DICTIONARY_VALIDATORS = { DictionaryTypes.MEDDRA.value: MedDRAValidator, DictionaryTypes.LOINC.value: LoincValidator, diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index 8860ff8d5..e524707d7 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -275,16 +275,14 @@ def validate_single_dataset( ] except Exception as e: logger.trace(e) - logger.error( - f"""Error occurred during validation. + logger.error(f"""Error occurred during validation. Error: {e} Error Type: {type(e)} Error Message: {str(e)} Dataset Name: {dataset_metadata.name} Rule ID: {rule.get("core_id", "unknown")} Full traceback: {traceback.format_exc()} - """ - ) + """) error_obj: ValidationErrorContainer = self.handle_validation_exceptions( e, dataset_metadata.name ) diff --git a/cdisc_rules_engine/services/data_readers/__init__.py b/cdisc_rules_engine/services/data_readers/__init__.py index 9fe2b3c11..d397c72ea 100644 --- a/cdisc_rules_engine/services/data_readers/__init__.py +++ b/cdisc_rules_engine/services/data_readers/__init__.py @@ -3,5 +3,4 @@ from .parquet_reader import ParquetReader from .dataset_json_reader import DatasetJSONReader - __all__ = ["DataReaderFactory", "XPTReader", "DatasetJSONReader", "ParquetReader"] diff --git a/cdisc_rules_engine/services/logging/__init__.py b/cdisc_rules_engine/services/logging/__init__.py index 13bba08bb..a62550e62 100644 --- a/cdisc_rules_engine/services/logging/__init__.py +++ b/cdisc_rules_engine/services/logging/__init__.py @@ -1,7 +1,6 @@ from .console_logger import ConsoleLogger from .logging_service_factory import LoggingServiceFactory - __all__ = [ "ConsoleLogger", "LoggingServiceFactory", diff --git 
a/cdisc_rules_engine/utilities/jsonata_processor.py b/cdisc_rules_engine/utilities/jsonata_processor.py index ad550909b..8af903c85 100644 --- a/cdisc_rules_engine/utilities/jsonata_processor.py +++ b/cdisc_rules_engine/utilities/jsonata_processor.py @@ -94,7 +94,7 @@ def execute_jsonata_rule( @staticmethod @cache def get_all_custom_functions( - jsonata_custom_functions: tuple[()] | tuple[tuple[str, str], ...] + jsonata_custom_functions: tuple[()] | tuple[tuple[str, str], ...], ): builtins_and_customs = [ ("utils", DefaultFilePaths.JSONATA_UTILS.value), diff --git a/cdisc_rules_engine/utilities/utils.py b/cdisc_rules_engine/utilities/utils.py index d7a1a4b59..40fe038d7 100644 --- a/cdisc_rules_engine/utilities/utils.py +++ b/cdisc_rules_engine/utilities/utils.py @@ -258,9 +258,9 @@ def get_meddra_code_term_pairs_cache_key(meddra_path: str) -> str: return f"meddra_valid_code_term_pairs_{meddra_path}" -def get_item_index_by_condition[ - T -](list_of_dicts: List[T], condition: Callable[[T], bool]) -> Optional[int]: +def get_item_index_by_condition[T]( + list_of_dicts: List[T], condition: Callable[[T], bool] +) -> Optional[int]: """ Uses linear search to return index of element in unsorted list which applies to the condition. @@ -270,9 +270,9 @@ def get_item_index_by_condition[ return index -def search_in_list[ - T -](list_of_dicts: List[T], condition: Callable[[T], bool]) -> Optional[T]: +def search_in_list[T]( + list_of_dicts: List[T], condition: Callable[[T], bool] +) -> Optional[T]: """ Returns an element of unsorted list that applies to the condition. """ diff --git a/docs/PYPI.md b/docs/PYPI.md index 5c3df1521..5241e5fec 100644 --- a/docs/PYPI.md +++ b/docs/PYPI.md @@ -1,12 +1,15 @@ # PyPI Integration -CORE is available as a Python package for direct integration into your own pipelines and tooling. +CORE is available as a Python package for direct integration into your own pipelines +and tooling. 
```bash pip install cdisc-rules-engine ``` -This installs the engine underlying the CLI and executable, but **does not include `core.py`** or the CLI entrypoints. If you need the full CLI, use the [executable or source code](quick-start.md) instead. +This installs the engine underlying the CLI and executable, but **does not include +`core.py`** or the CLI entrypoints. If you need the full CLI, use the +[executable or source code](quick-start.md) instead. --- @@ -14,10 +17,16 @@ This installs the engine underlying the CLI and executable, but **does not inclu Installing the package alone is not enough to run validations. You also need: -1. **The rules cache** — download the contents of `resources/cache/` from the [repository](https://github.com/cdisc-org/cdisc-rules-engine) and store them somewhere in your project. Keep this in sync with the package version you're using. -2. **A CDISC Library API key** — required for controlled terminology and library metadata. See [update-cache](cli-reference.md#updating-the-cache-update-cache) for how to obtain one. +1. **The rules cache** — download the contents of `resources/cache/` from the + [repository](https://github.com/cdisc-org/cdisc-rules-engine) and store them + somewhere in your project. Keep this in sync with the package version you're using. +2. **A CDISC Library API key** — required for controlled terminology and library + metadata. See [update-cache](cli-reference.md#updating-the-cache-update-cache) + for how to obtain one. -The package also includes the USDM and Dataset-JSON schemas, available if you use the dataset reader classes in `cdisc_rules_engine/services/data_readers` or the metadata readers in `cdisc_rules_engine/services`. +The package also includes the USDM and Dataset-JSON schemas, available if you use the +dataset reader classes in `cdisc_rules_engine/services/data_readers` or the metadata +readers in `cdisc_rules_engine/services`. 
--- @@ -66,15 +75,17 @@ Retrieve rules for a standard and version: ```python from cdisc_rules_engine.utilities.utils import get_rules_cache_key - cache = load_rules_cache("path/to/rules/cache") # Note: version uses dashes, not dots rule_keys = cache.get_all_by_prefix(get_rules_cache_key("sdtmig", "3-4")) rules = [cache.get(key) for key in rule_keys[0]] ``` -`get_all_by_prefix` returns a nested list of cache keys, not rule objects directly. Fetch the actual rule dicts by calling cache.get() on each key. -Each rule is a dict with keys: `core_id`, `domains`, `author`, `reference`, `sensitivity`, `executability`, `description`, `authorities`, `standards`, `classes`, `rule_type`, `conditions`, `actions`, `output_variables`. +`get_all_by_prefix` returns a nested list of cache keys, not rule objects directly. +Fetch the actual rule dicts by calling `cache.get()` on each key. +Each rule is a dict with keys: `core_id`, `domains`, `author`, `reference`, +`sensitivity`, `executability`, `description`, `authorities`, `standards`, `classes`, +`rule_type`, `conditions`, `actions`, `datasets`, `output_variables`. If you have rules in raw CDISC metadata format, convert them first: @@ -145,7 +156,11 @@ for rule in ae_rules: value_level_metadata=None, ) try: - was_triggered = run(rule=rule, defined_variables=dataset_variable, defined_actions=core_actions) + was_triggered = run( + rule=rule, + defined_variables=dataset_variable, + defined_actions=core_actions + ) if was_triggered: all_results.extend(results) except Exception as e: @@ -168,7 +183,11 @@ for rule in ae_rules: ## Option B: RulesEngine Class -More setup, but handles dataset reading, preprocessing, and multi-domain validation. The source code in `cdisc_rules_engine/rules_engine.py` and the existing CLI implementation in `core.py` are the best reference for wiring this together — the initializer arguments map closely to the CLI flags documented in the [CLI Reference](cli-reference.md). 
+More setup, but handles dataset reading, preprocessing, and multi-domain validation. +The source code in `cdisc_rules_engine/rules_engine.py` and the existing CLI +implementation in `core.py` are the best reference for wiring this together — the +initializer arguments map closely to the CLI flags documented in the +[CLI Reference](cli-reference.md). ### Step 1: Prepare Dataset Metadata @@ -197,7 +216,8 @@ datasets = [ ] ``` -You don't need to manually create `PandasDataset` or `DatasetVariable` objects for Option B — the engine handles this internally. +You don't need to manually create `PandasDataset` or `DatasetVariable` objects for +Option B — the engine handles this internally. ### Step 2: Initialize Library Metadata @@ -214,8 +234,13 @@ standard = "sdtmig" standard_version = "3-4" standard_substandard = None -standard_metadata = cache.get(get_standard_details_cache_key(standard, standard_version, standard_substandard)) -model_metadata = cache.get(get_model_details_cache_key_from_ig(standard_metadata)) if standard_metadata else {} +standard_metadata = cache.get( + get_standard_details_cache_key(standard, standard_version, standard_substandard) +) +model_metadata = ( + cache.get(get_model_details_cache_key_from_ig(standard_metadata)) + if standard_metadata else {} +) ct_packages = ["sdtmct-2021-12-17"] # replace with your CT package versions ct_package_metadata = {pkg: cache.get(pkg) for pkg in ct_packages} @@ -223,8 +248,16 @@ ct_package_metadata = {pkg: cache.get(pkg) for pkg in ct_packages} library_metadata = LibraryMetadataContainer( standard_metadata=standard_metadata, model_metadata=model_metadata, - variables_metadata=cache.get(get_library_variables_metadata_cache_key(standard, standard_version, standard_substandard)), - variable_codelist_map=cache.get(get_variable_codelist_map_cache_key(standard, standard_version, standard_substandard)), + variables_metadata=cache.get( + get_library_variables_metadata_cache_key( + standard, standard_version, 
standard_substandard + ) + ), + variable_codelist_map=cache.get( + get_variable_codelist_map_cache_key( + standard, standard_version, standard_substandard + ) + ), ct_package_metadata=ct_package_metadata, ) ``` @@ -275,7 +308,8 @@ rules_engine = RulesEngine( ### Step 5: Run Validation -Note the `ConditionCompositeFactory` conversion step — this is required before passing rules to `validate_single_rule`: +Note the `ConditionCompositeFactory` conversion step — this is required before passing +rules to `validate_single_rule`: ```python import time @@ -288,7 +322,9 @@ validation_results = [] for rule in rules: try: if isinstance(rule["conditions"], dict): - rule["conditions"] = ConditionCompositeFactory.get_condition_composite(rule["conditions"]) + rule["conditions"] = ConditionCompositeFactory.get_condition_composite( + rule["conditions"] + ) results = rules_engine.validate_single_rule(rule, datasets) flattened = [r for domain_results in results.values() for r in domain_results] validation_results.append(RuleValidationResult(rule, flattened)) @@ -337,11 +373,16 @@ reporting_services = reporting_factory.get_report_services() **Cache key format** — always use dashes in version strings (`3-4`, not `3.4`). -**`column_prefix_map`** — maps the `--` variable prefix to the dataset domain (e.g. `{"--": "AE"}`), resolving placeholders like `--SEQ` → `AESEQ`. +**`column_prefix_map`** — maps the `--` variable prefix to the dataset domain +(e.g. `{"--": "AE"}`), resolving placeholders like `--SEQ` → `AESEQ`. -**External dictionaries** — pass an `ExternalDictionariesContainer` to `RulesEngine` if validating rules that require MedDRA, WHODrug, LOINC, UNII, MedRT, or SNOMED. See the [External Dictionary Reference](https://cdisc-org.github.io/conformance-rules-editor/#/exdictionary). +**External dictionaries** — pass an `ExternalDictionariesContainer` to `RulesEngine` +if validating rules that require MedDRA, WHODrug, LOINC, UNII, MedRT, or SNOMED. 
+See the +[External Dictionary Reference](https://cdisc-org.github.io/cdisc-open-rules/#/exdictionary). -**Dask** — set `max_dataset_size=0` when initializing `DataServiceFactory` to force Dask processing for all datasets. +**Dask** — set `max_dataset_size=0` when initializing `DataServiceFactory` to force +Dask processing for all datasets. **Windows compatibility** — add `freeze_support()` for multiprocessing: @@ -363,7 +404,9 @@ if __name__ == "__main__": - `full_path` must be set in `SDTMDatasetMetadata` when using the `RulesEngine` approach - The rule's `domains.Include` must match your dataset's domain - `standard_version` format must be consistent throughout (`3-4`, not `3.4`) -- CT package metadata must be present in the cache if validating against controlled terminology +- CT package metadata must be present in the cache if validating against controlled + terminology - When using `define.xml`, the file must be named `define.xml` and the path must be valid - If using external dictionaries, verify all file paths are correct and accessible -- Don't forget the `ConditionCompositeFactory` conversion before calling `validate_single_rule` (Option B) +- Don't forget the `ConditionCompositeFactory` conversion before calling + `validate_single_rule` (Option B) diff --git a/docs/README.md b/docs/README.md index 0dcf0e37c..487bd1770 100644 --- a/docs/README.md +++ b/docs/README.md @@ -8,6 +8,7 @@ [](https://pypi.org/project/cdisc-rules-engine) [](https://hub.docker.com/r/cdiscdocker/cdisc-rules-engine) -# CDISC Rules Engine (CORE) - -Open source offering of the CDISC Conformance Rules Engine — a tool for validating clinical trial data against CDISC data standards. CORE validates study data structure and conformance against both published CDISC conformance rules for the various CDISC standards and custom rules authored in the CORE rule format. 
+Open source offering of the CDISC Conformance Rules Engine — a tool for validating +clinical trial data against CDISC data standards. CORE validates study data structure +and conformance against both published CDISC conformance rules for the various CDISC +standards and custom rules authored in the CORE rule format. diff --git a/docs/build_executable.md b/docs/build_executable.md index ccea09cce..070c63e9c 100644 --- a/docs/build_executable.md +++ b/docs/build_executable.md @@ -1,22 +1,26 @@ # Building CDISC Rules Engine Executable -Pre-built executables for each release are available on the Releases page. If you need to build your own there are two approaches. +Pre-built executables for each release are available on the Releases page. +If you need to build your own there are two approaches. ## Option 1: Using GitHub Actions (Recommended) ### Step 1: Fork the Repository and Setup -1. Fork the repository: https://github.com/cdisc-org/cdisc-rules-engine -2. The workflow file `.github/workflows/build-version.yml` is already included in the main repository. It is contained within our .gitignore so you can customize it as you see fit. +1. Fork the repository: [cdisc-rules-engine](https://github.com/cdisc-org/cdisc-rules-engine) +2. The workflow file `.github/workflows/build-version.yml` is already included in the main + repository. It is contained within our .gitignore so you can customize it as you see fit. ### Step 2: Add your API Key 1. Go to the top bar of the fork, click Settings > Security > Secrets and Variables > Actions -2. Click **New Repository Secret** and set an action secret named CDISC_LIBRARY_API_KEY and secret as your API key +2. Click **New Repository Secret** and set an action secret named CDISC_LIBRARY_API_KEY + and secret as your API key ### Step 3: Run the Build -Go to the **Actions** tab → **Build Custom Executable** → **Run workflow**. Download the artifact when complete. 
+Go to the **Actions** tab → **Build Custom Executable** → **Run workflow**. +Download the artifact when complete. ### Step 3: Automated Builds (Optional) @@ -37,12 +41,14 @@ schedule: - Docker Desktop installed and running - Git -- **Note**: There is no official support for a macOS docker runner; Windows also requires some additional setup -- **Note**: You will need to run Windows Command Prompt / Windows Powershell as administrator. This can be done by right clicking and the application and selecting 'Run as Administrator' +- **Note**: There is no official support for a macOS docker runner; Windows also requires + some additional setup +- **Note**: You will need to run Windows Command Prompt / Windows Powershell as administrator. + This can be done by right-clicking the application and selecting 'Run as Administrator' ### Step 1: Clone Repository -#### Linux/macOS/WSL/Windows Command Prompt/Powershell: +#### Linux/macOS/WSL/Windows Command Prompt/Powershell ```bash git clone https://github.com/cdisc-org/cdisc-rules-engine.git @@ -51,9 +57,12 @@ cd cdisc-rules-engine ### Step 1.5: Update cache and code -When you clone the repo initially, it will come with an updated cache and main branch. Before subsequent local docker builds, you will want to follow the README to install the compatible python version of engine, create the virtual environment, and then update the cache as well as pulling down changes from main in cdisc-rules-engine root directory. 
-#### Linux/macOS/WSL/Git Bash/Windows Command Prompt & PowerShell: +#### Linux/macOS/WSL/Git Bash/Windows Command Prompt & PowerShell ```bash # Set up upstream remote (only done once) @@ -65,7 +74,7 @@ git pull upstream main ### Step 2: Build with Docker -#### Linux/macOS/WSL/Git Bash: +#### Linux/macOS/WSL/Git Bash ```bash # Build the executable @@ -83,7 +92,7 @@ chmod +x ./build-output/core echo "Executable ready: ./build-output/core" ``` -#### Windows Command Prompt: +#### Windows Command Prompt ```cmd REM Build the executable @@ -104,7 +113,7 @@ del temp_id.txt echo Executable ready: ./build-output/core ``` -#### Windows PowerShell: +#### Windows PowerShell ```powershell # Build the executable @@ -119,15 +128,17 @@ docker rm $CONTAINER_ID ## Customizing the Build for Your Environment -The default Dockerfile builds for Ubuntu 22.04 on AMD64 architecture. To customize for your specific environment, modify these sections in Dockerfile.build: +The default Dockerfile builds for Ubuntu 22.04 on AMD64 architecture. 
To customize for +your specific environment, modify these sections in Dockerfile.build: ### Change Target Operating System -To change what underlying OS the executable is built on to match your implementation needs, you will need to edit the dockerfile +To change what underlying OS the executable is built on to match your implementation +needs, you will need to edit the dockerfile: -- https://docs.docker.com/reference/dockerfile/#from -- **Windows**: https://hub.docker.com/r/microsoft/windows -- **macOS**: https://hub.docker.com/search - you can explore DockerHub to find a macOS image to utilize +- [Dockerfile FROM reference](https://docs.docker.com/reference/dockerfile/#from) +- **Windows**: [microsoft/windows on Docker Hub](https://hub.docker.com/r/microsoft/windows) +- **macOS**: [Search Docker Hub](https://hub.docker.com/search) for a macOS image to utilize You will need to edit these areas of the dockerfile: @@ -140,7 +151,8 @@ FROM --platform=linux/amd64 ubuntu:22.04 ### Update PyInstaller Output Path -If you change the base OS, update the PyInstaller dist path. this is for clarity and organization, but it's not technically required for functionality: +If you change the base OS, update the PyInstaller dist path. This is for clarity and +organization, but it's not technically required for functionality: ```dockerfile # Change the --dist path in the pyinstaller command (around line 20) @@ -183,7 +195,8 @@ docker cp "${CONTAINER_ID}:/app/dist/output/core-ubuntu-22.04/core" ./build-outp ### Executability -Currently the Dockerfile.build is ubuntu and we give the file executable permissions. This may not be required depending on your OS. +Currently the Dockerfile.build is ubuntu and we give the file executable permissions. +This may not be required depending on your OS. 
```bash RUN chmod +x /app/dist/output/core-ubuntu-22.04/core/core && \ @@ -193,7 +206,8 @@ RUN chmod +x /app/dist/output/core-ubuntu-22.04/core/core && \ ### Windows Users -- **Recommended**: Use WSL (Windows Subsystem for Linux) or Git Bash for the best experience with the bash commands +- **Recommended**: Use WSL (Windows Subsystem for Linux) or Git Bash for the best + experience with the bash commands - The `chmod +x` command is not needed on Windows as executable permissions work differently - If using Command Prompt, some syntax differs from bash (variable assignment, echo commands) @@ -205,15 +219,20 @@ RUN chmod +x /app/dist/output/core-ubuntu-22.04/core/core && \ ### Cross-Platform Alternative -For the most consistent experience across all platforms, consider using the **GitHub Actions approach (Option 1)**, which handles platform differences automatically and doesn't require local Docker setup. +For the most consistent experience across all platforms, consider using the +**GitHub Actions approach (Option 1)**, which handles platform differences automatically +and doesn't require local Docker setup. ## Troubleshooting ### Architecture Issues -You can build executables for different operating systems using GitHub's hosted runners. This creates platform-specific executables that work on different environments. See: +You can build executables for different operating systems using GitHub's hosted runners. +This creates platform-specific executables that work on different environments. 
See: -- https://docs.github.com/en/actions/concepts/runners/github-hosted-runners -- https://github.com/actions/runner-images +- [GitHub-hosted runners](https://docs.github.com/en/actions/concepts/runners/github-hosted-runners) +- [Runner images](https://github.com/actions/runner-images) -The runner in our workflow currently builds for ubuntu-22.04 but this can be changed to your particular OS, as well as CPU architectures (This will be different for Apple M chips that use ARM architecture versus Intel chips) +The runner in our workflow currently builds for ubuntu-22.04 but this can be changed to +your particular OS, as well as CPU architectures (This will be different for Apple M chips +that use ARM architecture versus Intel chips) diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 00db2c4c2..fe47db1c3 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -1,6 +1,7 @@ # CLI Reference -> Throughout this reference, examples use `python core.py`. If you're using the pre-built executable, replace this with `.\core.exe` (Windows) or `./core` (Linux/Mac). +> Throughout this reference, examples use `python core.py`. If you're using the pre-built +> executable, replace this with `.\core.exe` (Windows) or `./core` (Linux/Mac). --- @@ -85,9 +86,12 @@ python core.py validate --help -me 100 True # Hard limit per dataset per rule ``` -- `False` (default): After each dataset, if cumulative errors for a rule meet the limit, that rule stops processing further datasets. -- `True`: Limits reported issues to `