diff --git a/.github/workflows/auto-approve.yml b/.github/workflows/auto-approve.yml
new file mode 100644
index 0000000..f24c211
--- /dev/null
+++ b/.github/workflows/auto-approve.yml
@@ -0,0 +1,118 @@
+# Auto-approves a pull request once CI has succeeded and the review bot has
+# posted a clean review for the PR's current head commit. Merging stays manual.
+name: Auto-Approve Clean PRs
+
+on:
+  # Re-evaluate whenever either prerequisite workflow finishes.
+  workflow_run:
+    workflows: [".github/workflows/base.yml", "PyDeequ Bot"]
+    types: [completed]
+
+permissions:
+  pull-requests: write
+  actions: read
+
+jobs:
+  approve:
+    runs-on: ubuntu-latest
+    if: github.event.workflow_run.event == 'pull_request' || github.event.workflow_run.event == 'pull_request_target'
+    timeout-minutes: 2
+
+    steps:
+      - name: Find PR and check both conditions
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
+        with:
+          script: |
+            const sha = context.payload.workflow_run.head_sha;
+            const owner = context.repo.owner;
+            const repo = context.repo.repo;
+
+            // Find the PR for this SHA: prefer the PRs listed in the event
+            // payload, otherwise scan the most recently updated open PRs.
+            let prNumber = null;
+            const prs = context.payload.workflow_run.pull_requests;
+            if (prs && prs.length > 0) {
+              prNumber = prs[0].number;
+            } else {
+              const {data: searchResult} = await github.rest.pulls.list({
+                owner, repo, state: 'open', sort: 'updated', direction: 'desc', per_page: 100
+              });
+              const match = searchResult.find(pr => pr.head.sha === sha);
+              if (match) {
+                prNumber = match.number;
+              }
+            }
+
+            if (!prNumber) {
+              core.info(`No open PR found for SHA ${sha}, skipping`);
+              return;
+            }
+
+            core.info(`Found PR #${prNumber} for SHA ${sha}`);
+
+            // Verify the PR head SHA still matches (no new push since trigger)
+            const {data: pr} = await github.rest.pulls.get({
+              owner, repo, pull_number: prNumber
+            });
+            if (pr.head.sha !== sha) {
+              core.info(`PR head ${pr.head.sha} differs from trigger SHA ${sha} — new push arrived, skipping`);
+              return;
+            }
+
+            // Condition 1: CI must have passed for this SHA
+            const {data: workflowRuns} = await github.rest.actions.listWorkflowRunsForRepo({
+              owner, repo, head_sha: sha, status: 'completed', per_page: 100
+            });
+            const ciRun = workflowRuns.workflow_runs.find(r =>
+              r.name === '.github/workflows/base.yml' && r.conclusion === 'success'
+            );
+            if (!ciRun) {
+              core.info(`CI has not passed for SHA ${sha}, skipping`);
+              return;
+            }
+
+            // Condition 2: Bot must have posted a clean review for this SHA
+            // NOTE(review): the marker is empty as written, and
+            // String.prototype.includes('') is always true. Set it to the exact
+            // text the bot embeds in a clean review; until then fail closed
+            // instead of treating every bot review as clean.
+            const CLEAN_MARKER = '';
+            if (!CLEAN_MARKER) {
+              core.warning('CLEAN_MARKER is empty, cannot identify a clean bot review, skipping');
+              return;
+            }
+
+            // Paginate so the newest review is seen even on PRs with more
+            // reviews than fit on one API page.
+            const reviews = await github.paginate(github.rest.pulls.listReviews, {
+              owner, repo, pull_number: prNumber, per_page: 100
+            });
+
+            const latestBot = reviews
+              .filter(r => r.user.login === 'github-actions[bot]')
+              .sort((a, b) => new Date(b.submitted_at) - new Date(a.submitted_at))[0];
+
+            // Default a missing body so .includes() cannot throw.
+            if (!latestBot || !(latestBot.body || '').includes(CLEAN_MARKER) || latestBot.commit_id !== sha) {
+              core.info('Bot has not posted a clean review for this SHA, skipping');
+              return;
+            }
+
+            // Both conditions met — check for existing approval to prevent doubles
+            const botApprovals = reviews.filter(r =>
+              r.user.login === 'github-actions[bot]' && r.state === 'APPROVED'
+            );
+            if (botApprovals.length > 0) {
+              core.info('Bot already approved this PR, skipping');
+              return;
+            }
+
+            // Approve, pinned to the verified SHA so the approval is recorded
+            // against the commit that was actually checked above.
+            core.info(`Approving PR #${prNumber}: bot review clean + CI passed for SHA ${sha}`);
+            await github.rest.pulls.createReview({
+              owner, repo, pull_number: prNumber,
+              commit_id: sha,
+              event: 'APPROVE',
+              body: `No issues found and CI is passing. Auto-approved.\n\n---\n*Generated by AI — human merge required.*`
+            });
diff --git a/.github/workflows/issue-bot.yml b/.github/workflows/issue-bot.yml
index b24a577..96a2056 100644
--- a/.github/workflows/issue-bot.yml
+++ b/.github/workflows/issue-bot.yml
@@ -61,16 +61,20 @@ jobs:
ISSUE_NUMBER: ${{ github.event.issue.number || github.event.pull_request.number || inputs.issue_number }}
EVENT_TYPE: ${{ github.event_name }}
EVENT_ACTION: ${{ github.event.action }}
+ EVENT_BEFORE: ${{ github.event.before }}
+ EVENT_AFTER: ${{ github.event.pull_request.head.sha || github.event.after }}
GITHUB_ACTOR: ${{ github.actor }}
KB_S3_BUCKET: ${{ secrets.KB_S3_BUCKET }}
KB_S3_KEY: ${{ secrets.KB_S3_KEY }}
BEDROCK_MODEL_ID: ${{ secrets.BEDROCK_MODEL_ID }}
GUARDRAIL_ID: ${{ secrets.GUARDRAIL_ID }}
GUARDRAIL_VERSION: ${{ secrets.GUARDRAIL_VERSION }}
- ISSUE_CLASSIFY_PROMPT: ${{ secrets.ISSUE_CLASSIFY_PROMPT }}
- ISSUE_RESPOND_PROMPT: ${{ secrets.ISSUE_RESPOND_PROMPT }}
- PR_FILE_REVIEW_PROMPT: ${{ secrets.PR_FILE_REVIEW_PROMPT }}
- FOLLOWUP_PROMPT: ${{ secrets.FOLLOWUP_PROMPT }}
+ SM_ISSUE_CLASSIFY_PROMPT: pydeequ-bot/issue-classify-prompt
+ SM_ISSUE_RESPOND_PROMPT: pydeequ-bot/issue-respond-prompt
+ SM_PR_FILE_REVIEW_PROMPT: pydeequ-bot/pr-file-review-prompt
+ SM_FOLLOWUP_PROMPT: pydeequ-bot/followup-prompt
+ CODEBASE_SRC_DIR: pydeequ
+ CODEBASE_FILE_EXT: .py
DRY_RUN: ${{ inputs.dry_run || 'false' }}
ARTIFACT_PATH: ${{ runner.temp }}/bot_result.json
run: python -m issue_bot.main analyze
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
new file mode 100644
index 0000000..94ae395
--- /dev/null
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,44 @@
+# Housekeeping for inactive issues and PRs: label them as stale, then close
+# them if nobody responds. Runs on a Monday cron schedule or on manual dispatch.
+name: Manage Stale Issues and PRs
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 9 * * MON"
+
+permissions:
+  pull-requests: write
+  issues: write
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e  # v9.0.0
+        with:
+          # Thresholds in days, plus the per-run operations setting.
+          days-before-stale: 60
+          days-before-close: 14
+          operations-per-run: 50
+
+          # Label applied when stale, and labels that exempt an item.
+          stale-issue-label: "stale"
+          exempt-issue-labels: "bug,enhancement,help-wanted"
+          stale-pr-label: "stale"
+          exempt-pr-labels: "help-wanted"
+
+          # Notices posted when an item is marked stale.
+          stale-issue-message: >
+            This issue has been inactive for 60 days. It will be closed in 14 days
+            if there is no further activity. If this is still relevant, please comment
+            to keep it open.
+          stale-pr-message: >
+            This PR has been inactive for 60 days. It will be closed in 14 days
+            if there is no further activity. If you are still working on this,
+            please push an update or comment to keep it open.
+          # Notices posted when an item is closed.
+          close-issue-message: >
+            Closed due to inactivity. Feel free to reopen if this is still relevant.
+          close-pr-message: >
+            Closed due to inactivity. Feel free to reopen if you'd like to continue this work.
diff --git a/README.md b/README.md
index a6003c9..bde2d24 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,6 @@ There are 4 main components of Deequ, and they are:
- With PyDeequ v0.1.8+, we now officially support Spark3 ! Just make sure you have an environment variable `SPARK_VERSION` to specify your Spark version!
- We've release a blogpost on integrating PyDeequ onto AWS leveraging services such as AWS Glue, Athena, and SageMaker! Check it out: [Monitor data quality in your data lake using PyDeequ and AWS Glue](https://aws.amazon.com/blogs/big-data/monitor-data-quality-in-your-data-lake-using-pydeequ-and-aws-glue/).
- Check out the [PyDeequ Release Announcement Blogpost](https://aws.amazon.com/blogs/big-data/testing-data-quality-at-scale-with-pydeequ/) with a tutorial walkthrough the Amazon Reviews dataset!
-- Join the PyDeequ community on [PyDeequ Slack](https://join.slack.com/t/pydeequ/shared_invite/zt-te6bntpu-yaqPy7bhiN8Lu0NxpZs47Q) to chat with the devs!
## Quickstart
@@ -120,6 +119,17 @@ checkResult_df = VerificationResult.checkResultsAsDataFrame(spark, checkResult)
checkResult_df.show()
```
+#### Row-Level Results
+
+You can also get row-level results to see which individual rows passed or failed each check. This is useful for quarantining rows with data quality issues:
+
+```python
+rowLevelResult_df = VerificationResult.rowLevelResultsAsDataFrame(spark, checkResult, df)
+rowLevelResult_df.show()
+```
+
+Each check produces a Boolean column (named after the check description) indicating pass/fail per row. When a single Check contains multiple constraints, they are ANDed together into one Boolean column — the row passes only if all constraints in that Check pass. Only checks with row-level-capable constraints (e.g., `isComplete`, `isContainedIn`, `hasPattern`, `isUnique`) will produce output columns.
+
### Repository
Save to a Metrics Repository by adding the `useRepository()` and `saveOrAppendResult()` calls to your Analysis Runner.
diff --git a/poetry.lock b/poetry.lock
index 6563511..62f13bc 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,59 +1,49 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
-
-[[package]]
-name = "atomicwrites"
-version = "1.4.1"
-description = "Atomic file writes."
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
-files = [
- {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"},
-]
-
-[[package]]
-name = "attrs"
-version = "22.1.0"
-description = "Classes Without Boilerplate"
-optional = false
-python-versions = ">=3.5"
-files = [
- {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"},
- {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"},
-]
-
-[package.extras]
-dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy (>=0.900,!=0.940)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "sphinx", "sphinx-notfound-page", "zope.interface"]
-docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"]
-tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "zope.interface"]
-tests-no-zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"]
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
[[package]]
name = "black"
-version = "21.12b0"
+version = "24.10.0"
description = "The uncompromising code formatter."
optional = false
-python-versions = ">=3.6.2"
-files = [
- {file = "black-21.12b0-py3-none-any.whl", hash = "sha256:a615e69ae185e08fdd73e4715e260e2479c861b5740057fde6e8b4e3b7dd589f"},
- {file = "black-21.12b0.tar.gz", hash = "sha256:77b80f693a569e2e527958459634f18df9b0ba2625ba4e0c2d5da5be42e6f2b3"},
+python-versions = ">=3.9"
+files = [
+ {file = "black-24.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6668650ea4b685440857138e5fe40cde4d652633b1bdffc62933d0db4ed9812"},
+ {file = "black-24.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1c536fcf674217e87b8cc3657b81809d3c085d7bf3ef262ead700da345bfa6ea"},
+ {file = "black-24.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:649fff99a20bd06c6f727d2a27f401331dc0cc861fb69cde910fe95b01b5928f"},
+ {file = "black-24.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:fe4d6476887de70546212c99ac9bd803d90b42fc4767f058a0baa895013fbb3e"},
+ {file = "black-24.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad"},
+ {file = "black-24.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50"},
+ {file = "black-24.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392"},
+ {file = "black-24.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:394d4ddc64782e51153eadcaaca95144ac4c35e27ef9b0a42e121ae7e57a9175"},
+ {file = "black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3"},
+ {file = "black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65"},
+ {file = "black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f"},
+ {file = "black-24.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:30d2c30dc5139211dda799758559d1b049f7f14c580c409d6ad925b74a4208a8"},
+ {file = "black-24.10.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cbacacb19e922a1d75ef2b6ccaefcd6e93a2c05ede32f06a21386a04cedb981"},
+ {file = "black-24.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1f93102e0c5bb3907451063e08b9876dbeac810e7da5a8bfb7aeb5a9ef89066b"},
+ {file = "black-24.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ddacb691cdcdf77b96f549cf9591701d8db36b2f19519373d60d31746068dbf2"},
+ {file = "black-24.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:680359d932801c76d2e9c9068d05c6b107f2584b2a5b88831c83962eb9984c1b"},
+ {file = "black-24.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:17374989640fbca88b6a448129cd1745c5eb8d9547b464f281b251dd00155ccd"},
+ {file = "black-24.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:63f626344343083322233f175aaf372d326de8436f5928c042639a4afbbf1d3f"},
+ {file = "black-24.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfa1d0cb6200857f1923b602f978386a3a2758a65b52e0950299ea014be6800"},
+ {file = "black-24.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:2cd9c95431d94adc56600710f8813ee27eea544dd118d45896bb734e9d7a0dc7"},
+ {file = "black-24.10.0-py3-none-any.whl", hash = "sha256:3bb2b7a1f7b685f85b11fed1ef10f8a9148bceb49853e47a294a3dd963c1dd7d"},
+ {file = "black-24.10.0.tar.gz", hash = "sha256:846ea64c97afe3bc677b761787993be4991810ecc7a4a937816dd6bddedc4875"},
]
[package.dependencies]
-click = ">=7.1.2"
+click = ">=8.0.0"
mypy-extensions = ">=0.4.3"
-pathspec = ">=0.9.0,<1"
+packaging = ">=22.0"
+pathspec = ">=0.9.0"
platformdirs = ">=2"
-tomli = ">=0.2.6,<2.0.0"
-typing-extensions = [
- {version = ">=3.10.0.0,<3.10.0.1 || >3.10.0.1", markers = "python_version >= \"3.10\""},
- {version = ">=3.10.0.0", markers = "python_version < \"3.10\""},
-]
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""}
[package.extras]
colorama = ["colorama (>=0.4.3)"]
-d = ["aiohttp (>=3.7.4)"]
+d = ["aiohttp (>=3.10)"]
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
-python2 = ["typed-ast (>=1.4.3)"]
uvloop = ["uvloop (>=0.15.2)"]
[[package]]
@@ -214,67 +204,122 @@ files = [
[[package]]
name = "coverage"
-version = "5.5"
+version = "7.10.7"
description = "Code coverage measurement for Python"
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
-files = [
- {file = "coverage-5.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:b6d534e4b2ab35c9f93f46229363e17f63c53ad01330df9f2d6bd1187e5eaacf"},
- {file = "coverage-5.5-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:b7895207b4c843c76a25ab8c1e866261bcfe27bfaa20c192de5190121770672b"},
- {file = "coverage-5.5-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:c2723d347ab06e7ddad1a58b2a821218239249a9e4365eaff6649d31180c1669"},
- {file = "coverage-5.5-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:900fbf7759501bc7807fd6638c947d7a831fc9fdf742dc10f02956ff7220fa90"},
- {file = "coverage-5.5-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:004d1880bed2d97151facef49f08e255a20ceb6f9432df75f4eef018fdd5a78c"},
- {file = "coverage-5.5-cp27-cp27m-win32.whl", hash = "sha256:06191eb60f8d8a5bc046f3799f8a07a2d7aefb9504b0209aff0b47298333302a"},
- {file = "coverage-5.5-cp27-cp27m-win_amd64.whl", hash = "sha256:7501140f755b725495941b43347ba8a2777407fc7f250d4f5a7d2a1050ba8e82"},
- {file = "coverage-5.5-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:372da284cfd642d8e08ef606917846fa2ee350f64994bebfbd3afb0040436905"},
- {file = "coverage-5.5-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:8963a499849a1fc54b35b1c9f162f4108017b2e6db2c46c1bed93a72262ed083"},
- {file = "coverage-5.5-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:869a64f53488f40fa5b5b9dcb9e9b2962a66a87dab37790f3fcfb5144b996ef5"},
- {file = "coverage-5.5-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:4a7697d8cb0f27399b0e393c0b90f0f1e40c82023ea4d45d22bce7032a5d7b81"},
- {file = "coverage-5.5-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:8d0a0725ad7c1a0bcd8d1b437e191107d457e2ec1084b9f190630a4fb1af78e6"},
- {file = "coverage-5.5-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:51cb9476a3987c8967ebab3f0fe144819781fca264f57f89760037a2ea191cb0"},
- {file = "coverage-5.5-cp310-cp310-win_amd64.whl", hash = "sha256:c0891a6a97b09c1f3e073a890514d5012eb256845c451bd48f7968ef939bf4ae"},
- {file = "coverage-5.5-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:3487286bc29a5aa4b93a072e9592f22254291ce96a9fbc5251f566b6b7343cdb"},
- {file = "coverage-5.5-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:deee1077aae10d8fa88cb02c845cfba9b62c55e1183f52f6ae6a2df6a2187160"},
- {file = "coverage-5.5-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:f11642dddbb0253cc8853254301b51390ba0081750a8ac03f20ea8103f0c56b6"},
- {file = "coverage-5.5-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:6c90e11318f0d3c436a42409f2749ee1a115cd8b067d7f14c148f1ce5574d701"},
- {file = "coverage-5.5-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:30c77c1dc9f253283e34c27935fded5015f7d1abe83bc7821680ac444eaf7793"},
- {file = "coverage-5.5-cp35-cp35m-win32.whl", hash = "sha256:9a1ef3b66e38ef8618ce5fdc7bea3d9f45f3624e2a66295eea5e57966c85909e"},
- {file = "coverage-5.5-cp35-cp35m-win_amd64.whl", hash = "sha256:972c85d205b51e30e59525694670de6a8a89691186012535f9d7dbaa230e42c3"},
- {file = "coverage-5.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:af0e781009aaf59e25c5a678122391cb0f345ac0ec272c7961dc5455e1c40066"},
- {file = "coverage-5.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:74d881fc777ebb11c63736622b60cb9e4aee5cace591ce274fb69e582a12a61a"},
- {file = "coverage-5.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:92b017ce34b68a7d67bd6d117e6d443a9bf63a2ecf8567bb3d8c6c7bc5014465"},
- {file = "coverage-5.5-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:d636598c8305e1f90b439dbf4f66437de4a5e3c31fdf47ad29542478c8508bbb"},
- {file = "coverage-5.5-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:41179b8a845742d1eb60449bdb2992196e211341818565abded11cfa90efb821"},
- {file = "coverage-5.5-cp36-cp36m-win32.whl", hash = "sha256:040af6c32813fa3eae5305d53f18875bedd079960822ef8ec067a66dd8afcd45"},
- {file = "coverage-5.5-cp36-cp36m-win_amd64.whl", hash = "sha256:5fec2d43a2cc6965edc0bb9e83e1e4b557f76f843a77a2496cbe719583ce8184"},
- {file = "coverage-5.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:18ba8bbede96a2c3dde7b868de9dcbd55670690af0988713f0603f037848418a"},
- {file = "coverage-5.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:2910f4d36a6a9b4214bb7038d537f015346f413a975d57ca6b43bf23d6563b53"},
- {file = "coverage-5.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:f0b278ce10936db1a37e6954e15a3730bea96a0997c26d7fee88e6c396c2086d"},
- {file = "coverage-5.5-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:796c9c3c79747146ebd278dbe1e5c5c05dd6b10cc3bcb8389dfdf844f3ead638"},
- {file = "coverage-5.5-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:53194af30d5bad77fcba80e23a1441c71abfb3e01192034f8246e0d8f99528f3"},
- {file = "coverage-5.5-cp37-cp37m-win32.whl", hash = "sha256:184a47bbe0aa6400ed2d41d8e9ed868b8205046518c52464fde713ea06e3a74a"},
- {file = "coverage-5.5-cp37-cp37m-win_amd64.whl", hash = "sha256:2949cad1c5208b8298d5686d5a85b66aae46d73eec2c3e08c817dd3513e5848a"},
- {file = "coverage-5.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:217658ec7187497e3f3ebd901afdca1af062b42cfe3e0dafea4cced3983739f6"},
- {file = "coverage-5.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1aa846f56c3d49205c952d8318e76ccc2ae23303351d9270ab220004c580cfe2"},
- {file = "coverage-5.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:24d4a7de75446be83244eabbff746d66b9240ae020ced65d060815fac3423759"},
- {file = "coverage-5.5-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:d1f8bf7b90ba55699b3a5e44930e93ff0189aa27186e96071fac7dd0d06a1873"},
- {file = "coverage-5.5-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:970284a88b99673ccb2e4e334cfb38a10aab7cd44f7457564d11898a74b62d0a"},
- {file = "coverage-5.5-cp38-cp38-win32.whl", hash = "sha256:01d84219b5cdbfc8122223b39a954820929497a1cb1422824bb86b07b74594b6"},
- {file = "coverage-5.5-cp38-cp38-win_amd64.whl", hash = "sha256:2e0d881ad471768bf6e6c2bf905d183543f10098e3b3640fc029509530091502"},
- {file = "coverage-5.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d1f9ce122f83b2305592c11d64f181b87153fc2c2bbd3bb4a3dde8303cfb1a6b"},
- {file = "coverage-5.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:13c4ee887eca0f4c5a247b75398d4114c37882658300e153113dafb1d76de529"},
- {file = "coverage-5.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:52596d3d0e8bdf3af43db3e9ba8dcdaac724ba7b5ca3f6358529d56f7a166f8b"},
- {file = "coverage-5.5-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:2cafbbb3af0733db200c9b5f798d18953b1a304d3f86a938367de1567f4b5bff"},
- {file = "coverage-5.5-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:44d654437b8ddd9eee7d1eaee28b7219bec228520ff809af170488fd2fed3e2b"},
- {file = "coverage-5.5-cp39-cp39-win32.whl", hash = "sha256:d314ed732c25d29775e84a960c3c60808b682c08d86602ec2c3008e1202e3bb6"},
- {file = "coverage-5.5-cp39-cp39-win_amd64.whl", hash = "sha256:13034c4409db851670bc9acd836243aeee299949bd5673e11844befcb0149f03"},
- {file = "coverage-5.5-pp36-none-any.whl", hash = "sha256:f030f8873312a16414c0d8e1a1ddff2d3235655a2174e3648b4fa66b3f2f1079"},
- {file = "coverage-5.5-pp37-none-any.whl", hash = "sha256:2a3859cb82dcbda1cfd3e6f71c27081d18aa251d20a17d87d26d4cd216fb0af4"},
- {file = "coverage-5.5.tar.gz", hash = "sha256:ebe78fe9a0e874362175b02371bdfbee64d8edc42a044253ddf4ee7d3c15212c"},
+python-versions = ">=3.9"
+files = [
+ {file = "coverage-7.10.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fc04cc7a3db33664e0c2d10eb8990ff6b3536f6842c9590ae8da4c614b9ed05a"},
+ {file = "coverage-7.10.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e201e015644e207139f7e2351980feb7040e6f4b2c2978892f3e3789d1c125e5"},
+ {file = "coverage-7.10.7-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:240af60539987ced2c399809bd34f7c78e8abe0736af91c3d7d0e795df633d17"},
+ {file = "coverage-7.10.7-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8421e088bc051361b01c4b3a50fd39a4b9133079a2229978d9d30511fd05231b"},
+ {file = "coverage-7.10.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6be8ed3039ae7f7ac5ce058c308484787c86e8437e72b30bf5e88b8ea10f3c87"},
+ {file = "coverage-7.10.7-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e28299d9f2e889e6d51b1f043f58d5f997c373cc12e6403b90df95b8b047c13e"},
+ {file = "coverage-7.10.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c4e16bd7761c5e454f4efd36f345286d6f7c5fa111623c355691e2755cae3b9e"},
+ {file = "coverage-7.10.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b1c81d0e5e160651879755c9c675b974276f135558cf4ba79fee7b8413a515df"},
+ {file = "coverage-7.10.7-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:606cc265adc9aaedcc84f1f064f0e8736bc45814f15a357e30fca7ecc01504e0"},
+ {file = "coverage-7.10.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:10b24412692df990dbc34f8fb1b6b13d236ace9dfdd68df5b28c2e39cafbba13"},
+ {file = "coverage-7.10.7-cp310-cp310-win32.whl", hash = "sha256:b51dcd060f18c19290d9b8a9dd1e0181538df2ce0717f562fff6cf74d9fc0b5b"},
+ {file = "coverage-7.10.7-cp310-cp310-win_amd64.whl", hash = "sha256:3a622ac801b17198020f09af3eaf45666b344a0d69fc2a6ffe2ea83aeef1d807"},
+ {file = "coverage-7.10.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a609f9c93113be646f44c2a0256d6ea375ad047005d7f57a5c15f614dc1b2f59"},
+ {file = "coverage-7.10.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:65646bb0359386e07639c367a22cf9b5bf6304e8630b565d0626e2bdf329227a"},
+ {file = "coverage-7.10.7-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5f33166f0dfcce728191f520bd2692914ec70fac2713f6bf3ce59c3deacb4699"},
+ {file = "coverage-7.10.7-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:35f5e3f9e455bb17831876048355dca0f758b6df22f49258cb5a91da23ef437d"},
+ {file = "coverage-7.10.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4da86b6d62a496e908ac2898243920c7992499c1712ff7c2b6d837cc69d9467e"},
+ {file = "coverage-7.10.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6b8b09c1fad947c84bbbc95eca841350fad9cbfa5a2d7ca88ac9f8d836c92e23"},
+ {file = "coverage-7.10.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4376538f36b533b46f8971d3a3e63464f2c7905c9800db97361c43a2b14792ab"},
+ {file = "coverage-7.10.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:121da30abb574f6ce6ae09840dae322bef734480ceafe410117627aa54f76d82"},
+ {file = "coverage-7.10.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:88127d40df529336a9836870436fc2751c339fbaed3a836d42c93f3e4bd1d0a2"},
+ {file = "coverage-7.10.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ba58bbcd1b72f136080c0bccc2400d66cc6115f3f906c499013d065ac33a4b61"},
+ {file = "coverage-7.10.7-cp311-cp311-win32.whl", hash = "sha256:972b9e3a4094b053a4e46832b4bc829fc8a8d347160eb39d03f1690316a99c14"},
+ {file = "coverage-7.10.7-cp311-cp311-win_amd64.whl", hash = "sha256:a7b55a944a7f43892e28ad4bc0561dfd5f0d73e605d1aa5c3c976b52aea121d2"},
+ {file = "coverage-7.10.7-cp311-cp311-win_arm64.whl", hash = "sha256:736f227fb490f03c6488f9b6d45855f8e0fd749c007f9303ad30efab0e73c05a"},
+ {file = "coverage-7.10.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7bb3b9ddb87ef7725056572368040c32775036472d5a033679d1fa6c8dc08417"},
+ {file = "coverage-7.10.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:18afb24843cbc175687225cab1138c95d262337f5473512010e46831aa0c2973"},
+ {file = "coverage-7.10.7-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:399a0b6347bcd3822be369392932884b8216d0944049ae22925631a9b3d4ba4c"},
+ {file = "coverage-7.10.7-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314f2c326ded3f4b09be11bc282eb2fc861184bc95748ae67b360ac962770be7"},
+ {file = "coverage-7.10.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c41e71c9cfb854789dee6fc51e46743a6d138b1803fab6cb860af43265b42ea6"},
+ {file = "coverage-7.10.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc01f57ca26269c2c706e838f6422e2a8788e41b3e3c65e2f41148212e57cd59"},
+ {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a6442c59a8ac8b85812ce33bc4d05bde3fb22321fa8294e2a5b487c3505f611b"},
+ {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:78a384e49f46b80fb4c901d52d92abe098e78768ed829c673fbb53c498bef73a"},
+ {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5e1e9802121405ede4b0133aa4340ad8186a1d2526de5b7c3eca519db7bb89fb"},
+ {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d41213ea25a86f69efd1575073d34ea11aabe075604ddf3d148ecfec9e1e96a1"},
+ {file = "coverage-7.10.7-cp312-cp312-win32.whl", hash = "sha256:77eb4c747061a6af8d0f7bdb31f1e108d172762ef579166ec84542f711d90256"},
+ {file = "coverage-7.10.7-cp312-cp312-win_amd64.whl", hash = "sha256:f51328ffe987aecf6d09f3cd9d979face89a617eacdaea43e7b3080777f647ba"},
+ {file = "coverage-7.10.7-cp312-cp312-win_arm64.whl", hash = "sha256:bda5e34f8a75721c96085903c6f2197dc398c20ffd98df33f866a9c8fd95f4bf"},
+ {file = "coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d"},
+ {file = "coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b"},
+ {file = "coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e"},
+ {file = "coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b"},
+ {file = "coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49"},
+ {file = "coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911"},
+ {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0"},
+ {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f"},
+ {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c"},
+ {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f"},
+ {file = "coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698"},
+ {file = "coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843"},
+ {file = "coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546"},
+ {file = "coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c"},
+ {file = "coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15"},
+ {file = "coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4"},
+ {file = "coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0"},
+ {file = "coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0"},
+ {file = "coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65"},
+ {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541"},
+ {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6"},
+ {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999"},
+ {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2"},
+ {file = "coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a"},
+ {file = "coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb"},
+ {file = "coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb"},
+ {file = "coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520"},
+ {file = "coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32"},
+ {file = "coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f"},
+ {file = "coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a"},
+ {file = "coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360"},
+ {file = "coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69"},
+ {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14"},
+ {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe"},
+ {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e"},
+ {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd"},
+ {file = "coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2"},
+ {file = "coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681"},
+ {file = "coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880"},
+ {file = "coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63"},
+ {file = "coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2"},
+ {file = "coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d"},
+ {file = "coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0"},
+ {file = "coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699"},
+ {file = "coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9"},
+ {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f"},
+ {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1"},
+ {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0"},
+ {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399"},
+ {file = "coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235"},
+ {file = "coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d"},
+ {file = "coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a"},
+ {file = "coverage-7.10.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fff7b9c3f19957020cac546c70025331113d2e61537f6e2441bc7657913de7d3"},
+ {file = "coverage-7.10.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bc91b314cef27742da486d6839b677b3f2793dfe52b51bbbb7cf736d5c29281c"},
+ {file = "coverage-7.10.7-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:567f5c155eda8df1d3d439d40a45a6a5f029b429b06648235f1e7e51b522b396"},
+ {file = "coverage-7.10.7-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2af88deffcc8a4d5974cf2d502251bc3b2db8461f0b66d80a449c33757aa9f40"},
+ {file = "coverage-7.10.7-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7315339eae3b24c2d2fa1ed7d7a38654cba34a13ef19fbcb9425da46d3dc594"},
+ {file = "coverage-7.10.7-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:912e6ebc7a6e4adfdbb1aec371ad04c68854cd3bf3608b3514e7ff9062931d8a"},
+ {file = "coverage-7.10.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f49a05acd3dfe1ce9715b657e28d138578bc40126760efb962322c56e9ca344b"},
+ {file = "coverage-7.10.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:cce2109b6219f22ece99db7644b9622f54a4e915dad65660ec435e89a3ea7cc3"},
+ {file = "coverage-7.10.7-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:f3c887f96407cea3916294046fc7dab611c2552beadbed4ea901cbc6a40cc7a0"},
+ {file = "coverage-7.10.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:635adb9a4507c9fd2ed65f39693fa31c9a3ee3a8e6dc64df033e8fdf52a7003f"},
+ {file = "coverage-7.10.7-cp39-cp39-win32.whl", hash = "sha256:5a02d5a850e2979b0a014c412573953995174743a3f7fa4ea5a6e9a3c5617431"},
+ {file = "coverage-7.10.7-cp39-cp39-win_amd64.whl", hash = "sha256:c134869d5ffe34547d14e174c866fd8fe2254918cc0a95e99052903bc1543e07"},
+ {file = "coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260"},
+ {file = "coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239"},
]
+[package.dependencies]
+tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""}
+
[package.extras]
-toml = ["toml"]
+toml = ["tomli"]
[[package]]
name = "cryptography"
@@ -353,23 +398,18 @@ files = [
]
[[package]]
-name = "dparse"
-version = "0.6.0"
-description = "A parser for Python dependency files"
+name = "exceptiongroup"
+version = "1.2.2"
+description = "Backport of PEP 654 (exception groups)"
optional = false
-python-versions = ">=3.5"
+python-versions = ">=3.7"
files = [
- {file = "dparse-0.6.0-py3-none-any.whl", hash = "sha256:3cb489bd06bfa8d285c85f7dec69d9ee8f89c29dd5f4ab48e159746dc13b78b2"},
- {file = "dparse-0.6.0.tar.gz", hash = "sha256:57068bb61859b1676c6beb10f399906eecb41a75b5d3fbc99d0311059cb67213"},
+ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
+ {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
]
-[package.dependencies]
-packaging = "*"
-toml = "*"
-
[package.extras]
-conda = ["pyyaml"]
-pipenv = ["pipenv"]
+test = ["pytest (>=6)"]
[[package]]
name = "filelock"
@@ -388,29 +428,29 @@ testing = ["covdefaults (>=2.2)", "coverage (>=6.4.2)", "pytest (>=7.1.2)", "pyt
[[package]]
name = "flake8"
-version = "3.9.2"
+version = "7.3.0"
description = "the modular source code checker: pep8 pyflakes and co"
optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
+python-versions = ">=3.9"
files = [
- {file = "flake8-3.9.2-py2.py3-none-any.whl", hash = "sha256:bf8fd333346d844f616e8d47905ef3a3384edae6b4e9beb0c5101e25e3110907"},
- {file = "flake8-3.9.2.tar.gz", hash = "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b"},
+ {file = "flake8-7.3.0-py2.py3-none-any.whl", hash = "sha256:b9696257b9ce8beb888cdbe31cf885c90d31928fe202be0889a7cdafad32f01e"},
+ {file = "flake8-7.3.0.tar.gz", hash = "sha256:fe044858146b9fc69b551a4b490d69cf960fcb78ad1edcb84e7fbb1b4a8e3872"},
]
[package.dependencies]
-mccabe = ">=0.6.0,<0.7.0"
-pycodestyle = ">=2.7.0,<2.8.0"
-pyflakes = ">=2.3.0,<2.4.0"
+mccabe = ">=0.7.0,<0.8.0"
+pycodestyle = ">=2.14.0,<2.15.0"
+pyflakes = ">=3.4.0,<3.5.0"
[[package]]
name = "flake8-docstrings"
-version = "1.6.0"
+version = "1.7.0"
description = "Extension for flake8 which uses pydocstyle to check docstrings"
optional = false
-python-versions = "*"
+python-versions = ">=3.7"
files = [
- {file = "flake8-docstrings-1.6.0.tar.gz", hash = "sha256:9fe7c6a306064af8e62a055c2f61e9eb1da55f84bb39caef2b84ce53708ac34b"},
- {file = "flake8_docstrings-1.6.0-py2.py3-none-any.whl", hash = "sha256:99cac583d6c7e32dd28bbfbef120a7c0d1b6dde4adb5a9fd441c4227a6534bde"},
+ {file = "flake8_docstrings-1.7.0-py2.py3-none-any.whl", hash = "sha256:51f2344026da083fc084166a9353f5082b01f72901df422f74b4d953ae88ac75"},
+ {file = "flake8_docstrings-1.7.0.tar.gz", hash = "sha256:4c8cc748dc16e6869728699e5d0d685da9a10b0ea718e090b1ba088e67a941af"},
]
[package.dependencies]
@@ -527,15 +567,50 @@ SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""}
docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"]
testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
+[[package]]
+name = "markdown-it-py"
+version = "3.0.0"
+description = "Python port of markdown-it. Markdown parsing, done right!"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"},
+ {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"},
+]
+
+[package.dependencies]
+mdurl = ">=0.1,<1.0"
+
+[package.extras]
+benchmarking = ["psutil", "pytest", "pytest-benchmark"]
+code-style = ["pre-commit (>=3.0,<4.0)"]
+compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"]
+linkify = ["linkify-it-py (>=1,<3)"]
+plugins = ["mdit-py-plugins"]
+profiling = ["gprof2dot"]
+rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"]
+testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
+
[[package]]
name = "mccabe"
-version = "0.6.1"
+version = "0.7.0"
description = "McCabe checker, plugin for flake8"
optional = false
-python-versions = "*"
+python-versions = ">=3.6"
+files = [
+ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
+ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
+]
+
+[[package]]
+name = "mdurl"
+version = "0.1.2"
+description = "Markdown URL utilities"
+optional = false
+python-versions = ">=3.7"
files = [
- {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"},
- {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"},
+ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
+ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
]
[[package]]
@@ -613,18 +688,15 @@ files = [
[[package]]
name = "packaging"
-version = "21.3"
+version = "26.2"
description = "Core utilities for Python packages"
optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
files = [
- {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
- {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
+ {file = "packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e"},
+ {file = "packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661"},
]
-[package.dependencies]
-pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
-
[[package]]
name = "pandas"
version = "1.4.4"
@@ -657,10 +729,10 @@ files = [
[package.dependencies]
numpy = [
- {version = ">=1.21.0", markers = "python_version >= \"3.10\""},
{version = ">=1.18.5", markers = "(platform_machine != \"aarch64\" and platform_machine != \"arm64\") and python_version < \"3.10\""},
{version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""},
{version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""},
+ {version = ">=1.21.0", markers = "python_version >= \"3.10\""},
]
python-dateutil = ">=2.8.1"
pytz = ">=2020.1"
@@ -710,28 +782,28 @@ test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock
[[package]]
name = "pluggy"
-version = "1.0.0"
+version = "1.6.0"
description = "plugin and hook calling mechanisms for python"
optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.9"
files = [
- {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
- {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
+ {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"},
+ {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"},
]
[package.extras]
dev = ["pre-commit", "tox"]
-testing = ["pytest", "pytest-benchmark"]
+testing = ["coverage", "pytest", "pytest-benchmark"]
[[package]]
name = "pre-commit"
-version = "2.20.0"
+version = "3.8.0"
description = "A framework for managing and maintaining multi-language pre-commit hooks."
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.9"
files = [
- {file = "pre_commit-2.20.0-py2.py3-none-any.whl", hash = "sha256:51a5ba7c480ae8072ecdb6933df22d2f812dc897d5fe848778116129a681aac7"},
- {file = "pre_commit-2.20.0.tar.gz", hash = "sha256:a978dac7bc9ec0bcee55c18a277d553b0f419d259dadb4b9418ff2d00eb43959"},
+ {file = "pre_commit-3.8.0-py2.py3-none-any.whl", hash = "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f"},
+ {file = "pre_commit-3.8.0.tar.gz", hash = "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af"},
]
[package.dependencies]
@@ -739,19 +811,7 @@ cfgv = ">=2.0.0"
identify = ">=1.0.0"
nodeenv = ">=0.11.1"
pyyaml = ">=5.1"
-toml = "*"
-virtualenv = ">=20.0.8"
-
-[[package]]
-name = "py"
-version = "1.11.0"
-description = "library with cross-python path, ini-parsing, io, code, log facilities"
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
-files = [
- {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
- {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
-]
+virtualenv = ">=20.10.0"
[[package]]
name = "py4j"
@@ -766,13 +826,13 @@ files = [
[[package]]
name = "pycodestyle"
-version = "2.7.0"
+version = "2.14.0"
description = "Python style guide checker"
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+python-versions = ">=3.9"
files = [
- {file = "pycodestyle-2.7.0-py2.py3-none-any.whl", hash = "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068"},
- {file = "pycodestyle-2.7.0.tar.gz", hash = "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"},
+ {file = "pycodestyle-2.14.0-py2.py3-none-any.whl", hash = "sha256:dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d"},
+ {file = "pycodestyle-2.14.0.tar.gz", hash = "sha256:c4b5b517d278089ff9d0abdec919cd97262a3367449ea1c8b49b91529167b783"},
]
[[package]]
@@ -805,13 +865,13 @@ toml = ["toml"]
[[package]]
name = "pyflakes"
-version = "2.3.1"
+version = "3.4.0"
description = "passive checker of Python programs"
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+python-versions = ">=3.9"
files = [
- {file = "pyflakes-2.3.1-py2.py3-none-any.whl", hash = "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3"},
- {file = "pyflakes-2.3.1.tar.gz", hash = "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"},
+ {file = "pyflakes-3.4.0-py2.py3-none-any.whl", hash = "sha256:f742a7dbd0d9cb9ea41e9a24a918996e8170c799fa528688d40dd582c8265f4f"},
+ {file = "pyflakes-3.4.0.tar.gz", hash = "sha256:b24f96fafb7d2ab0ec5075b7350b3d2d2218eab42003821c06344973d3ea2f58"},
]
[[package]]
@@ -828,20 +888,6 @@ files = [
[package.extras]
plugins = ["importlib-metadata"]
-[[package]]
-name = "pyparsing"
-version = "3.0.9"
-description = "pyparsing module - Classes and methods to define and execute parsing grammars"
-optional = false
-python-versions = ">=3.6.8"
-files = [
- {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"},
- {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
-]
-
-[package.extras]
-diagrams = ["jinja2", "railroad-diagrams"]
-
[[package]]
name = "pyspark"
version = "3.3.2"
@@ -863,91 +909,59 @@ sql = ["pandas (>=1.0.5)", "pyarrow (>=1.0.0)"]
[[package]]
name = "pytest"
-version = "6.2.5"
+version = "8.4.2"
description = "pytest: simple powerful testing with Python"
optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.9"
files = [
- {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"},
- {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"},
+ {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"},
+ {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"},
]
[package.dependencies]
-atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
-attrs = ">=19.2.0"
-colorama = {version = "*", markers = "sys_platform == \"win32\""}
-iniconfig = "*"
-packaging = "*"
-pluggy = ">=0.12,<2.0"
-py = ">=1.8.2"
-toml = "*"
+colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""}
+exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""}
+iniconfig = ">=1"
+packaging = ">=20"
+pluggy = ">=1.5,<2"
+pygments = ">=2.7.2"
+tomli = {version = ">=1", markers = "python_version < \"3.11\""}
[package.extras]
-testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]
+dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"]
[[package]]
name = "pytest-cov"
-version = "2.12.1"
+version = "5.0.0"
description = "Pytest plugin for measuring coverage."
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+python-versions = ">=3.8"
files = [
- {file = "pytest-cov-2.12.1.tar.gz", hash = "sha256:261ceeb8c227b726249b376b8526b600f38667ee314f910353fa318caa01f4d7"},
- {file = "pytest_cov-2.12.1-py2.py3-none-any.whl", hash = "sha256:261bb9e47e65bd099c89c3edf92972865210c36813f80ede5277dceb77a4a62a"},
+ {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"},
+ {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"},
]
[package.dependencies]
-coverage = ">=5.2.1"
+coverage = {version = ">=5.2.1", extras = ["toml"]}
pytest = ">=4.6"
-toml = "*"
[package.extras]
-testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"]
-
-[[package]]
-name = "pytest-flake8"
-version = "1.1.0"
-description = "pytest plugin to check FLAKE8 requirements"
-optional = false
-python-versions = "*"
-files = [
- {file = "pytest-flake8-1.1.0.tar.gz", hash = "sha256:358d449ca06b80dbadcb43506cd3e38685d273b4968ac825da871bd4cc436202"},
- {file = "pytest_flake8-1.1.0-py2.py3-none-any.whl", hash = "sha256:f1b19dad0b9f0aa651d391c9527ebc20ac1a0f847aa78581094c747462bfa182"},
-]
-
-[package.dependencies]
-flake8 = ">=3.5"
-pytest = ">=3.5"
+testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"]
[[package]]
name = "pytest-rerunfailures"
-version = "9.1.1"
+version = "14.0"
description = "pytest plugin to re-run tests to eliminate flaky failures"
optional = false
-python-versions = ">=3.5"
+python-versions = ">=3.8"
files = [
- {file = "pytest-rerunfailures-9.1.1.tar.gz", hash = "sha256:1cb11a17fc121b3918414eb5eaf314ee325f2e693ac7cb3f6abf7560790827f2"},
- {file = "pytest_rerunfailures-9.1.1-py3-none-any.whl", hash = "sha256:2eb7d0ad651761fbe80e064b0fd415cf6730cdbc53c16a145fd84b66143e609f"},
+ {file = "pytest-rerunfailures-14.0.tar.gz", hash = "sha256:4a400bcbcd3c7a4ad151ab8afac123d90eca3abe27f98725dc4d9702887d2e92"},
+ {file = "pytest_rerunfailures-14.0-py3-none-any.whl", hash = "sha256:4197bdd2eaeffdbf50b5ea6e7236f47ff0e44d1def8dae08e409f536d84e7b32"},
]
[package.dependencies]
-pytest = ">=5.0"
-setuptools = ">=40.0"
-
-[[package]]
-name = "pytest-runner"
-version = "5.3.2"
-description = "Invoke py.test as distutils command with dependency resolution"
-optional = false
-python-versions = ">=3.6"
-files = [
- {file = "pytest-runner-5.3.2.tar.gz", hash = "sha256:48934ec94301f6727d30615af1960539ff62063f6c9b71b7227174e51ba5fb34"},
- {file = "pytest_runner-5.3.2-py3-none-any.whl", hash = "sha256:c7d785ea6c612396c11ddbaf467764d2cc746ef96a713fbe1a296c221503b7c3"},
-]
-
-[package.extras]
-docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"]
-testing = ["pytest (>=4.6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy", "pytest-virtualenv"]
+packaging = ">=17.1"
+pytest = ">=7.2"
[[package]]
name = "python-dateutil"
@@ -1103,22 +1117,22 @@ files = [
idna2008 = ["idna"]
[[package]]
-name = "safety"
-version = "1.10.3"
-description = "Checks installed dependencies for known vulnerabilities."
+name = "rich"
+version = "15.0.0"
+description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
optional = false
-python-versions = ">=3.5"
+python-versions = ">=3.9.0"
files = [
- {file = "safety-1.10.3-py2.py3-none-any.whl", hash = "sha256:5f802ad5df5614f9622d8d71fedec2757099705c2356f862847c58c6dfe13e84"},
- {file = "safety-1.10.3.tar.gz", hash = "sha256:30e394d02a20ac49b7f65292d19d38fa927a8f9582cdfd3ad1adbbc66c641ad5"},
+ {file = "rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb"},
+ {file = "rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36"},
]
[package.dependencies]
-Click = ">=6.0"
-dparse = ">=0.5.1"
-packaging = "*"
-requests = "*"
-setuptools = "*"
+markdown-it-py = ">=2.2.0"
+pygments = ">=2.13.0,<3.0.0"
+
+[package.extras]
+jupyter = ["ipywidgets (>=7.5.1,<9)"]
[[package]]
name = "secretstorage"
@@ -1172,17 +1186,6 @@ files = [
{file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"},
]
-[[package]]
-name = "toml"
-version = "0.10.2"
-description = "Python Library for Tom's Obvious, Minimal Language"
-optional = false
-python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
-files = [
- {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
- {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
-]
-
[[package]]
name = "tomli"
version = "1.2.3"
@@ -1194,47 +1197,26 @@ files = [
{file = "tomli-1.2.3.tar.gz", hash = "sha256:05b6166bff487dc068d322585c7ea4ef78deed501cc124060e0f238e89a9231f"},
]
-[[package]]
-name = "tqdm"
-version = "4.64.1"
-description = "Fast, Extensible Progress Meter"
-optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7"
-files = [
- {file = "tqdm-4.64.1-py2.py3-none-any.whl", hash = "sha256:6fee160d6ffcd1b1c68c65f14c829c22832bc401726335ce92c52d395944a6a1"},
- {file = "tqdm-4.64.1.tar.gz", hash = "sha256:5f4f682a004951c1b450bc753c710e9280c5746ce6ffedee253ddbcbf54cf1e4"},
-]
-
-[package.dependencies]
-colorama = {version = "*", markers = "platform_system == \"Windows\""}
-
-[package.extras]
-dev = ["py-make (>=0.1.0)", "twine", "wheel"]
-notebook = ["ipywidgets (>=6)"]
-slack = ["slack-sdk"]
-telegram = ["requests"]
-
[[package]]
name = "twine"
-version = "3.8.0"
+version = "5.1.1"
description = "Collection of utilities for publishing packages on PyPI"
optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
files = [
- {file = "twine-3.8.0-py3-none-any.whl", hash = "sha256:d0550fca9dc19f3d5e8eadfce0c227294df0a2a951251a4385797c8a6198b7c8"},
- {file = "twine-3.8.0.tar.gz", hash = "sha256:8efa52658e0ae770686a13b675569328f1fba9837e5de1867bfe5f46a9aefe19"},
+ {file = "twine-5.1.1-py3-none-any.whl", hash = "sha256:215dbe7b4b94c2c50a7315c0275d2258399280fbb7d04182c7e55e24b5f93997"},
+ {file = "twine-5.1.1.tar.gz", hash = "sha256:9aa0825139c02b3434d913545c7b847a21c835e11597f5255842d457da2322db"},
]
[package.dependencies]
-colorama = ">=0.4.3"
importlib-metadata = ">=3.6"
keyring = ">=15.1"
-pkginfo = ">=1.8.1"
-readme-renderer = ">=21.0"
+pkginfo = ">=1.8.1,<1.11"
+readme-renderer = ">=35.0"
requests = ">=2.20"
requests-toolbelt = ">=0.8.0,<0.9.0 || >0.9.0"
rfc3986 = ">=1.4.0"
-tqdm = ">=4.14"
+rich = ">=12.0.0"
urllib3 = ">=1.26.0"
[[package]]
@@ -1315,5 +1297,5 @@ pyspark = ["pyspark"]
[metadata]
lock-version = "2.0"
-python-versions = ">=3.8,<4"
-content-hash = "616e9d9c99206a718797187683520c788474e89486e6549d92eeed712ff6ae55"
+python-versions = ">=3.9,<4"
+content-hash = "9ca1d776b68eead781bf0161f4f2ee552085c8e201b482b6f78d18c06a2a5d79"
diff --git a/pydeequ/verification.py b/pydeequ/verification.py
index c164246..38da74f 100644
--- a/pydeequ/verification.py
+++ b/pydeequ/verification.py
@@ -143,6 +143,34 @@ def checkResultsAsDataFrame(
)
return DataFrame(df, sql_ctx).toPandas() if pandas else DataFrame(df, sql_ctx)
+ @classmethod
+ def rowLevelResultsAsDataFrame(
+     cls, spark_session: SparkSession, verificationResult, data: DataFrame, pandas: bool = False
+ ):
+     """
+     Annotate the verified DataFrame with one Boolean pass/fail column per Check.
+
+     The column for a Check is named after the Check's description; when a Check holds
+     several constraints, their row-level outcomes are ANDed into that single column.
+     Columns are only produced for checks with row-level-capable constraints (e.g.,
+     isComplete, hasPattern, isContainedIn, isUnique); aggregate-only checks (e.g.,
+     hasSize) are skipped.
+
+     :param SparkSession spark_session: SparkSession
+     :param verificationResult: The results of the verification run
+     :param DataFrame data: The original input DataFrame that was verified
+     :param bool pandas: If True, return a Pandas DataFrame instead of PySpark
+     :return: DataFrame with original columns plus Boolean columns per qualifying Check
+     """
+     # Delegate to Deequ on the JVM side; the call hands back a Java DataFrame.
+     deequ_result = spark_session._jvm.com.amazon.deequ.VerificationResult
+     java_df = deequ_result.rowLevelResultsAsDataFrame(
+         spark_session._jsparkSession, verificationResult.verificationRun, data._jdf
+     )
+     # Wrap the Java DataFrame once so both return paths share the same object.
+     wrapped = DataFrame(
+         java_df,
+         SQLContext(
+             sparkContext=spark_session._sc,
+             sparkSession=spark_session,
+             jsqlContext=spark_session._jsparkSession.sqlContext(),
+         ),
+     )
+     if pandas:
+         return wrapped.toPandas()
+     return wrapped
+
class VerificationRunBuilder:
# TODO Remaining Methods
diff --git a/pyproject.toml b/pyproject.toml
index 4120233..e6755d1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,24 +28,21 @@ classifiers = [
[tool.poetry.dependencies]
-python = ">=3.8,<4"
+python = ">=3.9,<4"
numpy = ">=1.14.1"
pandas = ">=0.23.0"
pyspark = { version = ">=2.4.7,<4.0.0", optional = true }
[tool.poetry.dev-dependencies]
-pytest = "^6.2.4"
-pytest-cov = "^2.11.1"
-coverage = "^5.5"
-pytest-runner = "^5.3.0"
-black = "^21.5b1"
-flake8 = "^3.9.2"
-flake8-docstrings = "^1.6.0"
-pytest-flake8 = "^1.0.7"
-pre-commit = "^2.12.1"
-pytest-rerunfailures = "^9.1.1"
-twine = "^3.4.1"
-safety = "^1.10.3"
+pytest = "^8.0"
+pytest-cov = "^5.0"
+coverage = "^7.0"
+black = "^24.0"
+flake8 = "^7.0"
+flake8-docstrings = "^1.7"
+pre-commit = "^3.5"
+pytest-rerunfailures = "^14.0"
+twine = "^5.0"
[tool.poetry.extras]
pyspark = ["pyspark"]
@@ -62,7 +59,7 @@ include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
ensure_newline_before_comments = true
-target_version = ['py38']
+target_version = ['py39']
include = '\.pyi?$'
exclude = '''
/(
diff --git a/scripts/issue_bot/config.py b/scripts/issue_bot/config.py
index b6fdb0c..5fff510 100644
--- a/scripts/issue_bot/config.py
+++ b/scripts/issue_bot/config.py
@@ -17,6 +17,8 @@ def __init__(self):
sys.exit(1)
self.repo = _require("GITHUB_REPOSITORY")
self.actor = os.getenv("GITHUB_ACTOR", "")
+ self.event_before = os.getenv("EVENT_BEFORE", "")
+ self.event_after = os.getenv("EVENT_AFTER", "")
self.bedrock_model_id = os.getenv("BEDROCK_MODEL_ID", "us.anthropic.claude-opus-4-6-v1")
@@ -32,14 +34,16 @@ def __init__(self):
self.enable_repo_search = os.getenv("ENABLE_REPO_SEARCH", "true").lower() == "true"
self.upstream_repo = os.getenv("UPSTREAM_REPO", "awslabs/python-deequ")
+ self.codebase_src_dir = os.getenv("CODEBASE_SRC_DIR", "pydeequ")
+ self.codebase_file_ext = os.getenv("CODEBASE_FILE_EXT", ".py")
- self.bedrock_timeout = 120
- self.max_context_chars = 200000
+ self.bedrock_timeout = 240
+ self.max_context_chars = 800000
self.max_github_search_results = 8
self.github_api_timeout = 10
self.allowed_labels = {
"bug", "enhancement", "question", "documentation",
- "help-wanted", "analyzer", "check", "spark-compatibility", "installation",
+ "help wanted", "python",
}
diff --git a/scripts/issue_bot/github_client.py b/scripts/issue_bot/github_client.py
index 82d0ed2..5c2d046 100644
--- a/scripts/issue_bot/github_client.py
+++ b/scripts/issue_bot/github_client.py
@@ -11,6 +11,7 @@ def __init__(self, cfg):
self._repo = cfg.repo
self._timeout = cfg.github_api_timeout
self._dry_run = cfg.dry_run
+ self._cfg = cfg
self._repo_root = os.getenv("GITHUB_WORKSPACE", os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
self._headers = {
"Authorization": f"token {self._token}",
@@ -48,6 +49,59 @@ def get_pr_diff(self, number):
logger.error(f"PR diff fetch failed: {e}")
return ""
+ def get_compare_diff(self, base_sha, head_sha):
+     """Fetch the diff between two commits using the Compare API.
+
+     :param base_sha: SHA on the left side of the ``base...head`` comparison.
+     :param head_sha: SHA on the right side of the ``base...head`` comparison.
+     :return: The diff text, or an empty string when either SHA is missing,
+         when the API answers with a non-200 status (e.g. force-push where
+         base_sha no longer exists), or when the request raises.
+     """
+     # A missing SHA cannot name a valid comparison, so skip the network
+     # round-trip and report "no diff" straight away.
+     if not base_sha or not head_sha:
+         logger.warning("Compare diff skipped: base or head SHA is missing")
+         return ""
+     # Reuse the client's headers but ask for the diff media type.
+     headers = {**self._headers, "Accept": "application/vnd.github.v3.diff"}
+     try:
+         resp = requests.get(
+             f"https://api.github.com/repos/{self._repo}/compare/{base_sha}...{head_sha}",
+             headers=headers, timeout=self._timeout,
+         )
+         if resp.status_code == 200:
+             return resp.text
+         logger.warning(f"Compare API {base_sha[:7]}...{head_sha[:7]}: {resp.status_code}")
+         return ""
+     except Exception as e:
+         # Best-effort fetch: callers treat an empty string as "diff unavailable".
+         logger.error(f"Compare diff failed: {e}")
+         return ""
+
+ def get_ci_status(self, sha):
+     """Check commit statuses and check runs. Returns (passed, summary).
+
+     passed: True (all green), False (something failed), None (pending/unknown).
+     Check runs whose name looks like one of this bot's own jobs are ignored.
+
+     :param sha: Commit SHA whose CI results are inspected.
+     :return: Tuple ``(passed, summary)``; summary is a short human-readable string.
+     """
+     status = self._get(f"/repos/{self._repo}/commits/{sha}/status")
+     if status is None:
+         return None, "CI status unavailable"
+     combined_state = status.get("state", "pending")
+
+     # GitHub reports the combined state as "pending" when a commit has no
+     # statuses at all, so find out whether any status was actually reported.
+     status_count = status.get("total_count")
+     if status_count is None:
+         reported = status.get("statuses")
+         # Unknown payload shape: assume statuses exist so "pending" is honored.
+         status_count = 1 if reported is None else len(reported)
+     has_statuses = status_count > 0
+
+     # NOTE(review): only the first page of check runs is read here — confirm
+     # whether self._get paginates if a commit can have many check runs.
+     check_data = self._get(f"/repos/{self._repo}/commits/{sha}/check-runs")
+     runs = check_data.get("check_runs", []) if check_data else []
+
+     def _is_own_check(name):
+         lower = name.lower()
+         return "bot" in lower and ("analyze" in lower or "/ act" in lower)
+
+     failed = []
+     pending = []
+     external_runs = []
+     for r in runs:
+         # Normalise once so a missing or null name cannot raise below.
+         name = r.get("name") or "unknown"
+         if _is_own_check(name):
+             continue
+         external_runs.append(r)
+         if r.get("status") != "completed":
+             pending.append(name)
+         elif r.get("conclusion") not in ("success", "neutral", "skipped"):
+             failed.append(name)
+
+     if failed:
+         return False, f"CI failing: {', '.join(failed)}"
+     if pending:
+         return None, f"CI pending: {', '.join(pending)}"
+     if combined_state == "failure":
+         return False, "CI failing (status checks)"
+     # A "pending" state with zero statuses is just GitHub's default; it only
+     # blocks the result when nothing else (no check run) vouches for the commit.
+     if combined_state == "pending" and (has_statuses or not external_runs):
+         return None, "CI pending (status checks)"
+     return True, "CI passed"
+
def get_pr_files(self, number):
return self._get(f"/repos/{self._repo}/pulls/{number}/files") or []
@@ -64,16 +118,18 @@ def get_pr_review_comments(self, number, max_pages=10):
page += 1
return comments
- def get_codebase_map(self, src_dir="pydeequ"):
- """List all Python source files (excluding tests) as relative paths."""
+ def get_codebase_map(self):
+ """List source files (excluding tests) as relative paths."""
+ src_dir = self._cfg.codebase_src_dir
+ file_ext = self._cfg.codebase_file_ext
full_dir = os.path.join(self._repo_root, src_dir)
prefix = self._repo_root.rstrip("/") + "/"
try:
paths = []
for root, dirs, files in os.walk(full_dir):
- dirs[:] = [d for d in dirs if d not in ("tests", "__pycache__", ".git")]
+ dirs[:] = [d for d in dirs if d not in ("examples", "__pycache__", ".git", "tests", "test")]
for f in files:
- if f.endswith(".py"):
+ if f.endswith(file_ext):
full = os.path.join(root, f)
rel = full[len(prefix):] if full.startswith(prefix) else full
paths.append(rel)
@@ -116,9 +172,9 @@ def post_comment(self, number, body):
return True
return self._post(f"/repos/{self._repo}/issues/{number}/comments", {"body": body})
- def post_pr_review(self, number, summary, inline_comments):
+ def post_pr_review(self, number, summary, inline_comments, event="COMMENT"):
if self._dry_run:
- logger.info(f"[DRY RUN] PR review on #{number}: {len(inline_comments)} inline comments")
+ logger.info(f"[DRY RUN] PR review on #{number}: {len(inline_comments)} inline comments, event={event}")
return True
# Get valid diff lines per file from the PR
@@ -134,31 +190,34 @@ def post_pr_review(self, number, summary, inline_comments):
else:
invalid_comments.append(ic)
+ body = summary
+ if invalid_comments:
+ body += "\n\n**Additional feedback:**\n"
+ for ic in invalid_comments:
+ line_ref = f":{ic['line']}" if ic.get('line') else ""
+ body += f"\n`{ic['file']}{line_ref}` — {ic['comment']}\n"
+
+ payload = {"body": body, "event": event}
if valid_comments:
- body = summary
- if invalid_comments:
- body += "\n\n**Additional feedback:**\n"
- for ic in invalid_comments:
- line_ref = f":{ic['line']}" if ic.get('line') else ""
- body += f"\n`{ic['file']}{line_ref}` — {ic['comment']}\n"
- payload = {"body": body, "event": "REQUEST_CHANGES", "comments": valid_comments}
- try:
- resp = requests.post(
- f"https://api.github.com/repos/{self._repo}/pulls/{number}/reviews",
- headers=self._headers, json=payload, timeout=self._timeout,
- )
- if resp.status_code in (200, 201):
- return True
- logger.error(f"PR review API failed: {resp.status_code}, falling back to comment")
- except Exception as e:
- logger.error(f"PR review API failed: {e}, falling back to comment")
-
- # Fallback: post all as regular comment
- all_comments = inline_comments
+ payload["comments"] = valid_comments
+
+ try:
+ resp = requests.post(
+ f"https://api.github.com/repos/{self._repo}/pulls/{number}/reviews",
+ headers=self._headers, json=payload, timeout=self._timeout,
+ )
+ if resp.status_code in (200, 201):
+ return True
+ logger.error(f"PR review API failed: {resp.status_code}, falling back to comment")
+ logger.error(f"Response: {resp.text[:500]}")
+ except Exception as e:
+ logger.error(f"PR review API failed: {e}, falling back to comment")
+
+ # Fallback: post as regular comment if review API fails
body = summary
- if all_comments:
+ if inline_comments:
body += "\n\n**Inline feedback:**\n"
- for ic in all_comments:
+ for ic in inline_comments:
line_ref = f":{ic['line']}" if ic.get('line') else ""
body += f"\n`{ic['file']}{line_ref}` — {ic['comment']}\n"
return self._post(f"/repos/{self._repo}/issues/{number}/comments", {"body": body})
diff --git a/scripts/issue_bot/main.py b/scripts/issue_bot/main.py
index 77f5ef7..593eaa8 100644
--- a/scripts/issue_bot/main.py
+++ b/scripts/issue_bot/main.py
@@ -6,6 +6,7 @@
"""
import json
+import re
import sys
import os
import datetime
@@ -149,17 +150,57 @@ def analyze():
diff = gh.get_pr_diff(number)
review_comments = gh.get_pr_review_comments(number)
existing_feedback = _format_pr_feedback(comments_data, review_comments)
+
+ # Incremental review: on synchronize, compute what changed since last push
+ incremental_diff = ""
+ incremental_files = set()
+ if is_pr_update and cfg.event_before and cfg.event_after:
+ incremental_diff = gh.get_compare_diff(cfg.event_before, cfg.event_after)
+ if incremental_diff:
+ incremental_files = _extract_diff_files(incremental_diff)
+
+ # Fetch full source files at the SHA the diff is anchored to
+ head_sha = cfg.event_after or item.get("head", {}).get("sha", "")
+ pr_files = gh.get_pr_files(number)
+ full_sources = ""
+ for pf in pr_files:
+ fname = pf.get("filename", "")
+ content = gh.get_file_content(fname, ref=head_sha) if head_sha else gh.get_file_content(fname)
+ if content:
+ entry = f"\n### `{fname}`\n```\n{content}\n```\n"
+ if len(full_sources) + len(entry) > 3_000_000:
+ full_sources += f"\n### `{fname}` — SKIPPED (context budget)\n"
+ break
+ full_sources += entry
+
+ # Build incremental review instructions
+ incremental_section = ""
+ if incremental_diff:
+ incremental_section = (
+ "\n\n"
+ "This is a RE-REVIEW after the author pushed new commits. "
+ "The below shows ONLY what changed since the last push. "
+ "You MUST limit your comments to lines/files in the incremental diff. "
+ "Do NOT re-raise issues on unchanged code — the author already saw prior feedback. "
+ "Do NOT comment on lines that are not part of the incremental diff. "
+ "If the incremental diff only fixes issues from prior feedback, respond with zero comments."
+ "\n\n"
+ f"\n{incremental_diff}\n\n"
+ )
+
# System prompt: instructions + all trusted context (not scanned by guardrail)
system_prompt = _render(tmpl, current_date=datetime.date.today().isoformat()) + (
f"\n\n\n{context}\n\n"
f"\n{codebase_map}\n\n"
+ f"\n{full_sources}\n\n"
f"\n{diff}\n\n"
- f"\n{existing_feedback}\n"
+ f"\n{existing_feedback}\n\n"
+ f"{incremental_section}"
)
# User prompt: only user-authored content (scanned by guardrail)
user_prompt = f"\nTitle: {title}\nBody: {body}\n"
raw = bedrock.invoke(system_prompt, user_prompt,
- max_tokens=4000, json_schema=PR_REVIEW_SCHEMA)
+ max_tokens=8000, json_schema=PR_REVIEW_SCHEMA)
if raw is None:
_write_artifact({
"action": "ESCALATE", "reason": "bedrock_unavailable", "title": title,
@@ -172,14 +213,50 @@ def analyze():
inline_comments = pr_result.get("comments", [])
except json.JSONDecodeError:
inline_comments = _parse_file_review_multi(raw)
+
+ # Hard filter: on incremental review, drop comments on files not in the incremental diff
+ if incremental_files and inline_comments:
+ inline_comments = [
+ c for c in inline_comments
+ if c.get("file", "") in incremental_files
+ ]
+
+ # Hard filter: drop NITs on re-reviews (code-enforced, not prompt-dependent)
+ if is_pr_update and inline_comments:
+ inline_comments = [
+ c for c in inline_comments
+ if c.get("severity", "").upper() != "NIT"
+ ]
+
+ # Format comments: prepend severity, append evidence as context
+ for c in inline_comments:
+ severity = c.get("severity", "")
+ evidence = c.get("evidence", "")
+ prefix = f"**{severity}**: " if severity else ""
+ suffix = "\n\n> " + evidence.replace("\n", "\n> ") if evidence else ""
+ c["comment"] = prefix + c.get("comment", "") + suffix
+
+ # Check CI status to give accurate signal to human reviewers
+ ci_passed, ci_summary = gh.get_ci_status(head_sha) if head_sha else (None, "")
+
+ if not inline_comments:
+ if ci_passed is True:
+ response = "No issues found. CI is passing.\n"
+ elif ci_passed is False:
+ response = f"No code issues found, but {ci_summary}."
+ else:
+ response = "No issues found.\n"
+ else:
+ response = ""
+
_write_artifact({
"action": "RESPOND",
- "labels": [], "response": "No issues found." if not inline_comments else "",
+ "labels": [], "response": response,
"inline_comments": inline_comments,
"title": title, "html_url": html_url, "number": number,
- "is_pr": True, "prompt_id": prompts.prompt_version(tmpl),
+ "is_pr": True, "is_incremental": bool(incremental_diff),
+ "prompt_id": prompts.prompt_version(tmpl),
"model_id": cfg.bedrock_model_id,
- "reason": "no_issues_found" if not inline_comments else "",
})
return
@@ -308,7 +385,11 @@ def act():
sanitized_comments.append({**ic, "comment": safe_comment})
inline_comments = sanitized_comments
if is_pr and inline_comments:
- gh.post_pr_review(number, response + footer, inline_comments)
+ gh.post_pr_review(number, response + footer, inline_comments, event="COMMENT")
+ elif is_pr and response and not inline_comments:
+ gh.post_pr_review(number, response + footer, [], event="COMMENT")
+ elif not response and not inline_comments:
+ logger.info(f"Skip #{number}: nothing to post after sanitization")
else:
gh.post_comment(number, response + footer)
gh.add_labels(number, labels)
@@ -354,7 +435,7 @@ def act():
elif action == "CLOSE" and not is_pr:
msg = (
- "This issue may not be related to the PyDeequ data quality library. "
+ "This issue may not be related to the PyDeequ library. "
"The maintainer team has been notified and will review." + footer
)
gh.post_comment(number, msg)
@@ -584,6 +665,16 @@ def _format_pr_feedback(issue_comments, review_comments):
return "\n".join(parts) if parts else "(no existing feedback)"
+def _extract_diff_files(diff_text):
+    """Extract the set of file paths touched in a unified diff.
+
+    Only ``diff --git a/<old> b/<new>`` header lines are inspected, and the
+    post-image path ``<new>`` is what gets collected.
+
+    Args:
+        diff_text: Unified diff text; may be empty.
+
+    Returns:
+        A set of path strings (empty when no header line is found).
+    """
+    files = set()
+    marker = "diff --git a/"
+    for line in diff_text.split("\n"):
+        # Drop a CRLF remnant so it cannot end up inside the captured path.
+        line = line.rstrip("\r")
+        if not line.startswith(marker):
+            continue
+        rest = line[len(marker):]  # "<old> b/<new>"
+        # Unrenamed file: "<p> b/<p>" is 2*len(p) + 3 characters long. Checking
+        # that shape first keeps a path that itself contains " b/" intact.
+        half, odd = divmod(len(rest) - 3, 2)
+        if half > 0 and not odd and rest[half:half + 3] == " b/" and rest[:half] == rest[half + 3:]:
+            files.add(rest[:half])
+            continue
+        # Rename (or anything asymmetric): fall back to the greedy match.
+        m = re.match(r'^diff --git a/.+ b/(.+)$', line)
+        if m:
+            files.add(m.group(1))
+    return files
+
+
def _read_requested_files(gh, file_paths, cfg):
snippets = []
for path in file_paths[:cfg.max_github_search_results]:
diff --git a/scripts/issue_bot/prompts.py b/scripts/issue_bot/prompts.py
index e2ff385..9fc97ea 100644
--- a/scripts/issue_bot/prompts.py
+++ b/scripts/issue_bot/prompts.py
@@ -1,21 +1,53 @@
import hashlib
+import logging
import os
+import boto3
+
+logger = logging.getLogger("issue_bot")
+
+_sm_client = None
+
+
+def _get_sm_client():
+    """Return the module-wide Secrets Manager client, creating it on first use."""
+    global _sm_client
+    if _sm_client is not None:
+        return _sm_client
+    _sm_client = boto3.client("secretsmanager")
+    return _sm_client
+
+
+def _read_from_sm(secret_id):
+    """Read a secret's string value from Secrets Manager.
+
+    Returns "" when secret_id is falsy or when the lookup raises; in the
+    latter case only the exception's type name is logged.
+    """
+    if secret_id:
+        try:
+            return _get_sm_client().get_secret_value(SecretId=secret_id)["SecretString"]
+        except Exception as err:
+            logger.error("Failed to read prompt from Secrets Manager: %s", type(err).__name__)
+    return ""
+
+
+def _get_prompt(env_var, sm_env_var):
+    """Resolve a prompt: the env var wins; otherwise read the secret named by sm_env_var."""
+    # A non-empty env value short-circuits, so Secrets Manager is never touched.
+    return os.getenv(env_var, "") or _read_from_sm(os.getenv(sm_env_var, ""))
+
def get_issue_prompt():
- return os.getenv("ISSUE_CLASSIFY_PROMPT", "")
+ return _get_prompt("ISSUE_CLASSIFY_PROMPT", "SM_ISSUE_CLASSIFY_PROMPT")
def get_issue_respond_prompt():
- return os.getenv("ISSUE_RESPOND_PROMPT", "")
+ return _get_prompt("ISSUE_RESPOND_PROMPT", "SM_ISSUE_RESPOND_PROMPT")
def get_pr_file_review_prompt():
- return os.getenv("PR_FILE_REVIEW_PROMPT", "")
+ return _get_prompt("PR_FILE_REVIEW_PROMPT", "SM_PR_FILE_REVIEW_PROMPT")
def get_followup_prompt():
- return os.getenv("FOLLOWUP_PROMPT", "")
+ return _get_prompt("FOLLOWUP_PROMPT", "SM_FOLLOWUP_PROMPT")
def prompt_version(template):
diff --git a/scripts/issue_bot/schemas/pr_review_response.json b/scripts/issue_bot/schemas/pr_review_response.json
index ef22e07..1901a95 100644
--- a/scripts/issue_bot/schemas/pr_review_response.json
+++ b/scripts/issue_bot/schemas/pr_review_response.json
@@ -12,14 +12,23 @@
"line": {
"type": "integer"
},
+ "severity": {
+ "type": "string",
+ "enum": ["BUG", "EDGE_CASE", "MISSING_TEST", "DESIGN", "NIT"]
+ },
"comment": {
"type": "string"
+ },
+ "evidence": {
+ "type": "string"
}
},
"required": [
"file",
"line",
- "comment"
+ "severity",
+ "comment",
+ "evidence"
],
"additionalProperties": false
}
@@ -29,4 +38,4 @@
"comments"
],
"additionalProperties": false
-}
\ No newline at end of file
+}
diff --git a/tests/test_bot.py b/tests/test_bot.py
index 77992dc..90f0f66 100644
--- a/tests/test_bot.py
+++ b/tests/test_bot.py
@@ -17,6 +17,7 @@
_user_dissatisfied,
_clean_response,
_render,
+ _extract_diff_files,
)
from issue_bot.sanitizer import sanitize, _fix_accidental_issue_refs
@@ -203,6 +204,208 @@ def test_preserves_normal_text(self):
assert _clean_response(text) == text
+class TestGetCiStatus:
+    """Tests for GitHubClient.get_ci_status method."""
+
+    @staticmethod
+    def _run(name, conclusion, status="completed"):
+        """Build one check-run record shaped like the API payload."""
+        return {"name": name, "status": status, "conclusion": conclusion}
+
+    def _make_client(self):
+        """Construct a GitHubClient while a throwaway environment is patched in."""
+        from unittest import mock
+        fake_env = {
+            "GITHUB_TOKEN": "fake", "GITHUB_REPOSITORY": "awslabs/test",
+            "ISSUE_NUMBER": "1", "EVENT_TYPE": "issues", "EVENT_ACTION": "opened",
+            "GITHUB_WORKFLOW": "PyDeequ Bot",
+        }
+        with mock.patch.dict(os.environ, fake_env):
+            from issue_bot.config import Config
+            from issue_bot.github_client import GitHubClient
+            return GitHubClient(Config())
+
+    def _ci_status(self, state, check_runs):
+        """Call get_ci_status with canned replies: commit status first, then check runs."""
+        from unittest import mock
+        client = self._make_client()
+        client._get = mock.MagicMock(side_effect=[{"state": state}, {"check_runs": check_runs}])
+        return client.get_ci_status("abc123")
+
+    def test_all_checks_passed(self):
+        passed, summary = self._ci_status("success", [
+            self._run("Java CI", "success"),
+            self._run("CodeQL", "success"),
+        ])
+        assert passed is True
+        assert "passed" in summary.lower()
+
+    def test_check_run_failed(self):
+        passed, summary = self._ci_status("success", [
+            self._run("Java CI", "failure"),
+            self._run("CodeQL", "success"),
+        ])
+        assert passed is False
+        assert "Java CI" in summary
+
+    def test_check_run_pending(self):
+        passed, summary = self._ci_status("pending", [
+            self._run("Java CI", None, status="in_progress"),
+        ])
+        assert passed is None
+        assert "pending" in summary.lower()
+
+    def test_bot_check_filtered_out(self):
+        passed, _ = self._ci_status("success", [
+            self._run("Java CI", "success"),
+            self._run("PyDeequ Bot / analyze", "success"),
+            self._run("PyDeequ Bot / act", "success"),
+        ])
+        assert passed is True
+
+    def test_non_bot_check_with_bot_in_name_not_filtered(self):
+        passed, _ = self._ci_status("success", [
+            self._run("robot-tests", "failure"),
+        ])
+        assert passed is False
+
+    def test_skipped_and_neutral_count_as_passed(self):
+        passed, _ = self._ci_status("success", [
+            self._run("Optional Check", "skipped"),
+            self._run("Info Check", "neutral"),
+        ])
+        assert passed is True
+
+    def test_api_failure_returns_unknown(self):
+        from unittest import mock
+        client = self._make_client()
+        # Every lookup comes back empty, as when the API call fails.
+        client._get = mock.MagicMock(return_value=None)
+        passed, summary = client.get_ci_status("abc123")
+        assert passed is None
+
+
+class TestAutoApproveSignal:
+    """Tests that bot posts the correct signal for the auto-approve workflow to act on."""
+
+    def _make_artifact(self, tmp_path, response, inline_comments=None):
+        """Write a RESPOND artifact for PR #42 and return its path."""
+        path = str(tmp_path / "result.json")
+        payload = {
+            "action": "RESPOND",
+            "labels": [],
+            "response": response,
+            "inline_comments": inline_comments or [],
+            "title": "Fix", "html_url": "https://github.com/x",
+            "number": 42, "is_pr": True, "is_incremental": False,
+            "prompt_id": "abc123", "model_id": "test",
+        }
+        with open(path, "w") as f:
+            json.dump(payload, f)
+        return path
+
+    def _run_act(self, tmp_path, monkeypatch, response, inline_comments=None):
+        """Run act() on a fresh artifact; return the (post_pr_review, post_comment) mocks."""
+        from unittest import mock
+        path = self._make_artifact(tmp_path, response=response, inline_comments=inline_comments)
+        for key, value in (
+            ("GITHUB_TOKEN", "fake"),
+            ("GITHUB_REPOSITORY", "awslabs/test"),
+            ("ISSUE_NUMBER", "42"),
+            ("EVENT_TYPE", "pull_request_target"),
+            ("EVENT_ACTION", "opened"),
+        ):
+            monkeypatch.setenv(key, value)
+        import issue_bot.main as bot_main
+        monkeypatch.setattr(bot_main, "ARTIFACT_PATH", path)
+
+        with mock.patch("issue_bot.github_client.GitHubClient.post_pr_review") as review, \
+             mock.patch("issue_bot.github_client.GitHubClient.post_comment") as comment, \
+             mock.patch("issue_bot.github_client.GitHubClient.add_labels"), \
+             mock.patch("issue_bot.slack_client.SlackClient.send_escalation"):
+            review.return_value = True
+            bot_main.act()
+        return review, comment
+
+    def test_no_issues_posts_pr_review_with_signal(self, tmp_path, monkeypatch):
+        """Bot posts 'No issues found' as a PR review — auto-approve.yml looks for this in listReviews."""
+        review, comment = self._run_act(
+            tmp_path, monkeypatch, response="No issues found. CI is passing.")
+        review.assert_called_once()
+        comment.assert_not_called()
+        # The second positional argument is the review body.
+        assert "No issues found" in review.call_args[0][1]
+
+    def test_with_issues_posts_review_not_comment(self, tmp_path, monkeypatch):
+        """Bot posts inline review when there are issues — no approve signal."""
+        review, comment = self._run_act(
+            tmp_path, monkeypatch, response="",
+            inline_comments=[{"file": "a.py", "line": 1, "comment": "BUG: issue"}])
+        review.assert_called_once()
+        comment.assert_not_called()
+
+
+class TestPrompts:
+    """Prompt lookup: the env var first, then the Secrets Manager fallback."""
+
+    def test_env_var_takes_precedence(self, monkeypatch):
+        from issue_bot.prompts import get_pr_file_review_prompt
+        monkeypatch.setenv("PR_FILE_REVIEW_PROMPT", "from env")
+        monkeypatch.setenv("SM_PR_FILE_REVIEW_PROMPT", "deequ-bot/pr-file-review-prompt")
+        assert get_pr_file_review_prompt() == "from env"
+
+    def test_empty_env_var_falls_through_to_sm(self, monkeypatch):
+        from unittest import mock
+        from issue_bot.prompts import get_pr_file_review_prompt
+        monkeypatch.setenv("PR_FILE_REVIEW_PROMPT", "")
+        monkeypatch.setenv("SM_PR_FILE_REVIEW_PROMPT", "deequ-bot/pr-file-review-prompt")
+        with mock.patch("issue_bot.prompts._read_from_sm", return_value="from sm") as reader:
+            result = get_pr_file_review_prompt()
+        assert result == "from sm"
+        reader.assert_called_once_with("deequ-bot/pr-file-review-prompt")
+
+    def test_no_sm_env_var_returns_empty(self, monkeypatch):
+        from issue_bot.prompts import get_pr_file_review_prompt
+        monkeypatch.setenv("PR_FILE_REVIEW_PROMPT", "")
+        monkeypatch.setenv("SM_PR_FILE_REVIEW_PROMPT", "")
+        # Neither an inline prompt nor a secret name is set → empty string
+        assert get_pr_file_review_prompt() == ""
+
+    def test_sm_failure_returns_empty(self, monkeypatch):
+        from unittest import mock
+        from issue_bot.prompts import get_followup_prompt
+        monkeypatch.setenv("FOLLOWUP_PROMPT", "")
+        monkeypatch.setenv("SM_FOLLOWUP_PROMPT", "deequ-bot/followup-prompt")
+        with mock.patch("issue_bot.prompts._get_sm_client") as client_factory:
+            client_factory.return_value.get_secret_value.side_effect = Exception("timeout")
+            assert get_followup_prompt() == ""
+
+
class TestSmoke:
def test_main_module_imports(self):
from issue_bot import main
@@ -320,3 +523,588 @@ def test_no_guardrail_no_config(self):
client.invoke("system", "user")
kwargs = client._client.converse.call_args[1]
assert "guardrailConfig" not in kwargs
+
+
+class TestExtractDiffFiles:
+    """_extract_diff_files reads paths from ``diff --git`` header lines."""
+
+    @staticmethod
+    def _diff(*lines):
+        """Join lines into diff text, each terminated by a newline."""
+        return "".join(line + "\n" for line in lines)
+
+    def test_single_file(self):
+        text = self._diff(
+            "diff --git a/src/foo.py b/src/foo.py",
+            "index abc1234..def5678 100644",
+            "--- a/src/foo.py",
+            "+++ b/src/foo.py",
+            "@@ -1,3 +1,4 @@",
+            "+new line",
+        )
+        assert _extract_diff_files(text) == {"src/foo.py"}
+
+    def test_multiple_files(self):
+        text = self._diff(
+            "diff --git a/a.py b/a.py",
+            "--- a/a.py",
+            "+++ b/a.py",
+            "@@ -1 +1 @@",
+            "-old",
+            "+new",
+            "diff --git a/b.py b/b.py",
+            "--- a/b.py",
+            "+++ b/b.py",
+            "@@ -1 +1 @@",
+            "-old",
+            "+new",
+        )
+        assert _extract_diff_files(text) == {"a.py", "b.py"}
+
+    def test_empty_diff(self):
+        assert _extract_diff_files("") == set()
+
+    def test_renamed_file(self):
+        # For a rename, the post-image (b/) path is the one reported.
+        text = self._diff("diff --git a/old_name.py b/new_name.py")
+        assert _extract_diff_files(text) == {"new_name.py"}
+
+    def test_path_with_spaces(self):
+        text = self._diff("diff --git a/path with spaces/file.py b/path with spaces/file.py")
+        assert _extract_diff_files(text) == {"path with spaces/file.py"}
+
+
+class TestIncrementalFiltering:
+    """Test that the incremental file filter drops comments on unrelated files."""
+
+    # NOTE(review): these tests restate the filter expression locally instead of
+    # calling analyze(), so they pin the intended rule, not the production code.
+
+    @staticmethod
+    def _apply(comments, incremental_files):
+        """Keep comments whose file is in the set; an empty set disables filtering."""
+        if not incremental_files:
+            return comments
+        return [c for c in comments if c.get("file", "") in incremental_files]
+
+    def test_comments_filtered_to_incremental_files(self):
+        kept = self._apply(
+            [
+                {"file": "src/changed.py", "line": 10, "comment": "new issue"},
+                {"file": "src/untouched.py", "line": 5, "comment": "old issue re-raised"},
+            ],
+            {"src/changed.py"},
+        )
+        assert len(kept) == 1
+        assert kept[0]["file"] == "src/changed.py"
+
+    def test_empty_incremental_files_passes_all(self):
+        # When incremental_files is empty (fallback to full review), no filtering
+        kept = self._apply(
+            [{"file": "src/any.py", "line": 1, "comment": "comment"}],
+            set(),
+        )
+        assert len(kept) == 1
+
+    def test_all_comments_filtered_yields_empty(self):
+        kept = self._apply(
+            [
+                {"file": "src/other.py", "line": 1, "comment": "stale"},
+                {"file": "src/another.py", "line": 2, "comment": "stale too"},
+            ],
+            {"src/only_this.py"},
+        )
+        assert kept == []
+
+
+class TestNitFilterAndFormatting:
+    """Tests for hard NIT filter on re-reviews and evidence formatting."""
+
+    def _analyze(self, tmp_path, monkeypatch, *, action, before, after, head_sha,
+                 prior_comments, model_comments, compare_diff=None):
+        """Run analyze() with every external client mocked and return the artifact.
+
+        Args:
+            action: Value for EVENT_ACTION ("opened", "synchronize", ...).
+            before, after: Values for EVENT_BEFORE / EVENT_AFTER.
+            head_sha: SHA reported as the PR head by the mocked get_pr.
+            prior_comments: Return value of the mocked get_comments.
+            model_comments: Comments the mocked model call returns (as JSON).
+            compare_diff: When given, get_compare_diff is mocked to return it;
+                when None, get_compare_diff is left unpatched.
+
+        Returns:
+            The parsed JSON artifact that analyze() wrote.
+        """
+        import contextlib
+        import unittest.mock as mock
+
+        env = {
+            "GITHUB_TOKEN": "fake",
+            "GITHUB_REPOSITORY": "awslabs/test",
+            "ISSUE_NUMBER": "10",
+            "EVENT_TYPE": "pull_request_target",
+            "EVENT_ACTION": action,
+            "EVENT_BEFORE": before,
+            "EVENT_AFTER": after,
+            "KB_S3_BUCKET": "",
+            "KB_S3_KEY": "",
+            "PR_FILE_REVIEW_PROMPT": "Review. Date: {current_date}",
+        }
+        for key, value in env.items():
+            monkeypatch.setenv(key, value)
+        import issue_bot.main as bot_main
+        artifact = str(tmp_path / "result.json")
+        monkeypatch.setattr(bot_main, "ARTIFACT_PATH", artifact)
+
+        gh = "issue_bot.github_client.GitHubClient."
+        # Patch target -> canned return value.
+        canned = {
+            gh + "get_pr": {
+                "user": {"login": "dev"}, "title": "Fix", "body": "",
+                "state": "open", "html_url": "https://github.com/x",
+                "head": {"sha": head_sha},
+            },
+            gh + "get_comments": prior_comments,
+            gh + "get_pr_review_comments": [],
+            gh + "get_pr_files": [{"filename": "f.py"}],
+            gh + "get_file_content": "content",
+            gh + "get_ci_status": (True, "CI passed"),
+            "issue_bot.knowledge_base.KnowledgeBase.build_context": "",
+            "issue_bot.bedrock_client.BedrockClient.__init__": None,
+            "issue_bot.bedrock_client.BedrockClient.invoke": json.dumps({"comments": model_comments}),
+        }
+        if compare_diff is not None:
+            canned[gh + "get_compare_diff"] = compare_diff
+        # Patched with a plain mock; their return values are left at the default.
+        bare = [
+            gh + "get_pr_diff",
+            gh + "get_codebase_map",
+            "issue_bot.knowledge_base.KnowledgeBase.load",
+        ]
+
+        with contextlib.ExitStack() as stack:
+            for target, value in canned.items():
+                stack.enter_context(mock.patch(target, return_value=value))
+            for target in bare:
+                stack.enter_context(mock.patch(target))
+            bot_main.analyze()
+
+        with open(artifact) as f:
+            return json.load(f)
+
+    def test_nits_dropped_on_re_review(self, tmp_path, monkeypatch):
+        incremental = "diff --git a/f.py b/f.py\n--- a/f.py\n+++ b/f.py\n@@ -1 +1 @@\n-x\n+y\n"
+        result = self._analyze(
+            tmp_path, monkeypatch,
+            action="synchronize", before="aaa", after="bbb", head_sha="bbb",
+            prior_comments=[{"user": {"login": "github-actions[bot]"}, "body": "prior"}],
+            compare_diff=incremental,
+            model_comments=[
+                {"file": "f.py", "line": 1, "severity": "BUG", "comment": "real bug",
+                 "evidence": "line 1 divides by zero"},
+                {"file": "f.py", "line": 1, "severity": "NIT", "comment": "rename var",
+                 "evidence": "x is not descriptive"},
+            ],
+        )
+
+        # NIT should be filtered, BUG should remain
+        assert result["action"] == "RESPOND"
+        assert len(result["inline_comments"]) == 1
+        assert "real bug" in result["inline_comments"][0]["comment"]
+
+    def test_nits_kept_on_first_review(self, tmp_path, monkeypatch):
+        result = self._analyze(
+            tmp_path, monkeypatch,
+            action="opened", before="", after="", head_sha="abc123",
+            prior_comments=[],
+            model_comments=[
+                {"file": "f.py", "line": 1, "severity": "BUG", "comment": "bug",
+                 "evidence": "evidence1"},
+                {"file": "f.py", "line": 2, "severity": "NIT", "comment": "nit",
+                 "evidence": "evidence2"},
+            ],
+        )
+
+        # Both BUG and NIT should be present on first review
+        assert len(result["inline_comments"]) == 2
+
+    def test_evidence_formatted_in_comment(self, tmp_path, monkeypatch):
+        result = self._analyze(
+            tmp_path, monkeypatch,
+            action="opened", before="", after="", head_sha="abc123",
+            prior_comments=[],
+            model_comments=[
+                {"file": "f.py", "line": 5, "severity": "BUG",
+                 "comment": "division by zero",
+                 "evidence": "line 3 sets count=0, line 5 divides by count"},
+            ],
+        )
+
+        comment_text = result["inline_comments"][0]["comment"]
+        assert comment_text.startswith("**BUG**: ")
+        assert "division by zero" in comment_text
+        assert "line 3 sets count=0" in comment_text
+
+
+class TestIncrementalReviewIntegration:
+    """End-to-end tests for the incremental review path through analyze(), with GitHub, KB and Bedrock calls mocked."""
+
+    def _setup_env(self, tmp_path, monkeypatch, event_before="abc123", event_after="def456"):
+        monkeypatch.setenv("GITHUB_TOKEN", "fake")
+        monkeypatch.setenv("GITHUB_REPOSITORY", "awslabs/test")
+        monkeypatch.setenv("ISSUE_NUMBER", "99")
+        monkeypatch.setenv("EVENT_TYPE", "pull_request_target")
+        monkeypatch.setenv("EVENT_ACTION", "synchronize")  # every test in this class simulates a synchronize event
+        monkeypatch.setenv("EVENT_BEFORE", event_before)  # presumably the pre-push SHA; tests may pass "" to blank it
+        monkeypatch.setenv("EVENT_AFTER", event_after)  # presumably the post-push SHA
+        monkeypatch.setenv("GITHUB_ACTOR", "contributor")
+        monkeypatch.setenv("KB_S3_BUCKET", "")
+        monkeypatch.setenv("KB_S3_KEY", "")
+        monkeypatch.setenv("PR_FILE_REVIEW_PROMPT", "Review this PR. Date: {current_date}")
+        import issue_bot.main as bot_main
+        monkeypatch.setattr(bot_main, "ARTIFACT_PATH", str(tmp_path / "result.json"))  # tests read the artifact back from tmp_path
+
+    def test_incremental_review_filters_stale_comments(self, tmp_path, monkeypatch):
+        import unittest.mock as mock
+        self._setup_env(tmp_path, monkeypatch)
+
+        incremental_diff = (
+            "diff --git a/src/fixed.py b/src/fixed.py\n"
+            "--- a/src/fixed.py\n+++ b/src/fixed.py\n"
+            "@@ -1 +1 @@\n-old\n+new\n"
+        )
+
+        with mock.patch("issue_bot.github_client.GitHubClient.get_pr") as mock_pr, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_comments") as mock_comments, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_diff") as mock_diff, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_review_comments") as mock_rc, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_compare_diff") as mock_compare, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_files") as mock_files, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_file_content") as mock_content, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_codebase_map") as mock_map, \
+             mock.patch("issue_bot.knowledge_base.KnowledgeBase.load"), \
+             mock.patch("issue_bot.knowledge_base.KnowledgeBase.build_context") as mock_kb, \
+             mock.patch("issue_bot.bedrock_client.BedrockClient.__init__", return_value=None), \
+             mock.patch("issue_bot.bedrock_client.BedrockClient.invoke") as mock_bedrock:
+
+            mock_pr.return_value = {"user": {"login": "contributor"}, "title": "Fix bug",
+                                    "body": "Fixes the thing", "state": "open", "html_url": "https://github.com/x"}
+            mock_comments.return_value = [{"user": {"login": "github-actions[bot]"}, "body": "prior review"}]  # an earlier bot comment exists
+            mock_diff.return_value = "full diff here"
+            mock_rc.return_value = []
+            mock_compare.return_value = incremental_diff  # only src/fixed.py appears in the before..after diff
+            mock_files.return_value = [{"filename": "src/fixed.py"}, {"filename": "src/untouched.py"}]  # the PR as a whole lists two files
+            mock_content.return_value = "file content"
+            mock_map.return_value = ""
+            mock_kb.return_value = ""
+            mock_bedrock.return_value = json.dumps({
+                "comments": [
+                    {"file": "src/fixed.py", "line": 1, "comment": "new issue in changed file"},
+                    {"file": "src/untouched.py", "line": 5, "comment": "stale comment on unchanged file"},
+                ]
+            })
+
+            from issue_bot.main import analyze
+            analyze()
+
+            with open(str(tmp_path / "result.json")) as f:
+                result = json.load(f)
+
+            assert result["action"] == "RESPOND"
+            assert result["is_incremental"] is True
+            assert len(result["inline_comments"]) == 1  # the comment on src/untouched.py must have been dropped
+            assert result["inline_comments"][0]["file"] == "src/fixed.py"
+
+    def test_force_push_falls_back_to_full_review(self, tmp_path, monkeypatch):
+        import unittest.mock as mock
+        self._setup_env(tmp_path, monkeypatch)
+
+        with mock.patch("issue_bot.github_client.GitHubClient.get_pr") as mock_pr, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_comments") as mock_comments, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_diff") as mock_diff, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_review_comments") as mock_rc, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_compare_diff") as mock_compare, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_files") as mock_files, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_file_content") as mock_content, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_codebase_map") as mock_map, \
+             mock.patch("issue_bot.knowledge_base.KnowledgeBase.load"), \
+             mock.patch("issue_bot.knowledge_base.KnowledgeBase.build_context") as mock_kb, \
+             mock.patch("issue_bot.bedrock_client.BedrockClient.__init__", return_value=None), \
+             mock.patch("issue_bot.bedrock_client.BedrockClient.invoke") as mock_bedrock:
+
+            mock_pr.return_value = {"user": {"login": "contributor"}, "title": "Fix bug",
+                                    "body": "Fixes the thing", "state": "open", "html_url": "https://github.com/x"}
+            mock_comments.return_value = [{"user": {"login": "github-actions[bot]"}, "body": "prior review"}]
+            mock_diff.return_value = "full diff here"
+            mock_rc.return_value = []
+            mock_compare.return_value = ""  # Force push — compare fails, modeled here as an empty diff
+            mock_files.return_value = [{"filename": "src/a.py"}]
+            mock_content.return_value = "content"
+            mock_map.return_value = ""
+            mock_kb.return_value = ""
+            mock_bedrock.return_value = json.dumps({
+                "comments": [
+                    {"file": "src/a.py", "line": 1, "comment": "issue found"},
+                ]
+            })
+
+            from issue_bot.main import analyze
+            analyze()
+
+            with open(str(tmp_path / "result.json")) as f:
+                result = json.load(f)
+
+            # Falls back to full review — the single comment is kept and the result is not marked incremental
+            assert result["action"] == "RESPOND"
+            assert result["is_incremental"] is False
+            assert len(result["inline_comments"]) == 1
+
+    def test_no_before_sha_skips_incremental(self, tmp_path, monkeypatch):
+        import unittest.mock as mock
+        self._setup_env(tmp_path, monkeypatch, event_before="", event_after="def456")
+
+        with mock.patch("issue_bot.github_client.GitHubClient.get_pr") as mock_pr, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_comments") as mock_comments, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_diff") as mock_diff, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_review_comments") as mock_rc, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_compare_diff") as mock_compare, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_files") as mock_files, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_file_content") as mock_content, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_codebase_map") as mock_map, \
+             mock.patch("issue_bot.knowledge_base.KnowledgeBase.load"), \
+             mock.patch("issue_bot.knowledge_base.KnowledgeBase.build_context") as mock_kb, \
+             mock.patch("issue_bot.bedrock_client.BedrockClient.__init__", return_value=None), \
+             mock.patch("issue_bot.bedrock_client.BedrockClient.invoke") as mock_bedrock:
+
+            mock_pr.return_value = {"user": {"login": "contributor"}, "title": "Fix",
+                                    "body": "Fix", "state": "open", "html_url": "https://github.com/x"}
+            mock_comments.return_value = [{"user": {"login": "github-actions[bot]"}, "body": "review"}]
+            mock_diff.return_value = "full diff"
+            mock_rc.return_value = []
+            mock_compare.return_value = "should not be called"  # sentinel; the mock is asserted uncalled below
+            mock_files.return_value = [{"filename": "src/a.py"}]
+            mock_content.return_value = "content"
+            mock_map.return_value = ""
+            mock_kb.return_value = ""
+            mock_bedrock.return_value = json.dumps({"comments": [
+                {"file": "src/a.py", "line": 1, "comment": "issue"},
+            ]})
+
+            from issue_bot.main import analyze
+            analyze()
+
+            # Should NOT have called compare because event_before is empty
+            mock_compare.assert_not_called()
+
+            with open(str(tmp_path / "result.json")) as f:
+                result = json.load(f)
+            assert result["is_incremental"] is False
+
+
+class TestFileContentUsesHeadSha:
+    """Verify get_file_content is called with PR head SHA, not default branch."""
+
+    def _setup_env(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("GITHUB_TOKEN", "fake")
+        monkeypatch.setenv("GITHUB_REPOSITORY", "awslabs/test")
+        monkeypatch.setenv("ISSUE_NUMBER", "42")
+        monkeypatch.setenv("EVENT_TYPE", "pull_request_target")
+        monkeypatch.setenv("EVENT_ACTION", "opened")  # first review of a PR: before/after are left empty
+        monkeypatch.setenv("EVENT_BEFORE", "")
+        monkeypatch.setenv("EVENT_AFTER", "")
+        monkeypatch.setenv("GITHUB_ACTOR", "contributor")
+        monkeypatch.setenv("KB_S3_BUCKET", "")
+        monkeypatch.setenv("KB_S3_KEY", "")
+        monkeypatch.setenv("PR_FILE_REVIEW_PROMPT", "Review this PR. Date: {current_date}")
+        import issue_bot.main as bot_main
+        monkeypatch.setattr(bot_main, "ARTIFACT_PATH", str(tmp_path / "result.json"))  # tests read the artifact back from tmp_path
+
+    def test_file_content_fetched_with_head_sha(self, tmp_path, monkeypatch):
+        import unittest.mock as mock
+        self._setup_env(tmp_path, monkeypatch)
+
+        with mock.patch("issue_bot.github_client.GitHubClient.get_pr") as mock_pr, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_comments") as mock_comments, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_diff") as mock_diff, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_review_comments") as mock_rc, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_files") as mock_files, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_file_content") as mock_content, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_codebase_map") as mock_map, \
+             mock.patch("issue_bot.knowledge_base.KnowledgeBase.load"), \
+             mock.patch("issue_bot.knowledge_base.KnowledgeBase.build_context") as mock_kb, \
+             mock.patch("issue_bot.bedrock_client.BedrockClient.__init__", return_value=None), \
+             mock.patch("issue_bot.bedrock_client.BedrockClient.invoke") as mock_bedrock:
+
+            mock_pr.return_value = {
+                "user": {"login": "contributor"}, "title": "Add feature",
+                "body": "New file", "state": "open",
+                "html_url": "https://github.com/x",
+                "head": {"sha": "abc123deadbeef"},
+            }
+            mock_comments.return_value = []
+            mock_diff.return_value = "diff content"
+            mock_rc.return_value = []
+            mock_files.return_value = [
+                {"filename": "src/new_file.py"},
+                {"filename": "src/existing.py"},
+            ]
+            mock_content.return_value = "file content"
+            mock_map.return_value = ""
+            mock_kb.return_value = ""
+            mock_bedrock.return_value = json.dumps({"comments": []})
+
+            from issue_bot.main import analyze
+            analyze()
+
+            # get_file_content must be called at all (else the loop is vacuous), always with ref=head_sha
+            assert mock_content.call_args_list, "get_file_content was never called"
+            for call in mock_content.call_args_list:
+                args, kwargs = call
+                assert kwargs.get("ref") == "abc123deadbeef" or (len(args) > 1 and args[1] == "abc123deadbeef"), \
+                    f"get_file_content called without head SHA: {call}"
+
+    def test_missing_head_sha_falls_back_gracefully(self, tmp_path, monkeypatch):
+        import unittest.mock as mock
+        self._setup_env(tmp_path, monkeypatch)
+
+        with mock.patch("issue_bot.github_client.GitHubClient.get_pr") as mock_pr, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_comments") as mock_comments, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_diff") as mock_diff, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_review_comments") as mock_rc, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_files") as mock_files, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_file_content") as mock_content, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_codebase_map") as mock_map, \
+             mock.patch("issue_bot.knowledge_base.KnowledgeBase.load"), \
+             mock.patch("issue_bot.knowledge_base.KnowledgeBase.build_context") as mock_kb, \
+             mock.patch("issue_bot.bedrock_client.BedrockClient.__init__", return_value=None), \
+             mock.patch("issue_bot.bedrock_client.BedrockClient.invoke") as mock_bedrock:
+
+            # PR object without head.sha (shouldn't happen, but defensive)
+            mock_pr.return_value = {
+                "user": {"login": "contributor"}, "title": "Fix",
+                "body": "Fix", "state": "open",
+                "html_url": "https://github.com/x",
+            }
+            mock_comments.return_value = []
+            mock_diff.return_value = "diff"
+            mock_rc.return_value = []
+            mock_files.return_value = [{"filename": "src/a.py"}]
+            mock_content.return_value = "content"
+            mock_map.return_value = ""
+            mock_kb.return_value = ""
+            mock_bedrock.return_value = json.dumps({"comments": []})
+
+            from issue_bot.main import analyze
+            analyze()
+
+            # Should still work — presumably falls back to no ref (default branch); only the artifact is checked here
+            with open(str(tmp_path / "result.json")) as f:
+                result = json.load(f)
+            # First review with 0 comments → RESPOND with CI-aware message
+            assert result["action"] == "RESPOND"
+            assert "No issues found" in result["response"]
+
+
+class TestReviewEventType:
+    """Artifact checks for first vs. incremental reviews. NOTE(review): meant to pin the COMMENT event type, but nothing asserts it yet."""
+
+    def _setup_env(self, tmp_path, monkeypatch, event_action="opened"):
+        monkeypatch.setenv("GITHUB_TOKEN", "fake")
+        monkeypatch.setenv("GITHUB_REPOSITORY", "awslabs/test")
+        monkeypatch.setenv("ISSUE_NUMBER", "50")
+        monkeypatch.setenv("EVENT_TYPE", "pull_request_target")
+        monkeypatch.setenv("EVENT_ACTION", event_action)
+        monkeypatch.setenv("EVENT_BEFORE", "aaa111" if event_action == "synchronize" else "")  # SHAs only set for synchronize
+        monkeypatch.setenv("EVENT_AFTER", "bbb222" if event_action == "synchronize" else "")
+        monkeypatch.setenv("GITHUB_ACTOR", "contributor")
+        monkeypatch.setenv("KB_S3_BUCKET", "")
+        monkeypatch.setenv("KB_S3_KEY", "")
+        monkeypatch.setenv("PR_FILE_REVIEW_PROMPT", "Review. Date: {current_date}")
+        import issue_bot.main as bot_main
+        monkeypatch.setattr(bot_main, "ARTIFACT_PATH", str(tmp_path / "result.json"))  # artifact is read back from tmp_path
+
+    def _run_and_get_artifact(self, tmp_path, monkeypatch, mock, event_action="opened"):  # mock: the unittest.mock module
+        self._setup_env(tmp_path, monkeypatch, event_action=event_action)
+
+        with mock.patch("issue_bot.github_client.GitHubClient.get_pr") as mock_pr, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_comments") as mock_comments, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_diff") as mock_diff, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_review_comments") as mock_rc, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_compare_diff") as mock_compare, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_pr_files") as mock_files, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_file_content") as mock_content, \
+             mock.patch("issue_bot.github_client.GitHubClient.get_codebase_map") as mock_map, \
+             mock.patch("issue_bot.knowledge_base.KnowledgeBase.load"), \
+             mock.patch("issue_bot.knowledge_base.KnowledgeBase.build_context") as mock_kb, \
+             mock.patch("issue_bot.bedrock_client.BedrockClient.__init__", return_value=None), \
+             mock.patch("issue_bot.bedrock_client.BedrockClient.invoke") as mock_bedrock, \
+             mock.patch("issue_bot.github_client.GitHubClient.post_pr_review") as mock_post:
+
+            mock_pr.return_value = {
+                "user": {"login": "contributor"}, "title": "Fix",
+                "body": "Fix", "state": "open",
+                "html_url": "https://github.com/x",
+                "head": {"sha": "abc123"},
+            }
+            mock_comments.return_value = (
+                [{"user": {"login": "github-actions[bot]"}, "body": "prior"}]
+                if event_action == "synchronize" else []
+            )
+            mock_diff.return_value = "diff"
+            mock_rc.return_value = []
+            mock_compare.return_value = (
+                "diff --git a/f.py b/f.py\n--- a/f.py\n+++ b/f.py\n@@ -1 +1 @@\n-x\n+y\n"
+                if event_action == "synchronize" else ""
+            )
+            mock_files.return_value = [{"filename": "f.py"}]
+            mock_content.return_value = "content"
+            mock_map.return_value = ""
+            mock_kb.return_value = ""
+            mock_bedrock.return_value = json.dumps({
+                "comments": [{"file": "f.py", "line": 1, "comment": "issue"}]
+            })
+            mock_post.return_value = True
+
+            from issue_bot.main import analyze  # only analyze() runs here; the unused `act` import was dropped
+            analyze()
+
+            with open(str(tmp_path / "result.json")) as f:
+                return json.load(f), mock_post
+
+    def test_first_review_uses_comment_event(self, tmp_path, monkeypatch):
+        import unittest.mock as mock
+        result, _ = self._run_and_get_artifact(tmp_path, monkeypatch, mock, "opened")
+        assert result["action"] == "RESPOND"
+        assert result.get("is_incremental") is False
+        assert len(result["inline_comments"]) > 0
+
+    def test_incremental_review_uses_comment_event(self, tmp_path, monkeypatch):
+        import unittest.mock as mock
+        result, _ = self._run_and_get_artifact(tmp_path, monkeypatch, mock, "synchronize")
+        assert result["action"] == "RESPOND"
+        assert result.get("is_incremental") is True
diff --git a/tests/test_verification.py b/tests/test_verification.py
new file mode 100644
index 0000000..682cbd9
--- /dev/null
+++ b/tests/test_verification.py
@@ -0,0 +1,164 @@
+# -*- coding: utf-8 -*-
+import unittest
+
+import pandas as pd
+from pyspark.sql import Row
+from pyspark.sql.types import BooleanType
+
+from pydeequ.checks import Check, CheckLevel
+from pydeequ.verification import VerificationResult, VerificationSuite
+from tests.conftest import setup_pyspark
+
+
+class TestRowLevelResults(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.spark = setup_pyspark().appName("test-row-level-results-local").getOrCreate()
+        cls.sc = cls.spark.sparkContext
+        cls.df = cls.sc.parallelize(
+            [
+                Row(a="foo", b=1, c=5),
+                Row(a="bar", b=2, c=6),
+                Row(a="baz", b=3, c=None),
+            ]
+        ).toDF()
+
+    @classmethod
+    def tearDownClass(cls):
+        # Must shutdown callback for tests to stop
+        # TODO Document this call to users or encapsulate in PyDeequSession
+        cls.spark.sparkContext._gateway.shutdown_callback_server()
+        cls.spark.stop()
+
+    def test_row_level_results_with_completeness(self):
+        """Test that isComplete produces a Boolean column with correct per-row values."""
+        check = Check(self.spark, CheckLevel.Error, "completeness_check")
+        check = check.isComplete("c")
+
+        result = VerificationSuite(self.spark).onData(self.df).addCheck(check).run()
+        row_level_df = VerificationResult.rowLevelResultsAsDataFrame(self.spark, result, self.df)
+
+        # Should have same row count as original DataFrame
+        self.assertEqual(row_level_df.count(), self.df.count())
+
+        # Should have original columns (a, b, c) plus one Boolean column named after the check
+        self.assertIn("completeness_check", row_level_df.columns)
+        self.assertIsInstance(row_level_df.schema["completeness_check"].dataType, BooleanType)
+
+        # Order by b to ensure deterministic row ordering
+        # b=1: c=5 (complete), b=2: c=6 (complete), b=3: c=None (incomplete)
+        results = row_level_df.orderBy("b").select("completeness_check").collect()
+        values = [row["completeness_check"] for row in results]
+        self.assertEqual(values, [True, True, False])
+
+    def test_row_level_results_with_contained_in(self):
+        """Test that isContainedIn produces correct row-level results."""
+        check = Check(self.spark, CheckLevel.Error, "contained_check")
+        check = check.isContainedIn("a", ["foo", "bar"])
+
+        result = VerificationSuite(self.spark).onData(self.df).addCheck(check).run()
+        row_level_df = VerificationResult.rowLevelResultsAsDataFrame(self.spark, result, self.df)
+
+        self.assertIn("contained_check", row_level_df.columns)
+
+        # Order by a to ensure deterministic row ordering
+        # a="bar" (contained), a="baz" (not contained), a="foo" (contained)
+        results = row_level_df.orderBy("a").select("contained_check").collect()
+        values = [row["contained_check"] for row in results]
+        self.assertEqual(values, [True, False, True])
+
+    def test_row_level_results_multiple_constraints_anded(self):
+        """Test that multiple constraints in one Check are ANDed into a single column."""
+        check = Check(self.spark, CheckLevel.Error, "multi_check")
+        check = check.isContainedIn("a", ["foo", "baz"]).isComplete("c")
+
+        result = VerificationSuite(self.spark).onData(self.df).addCheck(check).run()
+        row_level_df = VerificationResult.rowLevelResultsAsDataFrame(self.spark, result, self.df)
+
+        self.assertIn("multi_check", row_level_df.columns)
+
+        # Order by b to ensure deterministic row ordering
+        # b=1: a=foo (contained), c=5 (complete) -> True AND True = True
+        # b=2: a=bar (NOT contained), c=6 (complete) -> False AND True = False
+        # b=3: a=baz (contained), c=None (NOT complete) -> True AND False = False
+        results = row_level_df.orderBy("b").select("multi_check").collect()
+        values = [row["multi_check"] for row in results]
+        self.assertEqual(values, [True, False, False])
+
+    def test_row_level_results_aggregate_only_check(self):
+        """Test that aggregate-only checks (hasSize) don't add columns."""
+        check = Check(self.spark, CheckLevel.Warning, "size_check")
+        check = check.hasSize(lambda x: x >= 3)
+
+        result = VerificationSuite(self.spark).onData(self.df).addCheck(check).run()
+        row_level_df = VerificationResult.rowLevelResultsAsDataFrame(self.spark, result, self.df)
+
+        # hasSize is aggregate-only, so no new column should be added
+        self.assertEqual(sorted(row_level_df.columns), sorted(self.df.columns))
+
+    def test_row_level_results_preserves_original_columns(self):
+        """Test that the original DataFrame columns are preserved."""
+        check = Check(self.spark, CheckLevel.Error, "preserve_check")
+        check = check.isComplete("c")
+
+        result = VerificationSuite(self.spark).onData(self.df).addCheck(check).run()
+        row_level_df = VerificationResult.rowLevelResultsAsDataFrame(self.spark, result, self.df)
+
+        for col in self.df.columns:
+            self.assertIn(col, row_level_df.columns)
+
+        # Verify the a and b values are unchanged (ordered by b for deterministic comparison)
+        original_values = self.df.orderBy("b").select("a", "b").collect()
+        result_values = row_level_df.orderBy("b").select("a", "b").collect()
+        self.assertEqual(original_values, result_values)
+
+    def test_row_level_results_multiple_checks(self):
+        """Test that multiple separate Check objects produce multiple Boolean columns."""
+        check1 = Check(self.spark, CheckLevel.Error, "completeness_check")
+        check1 = check1.isComplete("c")
+
+        check2 = Check(self.spark, CheckLevel.Error, "value_check")
+        check2 = check2.isContainedIn("a", ["foo", "bar"])
+
+        result = (
+            VerificationSuite(self.spark)
+            .onData(self.df)
+            .addCheck(check1)
+            .addCheck(check2)
+            .run()
+        )
+        row_level_df = VerificationResult.rowLevelResultsAsDataFrame(self.spark, result, self.df)
+
+        # Each Check should produce its own Boolean column
+        self.assertIn("completeness_check", row_level_df.columns)
+        self.assertIn("value_check", row_level_df.columns)
+        self.assertEqual(row_level_df.count(), 3)
+
+        # Verify values: c is null for row 3, and "baz" is not in ["foo", "bar"]
+        results = row_level_df.orderBy("b").select("completeness_check", "value_check").collect()
+        # Row 1 (a=foo, c=5): complete=True, contained=True
+        self.assertTrue(results[0]["completeness_check"])
+        self.assertTrue(results[0]["value_check"])
+        # Row 2 (a=bar, c=6): complete=True, contained=True
+        self.assertTrue(results[1]["completeness_check"])
+        self.assertTrue(results[1]["value_check"])
+        # Row 3 (a=baz, c=None): complete=False, contained=False
+        self.assertFalse(results[2]["completeness_check"])
+        self.assertFalse(results[2]["value_check"])
+
+    def test_row_level_results_as_pandas(self):
+        """Test the pandas=True option returns a Pandas DataFrame."""
+        check = Check(self.spark, CheckLevel.Error, "pandas_check")
+        check = check.isComplete("c")
+
+        result = VerificationSuite(self.spark).onData(self.df).addCheck(check).run()
+        row_level_df = VerificationResult.rowLevelResultsAsDataFrame(
+            self.spark, result, self.df, pandas=True
+        )
+
+        self.assertIsInstance(row_level_df, pd.DataFrame)
+        self.assertIn("pandas_check", row_level_df.columns)
+
+if __name__ == "__main__":  # run this module's tests via unittest when the file is executed directly
+    unittest.main()