diff --git a/.github/workflows/auto-approve.yml b/.github/workflows/auto-approve.yml
new file mode 100644
index 0000000..f24c211
--- /dev/null
+++ b/.github/workflows/auto-approve.yml
@@ -0,0 +1,116 @@
+name: Auto-Approve Clean PRs
+
+# Approves a pull request as github-actions[bot] once two conditions hold for
+# the PR's current head commit: the CI workflow run concluded with success, and
+# the newest github-actions[bot] review targets that commit and contains the
+# clean-review marker. Runs whenever one of the workflows listed below completes.
+on:
+  workflow_run:
+    workflows: [".github/workflows/base.yml", "PyDeequ Bot"]
+    types: [completed]
+
+permissions:
+  pull-requests: write  # needed to submit the approving review
+  actions: read         # needed to list workflow runs for the commit
+
+jobs:
+  approve:
+    runs-on: ubuntu-latest
+    # Only react to runs that were themselves triggered by a pull request event.
+    if: github.event.workflow_run.event == 'pull_request' || github.event.workflow_run.event == 'pull_request_target'
+    timeout-minutes: 2
+
+    steps:
+      - name: Find PR and check both conditions
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        with:
+          script: |
+            const sha = context.payload.workflow_run.head_sha;
+            const owner = context.repo.owner;
+            const repo = context.repo.repo;
+            const BOT_LOGIN = 'github-actions[bot]';
+            const CI_WORKFLOW_NAME = '.github/workflows/base.yml';
+
+            // Find the PR for this SHA. Prefer the PR listed in the event payload;
+            // when that list is missing or empty, scan open PRs for a matching head SHA.
+            let prNumber = null;
+            const prs = context.payload.workflow_run.pull_requests;
+            if (prs && prs.length > 0) {
+              prNumber = prs[0].number;
+            } else {
+              // Paginate: a single page could omit the PR in a repo with many open PRs.
+              const openPrs = await github.paginate(github.rest.pulls.list, {
+                owner, repo, state: 'open', sort: 'updated', direction: 'desc', per_page: 100
+              });
+              const match = openPrs.find(pr => pr.head.sha === sha);
+              if (match) {
+                prNumber = match.number;
+              }
+            }
+
+            if (!prNumber) {
+              core.info(`No open PR found for SHA ${sha}, skipping`);
+              return;
+            }
+
+            core.info(`Found PR #${prNumber} for SHA ${sha}`);
+
+            // Verify the PR head SHA still matches (no new push since trigger)
+            const {data: pr} = await github.rest.pulls.get({
+              owner, repo, pull_number: prNumber
+            });
+            if (pr.head.sha !== sha) {
+              core.info(`PR head ${pr.head.sha} differs from trigger SHA ${sha} — new push arrived, skipping`);
+              return;
+            }
+
+            // Condition 1: CI must have passed for this SHA
+            const completedRuns = await github.paginate(github.rest.actions.listWorkflowRunsForRepo, {
+              owner, repo, head_sha: sha, status: 'completed', per_page: 100
+            });
+            const ciPassed = completedRuns.some(r =>
+              r.name === CI_WORKFLOW_NAME && r.conclusion === 'success'
+            );
+            if (!ciPassed) {
+              core.info(`CI has not passed for SHA ${sha}, skipping`);
+              return;
+            }
+
+            // Condition 2: Bot must have posted a clean review for this SHA.
+            // Fetch every page of reviews: the API returns them oldest-first, so the
+            // newest bot review is on the LAST page once a PR has many reviews.
+            const reviews = await github.paginate(github.rest.pulls.listReviews, {
+              owner, repo, pull_number: prNumber, per_page: 100
+            });
+
+            // NOTE(review): with an empty marker, String.prototype.includes matches every
+            // review body, so only the commit_id comparison below filters anything.
+            // Confirm the intended marker text was not lost from this file.
+            const CLEAN_MARKER = '';
+
+            const latestBot = reviews
+              .filter(r => r.user?.login === BOT_LOGIN)
+              .sort((a, b) => new Date(b.submitted_at) - new Date(a.submitted_at))[0];
+
+            // `body || ''` keeps a review without body text from throwing on .includes().
+            if (!latestBot || !(latestBot.body || '').includes(CLEAN_MARKER) || latestBot.commit_id !== sha) {
+              core.info('Bot has not posted a clean review for this SHA, skipping');
+              return;
+            }
+
+            // Both conditions met — check for existing approval to prevent doubles
+            const alreadyApproved = reviews.some(r =>
+              r.user?.login === BOT_LOGIN && r.state === 'APPROVED'
+            );
+            if (alreadyApproved) {
+              core.info('Bot already approved this PR, skipping');
+              return;
+            }
+
+            // Approve
+            core.info(`Approving PR #${prNumber}: bot review clean + CI passed for SHA ${sha}`);
+            await github.rest.pulls.createReview({
+              owner, repo, pull_number: prNumber,
+              event: 'APPROVE',
+              body: `No issues found and CI is passing. Auto-approved.\n\n---\n*Generated by AI — human merge required.*`
+            });
diff --git a/.github/workflows/issue-bot.yml b/.github/workflows/issue-bot.yml
index b24a577..96a2056 100644
--- a/.github/workflows/issue-bot.yml
+++ b/.github/workflows/issue-bot.yml
@@ -61,16 +61,20 @@ jobs:
           ISSUE_NUMBER: ${{ github.event.issue.number || github.event.pull_request.number || inputs.issue_number }}
           EVENT_TYPE: ${{ github.event_name }}
           EVENT_ACTION: ${{ github.event.action }}
+          EVENT_BEFORE: ${{ github.event.before }}
+          EVENT_AFTER: ${{ github.event.pull_request.head.sha || github.event.after }}
           GITHUB_ACTOR: ${{ github.actor }}
           KB_S3_BUCKET: ${{ secrets.KB_S3_BUCKET }}
           KB_S3_KEY: ${{ secrets.KB_S3_KEY }}
           BEDROCK_MODEL_ID: ${{ secrets.BEDROCK_MODEL_ID }}
           GUARDRAIL_ID: ${{ secrets.GUARDRAIL_ID }}
           GUARDRAIL_VERSION: ${{ secrets.GUARDRAIL_VERSION }}
-          ISSUE_CLASSIFY_PROMPT: ${{ secrets.ISSUE_CLASSIFY_PROMPT }}
-          ISSUE_RESPOND_PROMPT: ${{ secrets.ISSUE_RESPOND_PROMPT }}
-          PR_FILE_REVIEW_PROMPT: ${{ secrets.PR_FILE_REVIEW_PROMPT }}
-          FOLLOWUP_PROMPT: ${{ secrets.FOLLOWUP_PROMPT }}
+          SM_ISSUE_CLASSIFY_PROMPT: pydeequ-bot/issue-classify-prompt
+          SM_ISSUE_RESPOND_PROMPT: pydeequ-bot/issue-respond-prompt
+          SM_PR_FILE_REVIEW_PROMPT: pydeequ-bot/pr-file-review-prompt
+          SM_FOLLOWUP_PROMPT: pydeequ-bot/followup-prompt
+          CODEBASE_SRC_DIR: pydeequ
+          CODEBASE_FILE_EXT: .py
           DRY_RUN: ${{ inputs.dry_run || 'false' }}
           ARTIFACT_PATH: ${{ runner.temp }}/bot_result.json
         run: python -m issue_bot.main analyze
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
new file mode 100644
index 0000000..94ae395
--- /dev/null
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,36 @@
+name: Manage Stale Issues and PRs
+
+on:
+  schedule:
+    - cron: '0 9 * * MON'
+  workflow_dispatch:
+
+permissions:
+  issues: write
+  pull-requests: write
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0
+        with:
+          days-before-stale: 60
+
days-before-close: 14 + stale-issue-label: 'stale' + stale-pr-label: 'stale' + stale-issue-message: > + This issue has been inactive for 60 days. It will be closed in 14 days + if there is no further activity. If this is still relevant, please comment + to keep it open. + stale-pr-message: > + This PR has been inactive for 60 days. It will be closed in 14 days + if there is no further activity. If you are still working on this, + please push an update or comment to keep it open. + close-issue-message: > + Closed due to inactivity. Feel free to reopen if this is still relevant. + close-pr-message: > + Closed due to inactivity. Feel free to reopen if you'd like to continue this work. + exempt-issue-labels: 'bug,enhancement,help-wanted' + exempt-pr-labels: 'help-wanted' + operations-per-run: 50 diff --git a/README.md b/README.md index a6003c9..bde2d24 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,6 @@ There are 4 main components of Deequ, and they are: - With PyDeequ v0.1.8+, we now officially support Spark3 ! Just make sure you have an environment variable `SPARK_VERSION` to specify your Spark version! - We've release a blogpost on integrating PyDeequ onto AWS leveraging services such as AWS Glue, Athena, and SageMaker! Check it out: [Monitor data quality in your data lake using PyDeequ and AWS Glue](https://aws.amazon.com/blogs/big-data/monitor-data-quality-in-your-data-lake-using-pydeequ-and-aws-glue/). - Check out the [PyDeequ Release Announcement Blogpost](https://aws.amazon.com/blogs/big-data/testing-data-quality-at-scale-with-pydeequ/) with a tutorial walkthrough the Amazon Reviews dataset! -- Join the PyDeequ community on [PyDeequ Slack](https://join.slack.com/t/pydeequ/shared_invite/zt-te6bntpu-yaqPy7bhiN8Lu0NxpZs47Q) to chat with the devs! 
## Quickstart @@ -120,6 +119,17 @@ checkResult_df = VerificationResult.checkResultsAsDataFrame(spark, checkResult) checkResult_df.show() ``` +#### Row-Level Results + +You can also get row-level results to see which individual rows passed or failed each check. This is useful for quarantining rows with data quality issues: + +```python +rowLevelResult_df = VerificationResult.rowLevelResultsAsDataFrame(spark, checkResult, df) +rowLevelResult_df.show() +``` + +Each check produces a Boolean column (named after the check description) indicating pass/fail per row. When a single Check contains multiple constraints, they are ANDed together into one Boolean column — the row passes only if all constraints in that Check pass. Only checks with row-level-capable constraints (e.g., `isComplete`, `isContainedIn`, `hasPattern`, `isUnique`) will produce output columns. + ### Repository Save to a Metrics Repository by adding the `useRepository()` and `saveOrAppendResult()` calls to your Analysis Runner. diff --git a/poetry.lock b/poetry.lock index 6563511..62f13bc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,59 +1,49 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. - -[[package]] -name = "atomicwrites" -version = "1.4.1" -description = "Atomic file writes." 
-optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, -] - -[[package]] -name = "attrs" -version = "22.1.0" -description = "Classes Without Boilerplate" -optional = false -python-versions = ">=3.5" -files = [ - {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, - {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, -] - -[package.extras] -dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy (>=0.900,!=0.940)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "sphinx", "sphinx-notfound-page", "zope.interface"] -docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] -tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "zope.interface"] -tests-no-zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"] +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "black" -version = "21.12b0" +version = "24.10.0" description = "The uncompromising code formatter." 
optional = false -python-versions = ">=3.6.2" -files = [ - {file = "black-21.12b0-py3-none-any.whl", hash = "sha256:a615e69ae185e08fdd73e4715e260e2479c861b5740057fde6e8b4e3b7dd589f"}, - {file = "black-21.12b0.tar.gz", hash = "sha256:77b80f693a569e2e527958459634f18df9b0ba2625ba4e0c2d5da5be42e6f2b3"}, +python-versions = ">=3.9" +files = [ + {file = "black-24.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6668650ea4b685440857138e5fe40cde4d652633b1bdffc62933d0db4ed9812"}, + {file = "black-24.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1c536fcf674217e87b8cc3657b81809d3c085d7bf3ef262ead700da345bfa6ea"}, + {file = "black-24.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:649fff99a20bd06c6f727d2a27f401331dc0cc861fb69cde910fe95b01b5928f"}, + {file = "black-24.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:fe4d6476887de70546212c99ac9bd803d90b42fc4767f058a0baa895013fbb3e"}, + {file = "black-24.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad"}, + {file = "black-24.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50"}, + {file = "black-24.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392"}, + {file = "black-24.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:394d4ddc64782e51153eadcaaca95144ac4c35e27ef9b0a42e121ae7e57a9175"}, + {file = "black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3"}, + {file = "black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65"}, + {file = "black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f"}, + {file = "black-24.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:30d2c30dc5139211dda799758559d1b049f7f14c580c409d6ad925b74a4208a8"}, + {file = "black-24.10.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cbacacb19e922a1d75ef2b6ccaefcd6e93a2c05ede32f06a21386a04cedb981"}, + {file = "black-24.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1f93102e0c5bb3907451063e08b9876dbeac810e7da5a8bfb7aeb5a9ef89066b"}, + {file = "black-24.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ddacb691cdcdf77b96f549cf9591701d8db36b2f19519373d60d31746068dbf2"}, + {file = "black-24.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:680359d932801c76d2e9c9068d05c6b107f2584b2a5b88831c83962eb9984c1b"}, + {file = "black-24.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:17374989640fbca88b6a448129cd1745c5eb8d9547b464f281b251dd00155ccd"}, + {file = "black-24.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:63f626344343083322233f175aaf372d326de8436f5928c042639a4afbbf1d3f"}, + {file = "black-24.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfa1d0cb6200857f1923b602f978386a3a2758a65b52e0950299ea014be6800"}, + {file = "black-24.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:2cd9c95431d94adc56600710f8813ee27eea544dd118d45896bb734e9d7a0dc7"}, + {file = "black-24.10.0-py3-none-any.whl", hash = "sha256:3bb2b7a1f7b685f85b11fed1ef10f8a9148bceb49853e47a294a3dd963c1dd7d"}, + {file = "black-24.10.0.tar.gz", hash = "sha256:846ea64c97afe3bc677b761787993be4991810ecc7a4a937816dd6bddedc4875"}, ] [package.dependencies] -click = ">=7.1.2" +click = ">=8.0.0" mypy-extensions = ">=0.4.3" -pathspec = ">=0.9.0,<1" +packaging = ">=22.0" +pathspec = ">=0.9.0" platformdirs = ">=2" -tomli = ">=0.2.6,<2.0.0" -typing-extensions = [ - {version = ">=3.10.0.0,<3.10.0.1 || >3.10.0.1", markers = "python_version >= 
\"3.10\""}, - {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}, -] +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)"] +d = ["aiohttp (>=3.10)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -python2 = ["typed-ast (>=1.4.3)"] uvloop = ["uvloop (>=0.15.2)"] [[package]] @@ -214,67 +204,122 @@ files = [ [[package]] name = "coverage" -version = "5.5" +version = "7.10.7" description = "Code coverage measurement for Python" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" -files = [ - {file = "coverage-5.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:b6d534e4b2ab35c9f93f46229363e17f63c53ad01330df9f2d6bd1187e5eaacf"}, - {file = "coverage-5.5-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:b7895207b4c843c76a25ab8c1e866261bcfe27bfaa20c192de5190121770672b"}, - {file = "coverage-5.5-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:c2723d347ab06e7ddad1a58b2a821218239249a9e4365eaff6649d31180c1669"}, - {file = "coverage-5.5-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:900fbf7759501bc7807fd6638c947d7a831fc9fdf742dc10f02956ff7220fa90"}, - {file = "coverage-5.5-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:004d1880bed2d97151facef49f08e255a20ceb6f9432df75f4eef018fdd5a78c"}, - {file = "coverage-5.5-cp27-cp27m-win32.whl", hash = "sha256:06191eb60f8d8a5bc046f3799f8a07a2d7aefb9504b0209aff0b47298333302a"}, - {file = "coverage-5.5-cp27-cp27m-win_amd64.whl", hash = "sha256:7501140f755b725495941b43347ba8a2777407fc7f250d4f5a7d2a1050ba8e82"}, - {file = "coverage-5.5-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:372da284cfd642d8e08ef606917846fa2ee350f64994bebfbd3afb0040436905"}, - {file = "coverage-5.5-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:8963a499849a1fc54b35b1c9f162f4108017b2e6db2c46c1bed93a72262ed083"}, - {file 
= "coverage-5.5-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:869a64f53488f40fa5b5b9dcb9e9b2962a66a87dab37790f3fcfb5144b996ef5"}, - {file = "coverage-5.5-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:4a7697d8cb0f27399b0e393c0b90f0f1e40c82023ea4d45d22bce7032a5d7b81"}, - {file = "coverage-5.5-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:8d0a0725ad7c1a0bcd8d1b437e191107d457e2ec1084b9f190630a4fb1af78e6"}, - {file = "coverage-5.5-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:51cb9476a3987c8967ebab3f0fe144819781fca264f57f89760037a2ea191cb0"}, - {file = "coverage-5.5-cp310-cp310-win_amd64.whl", hash = "sha256:c0891a6a97b09c1f3e073a890514d5012eb256845c451bd48f7968ef939bf4ae"}, - {file = "coverage-5.5-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:3487286bc29a5aa4b93a072e9592f22254291ce96a9fbc5251f566b6b7343cdb"}, - {file = "coverage-5.5-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:deee1077aae10d8fa88cb02c845cfba9b62c55e1183f52f6ae6a2df6a2187160"}, - {file = "coverage-5.5-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:f11642dddbb0253cc8853254301b51390ba0081750a8ac03f20ea8103f0c56b6"}, - {file = "coverage-5.5-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:6c90e11318f0d3c436a42409f2749ee1a115cd8b067d7f14c148f1ce5574d701"}, - {file = "coverage-5.5-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:30c77c1dc9f253283e34c27935fded5015f7d1abe83bc7821680ac444eaf7793"}, - {file = "coverage-5.5-cp35-cp35m-win32.whl", hash = "sha256:9a1ef3b66e38ef8618ce5fdc7bea3d9f45f3624e2a66295eea5e57966c85909e"}, - {file = "coverage-5.5-cp35-cp35m-win_amd64.whl", hash = "sha256:972c85d205b51e30e59525694670de6a8a89691186012535f9d7dbaa230e42c3"}, - {file = "coverage-5.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:af0e781009aaf59e25c5a678122391cb0f345ac0ec272c7961dc5455e1c40066"}, - {file = "coverage-5.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:74d881fc777ebb11c63736622b60cb9e4aee5cace591ce274fb69e582a12a61a"}, - {file = 
"coverage-5.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:92b017ce34b68a7d67bd6d117e6d443a9bf63a2ecf8567bb3d8c6c7bc5014465"}, - {file = "coverage-5.5-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:d636598c8305e1f90b439dbf4f66437de4a5e3c31fdf47ad29542478c8508bbb"}, - {file = "coverage-5.5-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:41179b8a845742d1eb60449bdb2992196e211341818565abded11cfa90efb821"}, - {file = "coverage-5.5-cp36-cp36m-win32.whl", hash = "sha256:040af6c32813fa3eae5305d53f18875bedd079960822ef8ec067a66dd8afcd45"}, - {file = "coverage-5.5-cp36-cp36m-win_amd64.whl", hash = "sha256:5fec2d43a2cc6965edc0bb9e83e1e4b557f76f843a77a2496cbe719583ce8184"}, - {file = "coverage-5.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:18ba8bbede96a2c3dde7b868de9dcbd55670690af0988713f0603f037848418a"}, - {file = "coverage-5.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:2910f4d36a6a9b4214bb7038d537f015346f413a975d57ca6b43bf23d6563b53"}, - {file = "coverage-5.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:f0b278ce10936db1a37e6954e15a3730bea96a0997c26d7fee88e6c396c2086d"}, - {file = "coverage-5.5-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:796c9c3c79747146ebd278dbe1e5c5c05dd6b10cc3bcb8389dfdf844f3ead638"}, - {file = "coverage-5.5-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:53194af30d5bad77fcba80e23a1441c71abfb3e01192034f8246e0d8f99528f3"}, - {file = "coverage-5.5-cp37-cp37m-win32.whl", hash = "sha256:184a47bbe0aa6400ed2d41d8e9ed868b8205046518c52464fde713ea06e3a74a"}, - {file = "coverage-5.5-cp37-cp37m-win_amd64.whl", hash = "sha256:2949cad1c5208b8298d5686d5a85b66aae46d73eec2c3e08c817dd3513e5848a"}, - {file = "coverage-5.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:217658ec7187497e3f3ebd901afdca1af062b42cfe3e0dafea4cced3983739f6"}, - {file = "coverage-5.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1aa846f56c3d49205c952d8318e76ccc2ae23303351d9270ab220004c580cfe2"}, - {file = "coverage-5.5-cp38-cp38-manylinux1_x86_64.whl", 
hash = "sha256:24d4a7de75446be83244eabbff746d66b9240ae020ced65d060815fac3423759"}, - {file = "coverage-5.5-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:d1f8bf7b90ba55699b3a5e44930e93ff0189aa27186e96071fac7dd0d06a1873"}, - {file = "coverage-5.5-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:970284a88b99673ccb2e4e334cfb38a10aab7cd44f7457564d11898a74b62d0a"}, - {file = "coverage-5.5-cp38-cp38-win32.whl", hash = "sha256:01d84219b5cdbfc8122223b39a954820929497a1cb1422824bb86b07b74594b6"}, - {file = "coverage-5.5-cp38-cp38-win_amd64.whl", hash = "sha256:2e0d881ad471768bf6e6c2bf905d183543f10098e3b3640fc029509530091502"}, - {file = "coverage-5.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d1f9ce122f83b2305592c11d64f181b87153fc2c2bbd3bb4a3dde8303cfb1a6b"}, - {file = "coverage-5.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:13c4ee887eca0f4c5a247b75398d4114c37882658300e153113dafb1d76de529"}, - {file = "coverage-5.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:52596d3d0e8bdf3af43db3e9ba8dcdaac724ba7b5ca3f6358529d56f7a166f8b"}, - {file = "coverage-5.5-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:2cafbbb3af0733db200c9b5f798d18953b1a304d3f86a938367de1567f4b5bff"}, - {file = "coverage-5.5-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:44d654437b8ddd9eee7d1eaee28b7219bec228520ff809af170488fd2fed3e2b"}, - {file = "coverage-5.5-cp39-cp39-win32.whl", hash = "sha256:d314ed732c25d29775e84a960c3c60808b682c08d86602ec2c3008e1202e3bb6"}, - {file = "coverage-5.5-cp39-cp39-win_amd64.whl", hash = "sha256:13034c4409db851670bc9acd836243aeee299949bd5673e11844befcb0149f03"}, - {file = "coverage-5.5-pp36-none-any.whl", hash = "sha256:f030f8873312a16414c0d8e1a1ddff2d3235655a2174e3648b4fa66b3f2f1079"}, - {file = "coverage-5.5-pp37-none-any.whl", hash = "sha256:2a3859cb82dcbda1cfd3e6f71c27081d18aa251d20a17d87d26d4cd216fb0af4"}, - {file = "coverage-5.5.tar.gz", hash = "sha256:ebe78fe9a0e874362175b02371bdfbee64d8edc42a044253ddf4ee7d3c15212c"}, +python-versions = ">=3.9" 
+files = [ + {file = "coverage-7.10.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fc04cc7a3db33664e0c2d10eb8990ff6b3536f6842c9590ae8da4c614b9ed05a"}, + {file = "coverage-7.10.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e201e015644e207139f7e2351980feb7040e6f4b2c2978892f3e3789d1c125e5"}, + {file = "coverage-7.10.7-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:240af60539987ced2c399809bd34f7c78e8abe0736af91c3d7d0e795df633d17"}, + {file = "coverage-7.10.7-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8421e088bc051361b01c4b3a50fd39a4b9133079a2229978d9d30511fd05231b"}, + {file = "coverage-7.10.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6be8ed3039ae7f7ac5ce058c308484787c86e8437e72b30bf5e88b8ea10f3c87"}, + {file = "coverage-7.10.7-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e28299d9f2e889e6d51b1f043f58d5f997c373cc12e6403b90df95b8b047c13e"}, + {file = "coverage-7.10.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c4e16bd7761c5e454f4efd36f345286d6f7c5fa111623c355691e2755cae3b9e"}, + {file = "coverage-7.10.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b1c81d0e5e160651879755c9c675b974276f135558cf4ba79fee7b8413a515df"}, + {file = "coverage-7.10.7-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:606cc265adc9aaedcc84f1f064f0e8736bc45814f15a357e30fca7ecc01504e0"}, + {file = "coverage-7.10.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:10b24412692df990dbc34f8fb1b6b13d236ace9dfdd68df5b28c2e39cafbba13"}, + {file = "coverage-7.10.7-cp310-cp310-win32.whl", hash = "sha256:b51dcd060f18c19290d9b8a9dd1e0181538df2ce0717f562fff6cf74d9fc0b5b"}, + {file = "coverage-7.10.7-cp310-cp310-win_amd64.whl", hash = "sha256:3a622ac801b17198020f09af3eaf45666b344a0d69fc2a6ffe2ea83aeef1d807"}, + {file = "coverage-7.10.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:a609f9c93113be646f44c2a0256d6ea375ad047005d7f57a5c15f614dc1b2f59"}, + {file = "coverage-7.10.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:65646bb0359386e07639c367a22cf9b5bf6304e8630b565d0626e2bdf329227a"}, + {file = "coverage-7.10.7-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5f33166f0dfcce728191f520bd2692914ec70fac2713f6bf3ce59c3deacb4699"}, + {file = "coverage-7.10.7-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:35f5e3f9e455bb17831876048355dca0f758b6df22f49258cb5a91da23ef437d"}, + {file = "coverage-7.10.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4da86b6d62a496e908ac2898243920c7992499c1712ff7c2b6d837cc69d9467e"}, + {file = "coverage-7.10.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6b8b09c1fad947c84bbbc95eca841350fad9cbfa5a2d7ca88ac9f8d836c92e23"}, + {file = "coverage-7.10.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4376538f36b533b46f8971d3a3e63464f2c7905c9800db97361c43a2b14792ab"}, + {file = "coverage-7.10.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:121da30abb574f6ce6ae09840dae322bef734480ceafe410117627aa54f76d82"}, + {file = "coverage-7.10.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:88127d40df529336a9836870436fc2751c339fbaed3a836d42c93f3e4bd1d0a2"}, + {file = "coverage-7.10.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ba58bbcd1b72f136080c0bccc2400d66cc6115f3f906c499013d065ac33a4b61"}, + {file = "coverage-7.10.7-cp311-cp311-win32.whl", hash = "sha256:972b9e3a4094b053a4e46832b4bc829fc8a8d347160eb39d03f1690316a99c14"}, + {file = "coverage-7.10.7-cp311-cp311-win_amd64.whl", hash = "sha256:a7b55a944a7f43892e28ad4bc0561dfd5f0d73e605d1aa5c3c976b52aea121d2"}, + {file = "coverage-7.10.7-cp311-cp311-win_arm64.whl", hash = "sha256:736f227fb490f03c6488f9b6d45855f8e0fd749c007f9303ad30efab0e73c05a"}, + {file = 
"coverage-7.10.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7bb3b9ddb87ef7725056572368040c32775036472d5a033679d1fa6c8dc08417"}, + {file = "coverage-7.10.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:18afb24843cbc175687225cab1138c95d262337f5473512010e46831aa0c2973"}, + {file = "coverage-7.10.7-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:399a0b6347bcd3822be369392932884b8216d0944049ae22925631a9b3d4ba4c"}, + {file = "coverage-7.10.7-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314f2c326ded3f4b09be11bc282eb2fc861184bc95748ae67b360ac962770be7"}, + {file = "coverage-7.10.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c41e71c9cfb854789dee6fc51e46743a6d138b1803fab6cb860af43265b42ea6"}, + {file = "coverage-7.10.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc01f57ca26269c2c706e838f6422e2a8788e41b3e3c65e2f41148212e57cd59"}, + {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a6442c59a8ac8b85812ce33bc4d05bde3fb22321fa8294e2a5b487c3505f611b"}, + {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:78a384e49f46b80fb4c901d52d92abe098e78768ed829c673fbb53c498bef73a"}, + {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5e1e9802121405ede4b0133aa4340ad8186a1d2526de5b7c3eca519db7bb89fb"}, + {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d41213ea25a86f69efd1575073d34ea11aabe075604ddf3d148ecfec9e1e96a1"}, + {file = "coverage-7.10.7-cp312-cp312-win32.whl", hash = "sha256:77eb4c747061a6af8d0f7bdb31f1e108d172762ef579166ec84542f711d90256"}, + {file = "coverage-7.10.7-cp312-cp312-win_amd64.whl", hash = "sha256:f51328ffe987aecf6d09f3cd9d979face89a617eacdaea43e7b3080777f647ba"}, + {file = "coverage-7.10.7-cp312-cp312-win_arm64.whl", hash = 
"sha256:bda5e34f8a75721c96085903c6f2197dc398c20ffd98df33f866a9c8fd95f4bf"}, + {file = "coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d"}, + {file = "coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b"}, + {file = "coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e"}, + {file = "coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b"}, + {file = "coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49"}, + {file = "coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911"}, + {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0"}, + {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f"}, + {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c"}, + {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f"}, + {file = "coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698"}, + {file = "coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843"}, + {file = 
"coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546"}, + {file = "coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c"}, + {file = "coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15"}, + {file = "coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4"}, + {file = "coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0"}, + {file = "coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0"}, + {file = "coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65"}, + {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541"}, + {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6"}, + {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999"}, + {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2"}, + {file = "coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a"}, + {file = "coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = 
"sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb"}, + {file = "coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb"}, + {file = "coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520"}, + {file = "coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32"}, + {file = "coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f"}, + {file = "coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a"}, + {file = "coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360"}, + {file = "coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69"}, + {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14"}, + {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe"}, + {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e"}, + {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd"}, + {file = "coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2"}, + {file = 
"coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681"}, + {file = "coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880"}, + {file = "coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63"}, + {file = "coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2"}, + {file = "coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d"}, + {file = "coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0"}, + {file = "coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699"}, + {file = "coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9"}, + {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f"}, + {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1"}, + {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0"}, + {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399"}, + {file = "coverage-7.10.7-cp314-cp314t-win32.whl", hash = 
"sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235"}, + {file = "coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d"}, + {file = "coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a"}, + {file = "coverage-7.10.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fff7b9c3f19957020cac546c70025331113d2e61537f6e2441bc7657913de7d3"}, + {file = "coverage-7.10.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bc91b314cef27742da486d6839b677b3f2793dfe52b51bbbb7cf736d5c29281c"}, + {file = "coverage-7.10.7-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:567f5c155eda8df1d3d439d40a45a6a5f029b429b06648235f1e7e51b522b396"}, + {file = "coverage-7.10.7-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2af88deffcc8a4d5974cf2d502251bc3b2db8461f0b66d80a449c33757aa9f40"}, + {file = "coverage-7.10.7-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7315339eae3b24c2d2fa1ed7d7a38654cba34a13ef19fbcb9425da46d3dc594"}, + {file = "coverage-7.10.7-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:912e6ebc7a6e4adfdbb1aec371ad04c68854cd3bf3608b3514e7ff9062931d8a"}, + {file = "coverage-7.10.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f49a05acd3dfe1ce9715b657e28d138578bc40126760efb962322c56e9ca344b"}, + {file = "coverage-7.10.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:cce2109b6219f22ece99db7644b9622f54a4e915dad65660ec435e89a3ea7cc3"}, + {file = "coverage-7.10.7-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:f3c887f96407cea3916294046fc7dab611c2552beadbed4ea901cbc6a40cc7a0"}, + {file = "coverage-7.10.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:635adb9a4507c9fd2ed65f39693fa31c9a3ee3a8e6dc64df033e8fdf52a7003f"}, + {file = 
"coverage-7.10.7-cp39-cp39-win32.whl", hash = "sha256:5a02d5a850e2979b0a014c412573953995174743a3f7fa4ea5a6e9a3c5617431"}, + {file = "coverage-7.10.7-cp39-cp39-win_amd64.whl", hash = "sha256:c134869d5ffe34547d14e174c866fd8fe2254918cc0a95e99052903bc1543e07"}, + {file = "coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260"}, + {file = "coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239"}, ] +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + [package.extras] -toml = ["toml"] +toml = ["tomli"] [[package]] name = "cryptography" @@ -353,23 +398,18 @@ files = [ ] [[package]] -name = "dparse" -version = "0.6.0" -description = "A parser for Python dependency files" +name = "exceptiongroup" +version = "1.2.2" +description = "Backport of PEP 654 (exception groups)" optional = false -python-versions = ">=3.5" +python-versions = ">=3.7" files = [ - {file = "dparse-0.6.0-py3-none-any.whl", hash = "sha256:3cb489bd06bfa8d285c85f7dec69d9ee8f89c29dd5f4ab48e159746dc13b78b2"}, - {file = "dparse-0.6.0.tar.gz", hash = "sha256:57068bb61859b1676c6beb10f399906eecb41a75b5d3fbc99d0311059cb67213"}, + {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, + {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, ] -[package.dependencies] -packaging = "*" -toml = "*" - [package.extras] -conda = ["pyyaml"] -pipenv = ["pipenv"] +test = ["pytest (>=6)"] [[package]] name = "filelock" @@ -388,29 +428,29 @@ testing = ["covdefaults (>=2.2)", "coverage (>=6.4.2)", "pytest (>=7.1.2)", "pyt [[package]] name = "flake8" -version = "3.9.2" +version = "7.3.0" description = "the modular source code checker: pep8 pyflakes and co" optional = false -python-versions = 
"!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +python-versions = ">=3.9" files = [ - {file = "flake8-3.9.2-py2.py3-none-any.whl", hash = "sha256:bf8fd333346d844f616e8d47905ef3a3384edae6b4e9beb0c5101e25e3110907"}, - {file = "flake8-3.9.2.tar.gz", hash = "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b"}, + {file = "flake8-7.3.0-py2.py3-none-any.whl", hash = "sha256:b9696257b9ce8beb888cdbe31cf885c90d31928fe202be0889a7cdafad32f01e"}, + {file = "flake8-7.3.0.tar.gz", hash = "sha256:fe044858146b9fc69b551a4b490d69cf960fcb78ad1edcb84e7fbb1b4a8e3872"}, ] [package.dependencies] -mccabe = ">=0.6.0,<0.7.0" -pycodestyle = ">=2.7.0,<2.8.0" -pyflakes = ">=2.3.0,<2.4.0" +mccabe = ">=0.7.0,<0.8.0" +pycodestyle = ">=2.14.0,<2.15.0" +pyflakes = ">=3.4.0,<3.5.0" [[package]] name = "flake8-docstrings" -version = "1.6.0" +version = "1.7.0" description = "Extension for flake8 which uses pydocstyle to check docstrings" optional = false -python-versions = "*" +python-versions = ">=3.7" files = [ - {file = "flake8-docstrings-1.6.0.tar.gz", hash = "sha256:9fe7c6a306064af8e62a055c2f61e9eb1da55f84bb39caef2b84ce53708ac34b"}, - {file = "flake8_docstrings-1.6.0-py2.py3-none-any.whl", hash = "sha256:99cac583d6c7e32dd28bbfbef120a7c0d1b6dde4adb5a9fd441c4227a6534bde"}, + {file = "flake8_docstrings-1.7.0-py2.py3-none-any.whl", hash = "sha256:51f2344026da083fc084166a9353f5082b01f72901df422f74b4d953ae88ac75"}, + {file = "flake8_docstrings-1.7.0.tar.gz", hash = "sha256:4c8cc748dc16e6869728699e5d0d685da9a10b0ea718e090b1ba088e67a941af"}, ] [package.dependencies] @@ -527,15 +567,50 @@ SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""} docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"] testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] +[[package]] +name = "markdown-it-py" +version = "3.0.0" 
+description = "Python port of markdown-it. Markdown parsing, done right!" +optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "mccabe" -version = "0.6.1" +version = "0.7.0" description = "McCabe checker, plugin for flake8" optional = false -python-versions = "*" +python-versions = ">=3.6" +files = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" files = [ - {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"}, - {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"}, + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", 
hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] [[package]] @@ -613,18 +688,15 @@ files = [ [[package]] name = "packaging" -version = "21.3" +version = "26.2" description = "Core utilities for Python packages" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, - {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, + {file = "packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e"}, + {file = "packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661"}, ] -[package.dependencies] -pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" - [[package]] name = "pandas" version = "1.4.4" @@ -657,10 +729,10 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.18.5", markers = "(platform_machine != \"aarch64\" and platform_machine != \"arm64\") and python_version < \"3.10\""}, {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""}, {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, ] python-dateutil = ">=2.8.1" pytz = ">=2020.1" @@ -710,28 +782,28 @@ test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock [[package]] name = "pluggy" -version = "1.0.0" +version = "1.6.0" description = "plugin and hook calling mechanisms for python" optional = false -python-versions = ">=3.6" +python-versions = ">=3.9" files = [ - {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, - {file = "pluggy-1.0.0.tar.gz", hash = 
"sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, + {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, + {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, ] [package.extras] dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] +testing = ["coverage", "pytest", "pytest-benchmark"] [[package]] name = "pre-commit" -version = "2.20.0" +version = "3.8.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "pre_commit-2.20.0-py2.py3-none-any.whl", hash = "sha256:51a5ba7c480ae8072ecdb6933df22d2f812dc897d5fe848778116129a681aac7"}, - {file = "pre_commit-2.20.0.tar.gz", hash = "sha256:a978dac7bc9ec0bcee55c18a277d553b0f419d259dadb4b9418ff2d00eb43959"}, + {file = "pre_commit-3.8.0-py2.py3-none-any.whl", hash = "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f"}, + {file = "pre_commit-3.8.0.tar.gz", hash = "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af"}, ] [package.dependencies] @@ -739,19 +811,7 @@ cfgv = ">=2.0.0" identify = ">=1.0.0" nodeenv = ">=0.11.1" pyyaml = ">=5.1" -toml = "*" -virtualenv = ">=20.0.8" - -[[package]] -name = "py" -version = "1.11.0" -description = "library with cross-python path, ini-parsing, io, code, log facilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, - {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, -] +virtualenv = ">=20.10.0" [[package]] name = "py4j" @@ -766,13 +826,13 @@ files = [ [[package]] name = "pycodestyle" -version = "2.7.0" +version = 
"2.14.0" description = "Python style guide checker" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.9" files = [ - {file = "pycodestyle-2.7.0-py2.py3-none-any.whl", hash = "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068"}, - {file = "pycodestyle-2.7.0.tar.gz", hash = "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"}, + {file = "pycodestyle-2.14.0-py2.py3-none-any.whl", hash = "sha256:dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d"}, + {file = "pycodestyle-2.14.0.tar.gz", hash = "sha256:c4b5b517d278089ff9d0abdec919cd97262a3367449ea1c8b49b91529167b783"}, ] [[package]] @@ -805,13 +865,13 @@ toml = ["toml"] [[package]] name = "pyflakes" -version = "2.3.1" +version = "3.4.0" description = "passive checker of Python programs" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.9" files = [ - {file = "pyflakes-2.3.1-py2.py3-none-any.whl", hash = "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3"}, - {file = "pyflakes-2.3.1.tar.gz", hash = "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"}, + {file = "pyflakes-3.4.0-py2.py3-none-any.whl", hash = "sha256:f742a7dbd0d9cb9ea41e9a24a918996e8170c799fa528688d40dd582c8265f4f"}, + {file = "pyflakes-3.4.0.tar.gz", hash = "sha256:b24f96fafb7d2ab0ec5075b7350b3d2d2218eab42003821c06344973d3ea2f58"}, ] [[package]] @@ -828,20 +888,6 @@ files = [ [package.extras] plugins = ["importlib-metadata"] -[[package]] -name = "pyparsing" -version = "3.0.9" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -optional = false -python-versions = ">=3.6.8" -files = [ - {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, - {file = "pyparsing-3.0.9.tar.gz", hash = 
"sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, -] - -[package.extras] -diagrams = ["jinja2", "railroad-diagrams"] - [[package]] name = "pyspark" version = "3.3.2" @@ -863,91 +909,59 @@ sql = ["pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] [[package]] name = "pytest" -version = "6.2.5" +version = "8.4.2" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.6" +python-versions = ">=3.9" files = [ - {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, - {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, + {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"}, + {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"}, ] [package.dependencies] -atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} -attrs = ">=19.2.0" -colorama = {version = "*", markers = "sys_platform == \"win32\""} -iniconfig = "*" -packaging = "*" -pluggy = ">=0.12,<2.0" -py = ">=1.8.2" -toml = "*" +colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""} +iniconfig = ">=1" +packaging = ">=20" +pluggy = ">=1.5,<2" +pygments = ">=2.7.2" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] [[package]] name = "pytest-cov" -version = "2.12.1" +version = "5.0.0" description = "Pytest plugin for measuring coverage." 
optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=3.8" files = [ - {file = "pytest-cov-2.12.1.tar.gz", hash = "sha256:261ceeb8c227b726249b376b8526b600f38667ee314f910353fa318caa01f4d7"}, - {file = "pytest_cov-2.12.1-py2.py3-none-any.whl", hash = "sha256:261bb9e47e65bd099c89c3edf92972865210c36813f80ede5277dceb77a4a62a"}, + {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"}, + {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, ] [package.dependencies] -coverage = ">=5.2.1" +coverage = {version = ">=5.2.1", extras = ["toml"]} pytest = ">=4.6" -toml = "*" [package.extras] -testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] - -[[package]] -name = "pytest-flake8" -version = "1.1.0" -description = "pytest plugin to check FLAKE8 requirements" -optional = false -python-versions = "*" -files = [ - {file = "pytest-flake8-1.1.0.tar.gz", hash = "sha256:358d449ca06b80dbadcb43506cd3e38685d273b4968ac825da871bd4cc436202"}, - {file = "pytest_flake8-1.1.0-py2.py3-none-any.whl", hash = "sha256:f1b19dad0b9f0aa651d391c9527ebc20ac1a0f847aa78581094c747462bfa182"}, -] - -[package.dependencies] -flake8 = ">=3.5" -pytest = ">=3.5" +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] [[package]] name = "pytest-rerunfailures" -version = "9.1.1" +version = "14.0" description = "pytest plugin to re-run tests to eliminate flaky failures" optional = false -python-versions = ">=3.5" +python-versions = ">=3.8" files = [ - {file = "pytest-rerunfailures-9.1.1.tar.gz", hash = "sha256:1cb11a17fc121b3918414eb5eaf314ee325f2e693ac7cb3f6abf7560790827f2"}, - {file = "pytest_rerunfailures-9.1.1-py3-none-any.whl", hash = "sha256:2eb7d0ad651761fbe80e064b0fd415cf6730cdbc53c16a145fd84b66143e609f"}, + {file = "pytest-rerunfailures-14.0.tar.gz", hash = 
"sha256:4a400bcbcd3c7a4ad151ab8afac123d90eca3abe27f98725dc4d9702887d2e92"}, + {file = "pytest_rerunfailures-14.0-py3-none-any.whl", hash = "sha256:4197bdd2eaeffdbf50b5ea6e7236f47ff0e44d1def8dae08e409f536d84e7b32"}, ] [package.dependencies] -pytest = ">=5.0" -setuptools = ">=40.0" - -[[package]] -name = "pytest-runner" -version = "5.3.2" -description = "Invoke py.test as distutils command with dependency resolution" -optional = false -python-versions = ">=3.6" -files = [ - {file = "pytest-runner-5.3.2.tar.gz", hash = "sha256:48934ec94301f6727d30615af1960539ff62063f6c9b71b7227174e51ba5fb34"}, - {file = "pytest_runner-5.3.2-py3-none-any.whl", hash = "sha256:c7d785ea6c612396c11ddbaf467764d2cc746ef96a713fbe1a296c221503b7c3"}, -] - -[package.extras] -docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"] -testing = ["pytest (>=4.6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy", "pytest-virtualenv"] +packaging = ">=17.1" +pytest = ">=7.2" [[package]] name = "python-dateutil" @@ -1103,22 +1117,22 @@ files = [ idna2008 = ["idna"] [[package]] -name = "safety" -version = "1.10.3" -description = "Checks installed dependencies for known vulnerabilities." 
+name = "rich" +version = "15.0.0" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" optional = false -python-versions = ">=3.5" +python-versions = ">=3.9.0" files = [ - {file = "safety-1.10.3-py2.py3-none-any.whl", hash = "sha256:5f802ad5df5614f9622d8d71fedec2757099705c2356f862847c58c6dfe13e84"}, - {file = "safety-1.10.3.tar.gz", hash = "sha256:30e394d02a20ac49b7f65292d19d38fa927a8f9582cdfd3ad1adbbc66c641ad5"}, + {file = "rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb"}, + {file = "rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36"}, ] [package.dependencies] -Click = ">=6.0" -dparse = ">=0.5.1" -packaging = "*" -requests = "*" -setuptools = "*" +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] [[package]] name = "secretstorage" @@ -1172,17 +1186,6 @@ files = [ {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, ] -[[package]] -name = "toml" -version = "0.10.2" -description = "Python Library for Tom's Obvious, Minimal Language" -optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, - {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, -] - [[package]] name = "tomli" version = "1.2.3" @@ -1194,47 +1197,26 @@ files = [ {file = "tomli-1.2.3.tar.gz", hash = "sha256:05b6166bff487dc068d322585c7ea4ef78deed501cc124060e0f238e89a9231f"}, ] -[[package]] -name = "tqdm" -version = "4.64.1" -description = "Fast, Extensible Progress Meter" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" -files = [ - {file = 
"tqdm-4.64.1-py2.py3-none-any.whl", hash = "sha256:6fee160d6ffcd1b1c68c65f14c829c22832bc401726335ce92c52d395944a6a1"}, - {file = "tqdm-4.64.1.tar.gz", hash = "sha256:5f4f682a004951c1b450bc753c710e9280c5746ce6ffedee253ddbcbf54cf1e4"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[package.extras] -dev = ["py-make (>=0.1.0)", "twine", "wheel"] -notebook = ["ipywidgets (>=6)"] -slack = ["slack-sdk"] -telegram = ["requests"] - [[package]] name = "twine" -version = "3.8.0" +version = "5.1.1" description = "Collection of utilities for publishing packages on PyPI" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "twine-3.8.0-py3-none-any.whl", hash = "sha256:d0550fca9dc19f3d5e8eadfce0c227294df0a2a951251a4385797c8a6198b7c8"}, - {file = "twine-3.8.0.tar.gz", hash = "sha256:8efa52658e0ae770686a13b675569328f1fba9837e5de1867bfe5f46a9aefe19"}, + {file = "twine-5.1.1-py3-none-any.whl", hash = "sha256:215dbe7b4b94c2c50a7315c0275d2258399280fbb7d04182c7e55e24b5f93997"}, + {file = "twine-5.1.1.tar.gz", hash = "sha256:9aa0825139c02b3434d913545c7b847a21c835e11597f5255842d457da2322db"}, ] [package.dependencies] -colorama = ">=0.4.3" importlib-metadata = ">=3.6" keyring = ">=15.1" -pkginfo = ">=1.8.1" -readme-renderer = ">=21.0" +pkginfo = ">=1.8.1,<1.11" +readme-renderer = ">=35.0" requests = ">=2.20" requests-toolbelt = ">=0.8.0,<0.9.0 || >0.9.0" rfc3986 = ">=1.4.0" -tqdm = ">=4.14" +rich = ">=12.0.0" urllib3 = ">=1.26.0" [[package]] @@ -1315,5 +1297,5 @@ pyspark = ["pyspark"] [metadata] lock-version = "2.0" -python-versions = ">=3.8,<4" -content-hash = "616e9d9c99206a718797187683520c788474e89486e6549d92eeed712ff6ae55" +python-versions = ">=3.9,<4" +content-hash = "9ca1d776b68eead781bf0161f4f2ee552085c8e201b482b6f78d18c06a2a5d79" diff --git a/pydeequ/verification.py b/pydeequ/verification.py index c164246..38da74f 100644 --- a/pydeequ/verification.py +++ b/pydeequ/verification.py @@ 
-143,6 +143,34 @@ def checkResultsAsDataFrame( ) return DataFrame(df, sql_ctx).toPandas() if pandas else DataFrame(df, sql_ctx) + @classmethod + def rowLevelResultsAsDataFrame( + cls, spark_session: SparkSession, verificationResult, data: DataFrame, pandas: bool = False + ): + """ + Returns the original DataFrame with additional Boolean columns indicating which rows + passed or failed each Check. Each Check produces one Boolean column named after its + description, where multiple constraints within a Check are ANDed together. + + Only checks with row-level-capable constraints (e.g., isComplete, hasPattern, isContainedIn, + isUnique) will produce output columns. Aggregate-only checks (e.g., hasSize) are skipped. + + :param SparkSession spark_session: SparkSession + :param verificationResult: The results of the verification run + :param DataFrame data: The original input DataFrame that was verified + :param bool pandas: If True, return a Pandas DataFrame instead of PySpark + :return: DataFrame with original columns plus Boolean columns per qualifying Check + """ + df = spark_session._jvm.com.amazon.deequ.VerificationResult.rowLevelResultsAsDataFrame( + spark_session._jsparkSession, verificationResult.verificationRun, data._jdf + ) + sql_ctx = SQLContext( + sparkContext=spark_session._sc, + sparkSession=spark_session, + jsqlContext=spark_session._jsparkSession.sqlContext(), + ) + return DataFrame(df, sql_ctx).toPandas() if pandas else DataFrame(df, sql_ctx) + class VerificationRunBuilder: # TODO Remaining Methods diff --git a/pyproject.toml b/pyproject.toml index 4120233..e6755d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,24 +28,21 @@ classifiers = [ [tool.poetry.dependencies] -python = ">=3.8,<4" +python = ">=3.9,<4" numpy = ">=1.14.1" pandas = ">=0.23.0" pyspark = { version = ">=2.4.7,<4.0.0", optional = true } [tool.poetry.dev-dependencies] -pytest = "^6.2.4" -pytest-cov = "^2.11.1" -coverage = "^5.5" -pytest-runner = "^5.3.0" -black = "^21.5b1" 
-flake8 = "^3.9.2" -flake8-docstrings = "^1.6.0" -pytest-flake8 = "^1.0.7" -pre-commit = "^2.12.1" -pytest-rerunfailures = "^9.1.1" -twine = "^3.4.1" -safety = "^1.10.3" +pytest = "^8.0" +pytest-cov = "^5.0" +coverage = "^7.0" +black = "^24.0" +flake8 = "^7.0" +flake8-docstrings = "^1.7" +pre-commit = "^3.5" +pytest-rerunfailures = "^14.0" +twine = "^5.0" [tool.poetry.extras] pyspark = ["pyspark"] @@ -62,7 +59,7 @@ include_trailing_comma = true force_grid_wrap = 0 use_parentheses = true ensure_newline_before_comments = true -target_version = ['py38'] +target_version = ['py39'] include = '\.pyi?$' exclude = ''' /( diff --git a/scripts/issue_bot/config.py b/scripts/issue_bot/config.py index b6fdb0c..5fff510 100644 --- a/scripts/issue_bot/config.py +++ b/scripts/issue_bot/config.py @@ -17,6 +17,8 @@ def __init__(self): sys.exit(1) self.repo = _require("GITHUB_REPOSITORY") self.actor = os.getenv("GITHUB_ACTOR", "") + self.event_before = os.getenv("EVENT_BEFORE", "") + self.event_after = os.getenv("EVENT_AFTER", "") self.bedrock_model_id = os.getenv("BEDROCK_MODEL_ID", "us.anthropic.claude-opus-4-6-v1") @@ -32,14 +34,16 @@ def __init__(self): self.enable_repo_search = os.getenv("ENABLE_REPO_SEARCH", "true").lower() == "true" self.upstream_repo = os.getenv("UPSTREAM_REPO", "awslabs/python-deequ") + self.codebase_src_dir = os.getenv("CODEBASE_SRC_DIR", "pydeequ") + self.codebase_file_ext = os.getenv("CODEBASE_FILE_EXT", ".py") - self.bedrock_timeout = 120 - self.max_context_chars = 200000 + self.bedrock_timeout = 240 + self.max_context_chars = 800000 self.max_github_search_results = 8 self.github_api_timeout = 10 self.allowed_labels = { "bug", "enhancement", "question", "documentation", - "help-wanted", "analyzer", "check", "spark-compatibility", "installation", + "help wanted", "python", } diff --git a/scripts/issue_bot/github_client.py b/scripts/issue_bot/github_client.py index 82d0ed2..5c2d046 100644 --- a/scripts/issue_bot/github_client.py +++ 
b/scripts/issue_bot/github_client.py @@ -11,6 +11,7 @@ def __init__(self, cfg): self._repo = cfg.repo self._timeout = cfg.github_api_timeout self._dry_run = cfg.dry_run + self._cfg = cfg self._repo_root = os.getenv("GITHUB_WORKSPACE", os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) self._headers = { "Authorization": f"token {self._token}", @@ -48,6 +49,59 @@ def get_pr_diff(self, number): logger.error(f"PR diff fetch failed: {e}") return "" + def get_compare_diff(self, base_sha, head_sha): + """Fetch the diff between two commits using the Compare API. + Returns the diff text, or empty string on failure (e.g. force-push + where base_sha no longer exists).""" + headers = {**self._headers, "Accept": "application/vnd.github.v3.diff"} + try: + resp = requests.get( + f"https://api.github.com/repos/{self._repo}/compare/{base_sha}...{head_sha}", + headers=headers, timeout=self._timeout, + ) + if resp.status_code == 200: + return resp.text + logger.warning(f"Compare API {base_sha[:7]}...{head_sha[:7]}: {resp.status_code}") + return "" + except Exception as e: + logger.error(f"Compare diff failed: {e}") + return "" + + def get_ci_status(self, sha): + """Check commit statuses and check runs. Returns (passed, summary). 
+ passed: True (all green), False (something failed), None (pending/unknown).""" + status = self._get(f"/repos/{self._repo}/commits/{sha}/status") + if status is None: + return None, "CI status unavailable" + combined_state = status.get("state", "pending") + + check_data = self._get(f"/repos/{self._repo}/commits/{sha}/check-runs") + runs = check_data.get("check_runs", []) if check_data else [] + + def _is_own_check(name): + lower = name.lower() + return "bot" in lower and ("analyze" in lower or "/ act" in lower) + + external_runs = [r for r in runs if not _is_own_check(r.get("name", ""))] + + failed = [] + pending = [] + for r in external_runs: + if r.get("status") != "completed": + pending.append(r["name"]) + elif r.get("conclusion") not in ("success", "neutral", "skipped"): + failed.append(r["name"]) + + if failed: + return False, f"CI failing: {', '.join(failed)}" + if pending: + return None, f"CI pending: {', '.join(pending)}" + if combined_state == "failure": + return False, "CI failing (status checks)" + if combined_state == "pending": + return None, "CI pending (status checks)" + return True, "CI passed" + def get_pr_files(self, number): return self._get(f"/repos/{self._repo}/pulls/{number}/files") or [] @@ -64,16 +118,18 @@ def get_pr_review_comments(self, number, max_pages=10): page += 1 return comments - def get_codebase_map(self, src_dir="pydeequ"): - """List all Python source files (excluding tests) as relative paths.""" + def get_codebase_map(self): + """List source files (excluding tests) as relative paths.""" + src_dir = self._cfg.codebase_src_dir + file_ext = self._cfg.codebase_file_ext full_dir = os.path.join(self._repo_root, src_dir) prefix = self._repo_root.rstrip("/") + "/" try: paths = [] for root, dirs, files in os.walk(full_dir): - dirs[:] = [d for d in dirs if d not in ("tests", "__pycache__", ".git")] + dirs[:] = [d for d in dirs if d not in ("examples", "__pycache__", ".git", "tests", "test")] for f in files: - if f.endswith(".py"): + if 
f.endswith(file_ext): full = os.path.join(root, f) rel = full[len(prefix):] if full.startswith(prefix) else full paths.append(rel) @@ -116,9 +172,9 @@ def post_comment(self, number, body): return True return self._post(f"/repos/{self._repo}/issues/{number}/comments", {"body": body}) - def post_pr_review(self, number, summary, inline_comments): + def post_pr_review(self, number, summary, inline_comments, event="COMMENT"): if self._dry_run: - logger.info(f"[DRY RUN] PR review on #{number}: {len(inline_comments)} inline comments") + logger.info(f"[DRY RUN] PR review on #{number}: {len(inline_comments)} inline comments, event={event}") return True # Get valid diff lines per file from the PR @@ -134,31 +190,34 @@ def post_pr_review(self, number, summary, inline_comments): else: invalid_comments.append(ic) + body = summary + if invalid_comments: + body += "\n\n**Additional feedback:**\n" + for ic in invalid_comments: + line_ref = f":{ic['line']}" if ic.get('line') else "" + body += f"\n`{ic['file']}{line_ref}` — {ic['comment']}\n" + + payload = {"body": body, "event": event} if valid_comments: - body = summary - if invalid_comments: - body += "\n\n**Additional feedback:**\n" - for ic in invalid_comments: - line_ref = f":{ic['line']}" if ic.get('line') else "" - body += f"\n`{ic['file']}{line_ref}` — {ic['comment']}\n" - payload = {"body": body, "event": "REQUEST_CHANGES", "comments": valid_comments} - try: - resp = requests.post( - f"https://api.github.com/repos/{self._repo}/pulls/{number}/reviews", - headers=self._headers, json=payload, timeout=self._timeout, - ) - if resp.status_code in (200, 201): - return True - logger.error(f"PR review API failed: {resp.status_code}, falling back to comment") - except Exception as e: - logger.error(f"PR review API failed: {e}, falling back to comment") - - # Fallback: post all as regular comment - all_comments = inline_comments + payload["comments"] = valid_comments + + try: + resp = requests.post( + 
f"https://api.github.com/repos/{self._repo}/pulls/{number}/reviews", + headers=self._headers, json=payload, timeout=self._timeout, + ) + if resp.status_code in (200, 201): + return True + logger.error(f"PR review API failed: {resp.status_code}, falling back to comment") + logger.error(f"Response: {resp.text[:500]}") + except Exception as e: + logger.error(f"PR review API failed: {e}, falling back to comment") + + # Fallback: post as regular comment if review API fails body = summary - if all_comments: + if inline_comments: body += "\n\n**Inline feedback:**\n" - for ic in all_comments: + for ic in inline_comments: line_ref = f":{ic['line']}" if ic.get('line') else "" body += f"\n`{ic['file']}{line_ref}` — {ic['comment']}\n" return self._post(f"/repos/{self._repo}/issues/{number}/comments", {"body": body}) diff --git a/scripts/issue_bot/main.py b/scripts/issue_bot/main.py index 77f5ef7..593eaa8 100644 --- a/scripts/issue_bot/main.py +++ b/scripts/issue_bot/main.py @@ -6,6 +6,7 @@ """ import json +import re import sys import os import datetime @@ -149,17 +150,57 @@ def analyze(): diff = gh.get_pr_diff(number) review_comments = gh.get_pr_review_comments(number) existing_feedback = _format_pr_feedback(comments_data, review_comments) + + # Incremental review: on synchronize, compute what changed since last push + incremental_diff = "" + incremental_files = set() + if is_pr_update and cfg.event_before and cfg.event_after: + incremental_diff = gh.get_compare_diff(cfg.event_before, cfg.event_after) + if incremental_diff: + incremental_files = _extract_diff_files(incremental_diff) + + # Fetch full source files at the SHA the diff is anchored to + head_sha = cfg.event_after or item.get("head", {}).get("sha", "") + pr_files = gh.get_pr_files(number) + full_sources = "" + for pf in pr_files: + fname = pf.get("filename", "") + content = gh.get_file_content(fname, ref=head_sha) if head_sha else gh.get_file_content(fname) + if content: + entry = f"\n### 
`{fname}`\n```\n{content}\n```\n" + if len(full_sources) + len(entry) > 3_000_000: + full_sources += f"\n### `{fname}` — SKIPPED (context budget)\n" + break + full_sources += entry + + # Build incremental review instructions + incremental_section = "" + if incremental_diff: + incremental_section = ( + "\n\n" + "This is a RE-REVIEW after the author pushed new commits. " + "The below shows ONLY what changed since the last push. " + "You MUST limit your comments to lines/files in the incremental diff. " + "Do NOT re-raise issues on unchanged code — the author already saw prior feedback. " + "Do NOT comment on lines that are not part of the incremental diff. " + "If the incremental diff only fixes issues from prior feedback, respond with zero comments." + "\n\n" + f"\n{incremental_diff}\n\n" + ) + # System prompt: instructions + all trusted context (not scanned by guardrail) system_prompt = _render(tmpl, current_date=datetime.date.today().isoformat()) + ( f"\n\n\n{context}\n\n" f"\n{codebase_map}\n\n" + f"\n{full_sources}\n\n" f"\n{diff}\n\n" - f"\n{existing_feedback}\n" + f"\n{existing_feedback}\n\n" + f"{incremental_section}" ) # User prompt: only user-authored content (scanned by guardrail) user_prompt = f"\nTitle: {title}\nBody: {body}\n" raw = bedrock.invoke(system_prompt, user_prompt, - max_tokens=4000, json_schema=PR_REVIEW_SCHEMA) + max_tokens=8000, json_schema=PR_REVIEW_SCHEMA) if raw is None: _write_artifact({ "action": "ESCALATE", "reason": "bedrock_unavailable", "title": title, @@ -172,14 +213,50 @@ def analyze(): inline_comments = pr_result.get("comments", []) except json.JSONDecodeError: inline_comments = _parse_file_review_multi(raw) + + # Hard filter: on incremental review, drop comments on files not in the incremental diff + if incremental_files and inline_comments: + inline_comments = [ + c for c in inline_comments + if c.get("file", "") in incremental_files + ] + + # Hard filter: drop NITs on re-reviews (code-enforced, not prompt-dependent) + if 
is_pr_update and inline_comments: + inline_comments = [ + c for c in inline_comments + if c.get("severity", "").upper() != "NIT" + ] + + # Format comments: prepend severity, append evidence as context + for c in inline_comments: + severity = c.get("severity", "") + evidence = c.get("evidence", "") + prefix = f"**{severity}**: " if severity else "" + suffix = "\n\n> " + evidence.replace("\n", "\n> ") if evidence else "" + c["comment"] = prefix + c.get("comment", "") + suffix + + # Check CI status to give accurate signal to human reviewers + ci_passed, ci_summary = gh.get_ci_status(head_sha) if head_sha else (None, "") + + if not inline_comments: + if ci_passed is True: + response = "No issues found. CI is passing.\n" + elif ci_passed is False: + response = f"No code issues found, but {ci_summary}." + else: + response = "No issues found.\n" + else: + response = "" + _write_artifact({ "action": "RESPOND", - "labels": [], "response": "No issues found." if not inline_comments else "", + "labels": [], "response": response, "inline_comments": inline_comments, "title": title, "html_url": html_url, "number": number, - "is_pr": True, "prompt_id": prompts.prompt_version(tmpl), + "is_pr": True, "is_incremental": bool(incremental_diff), + "prompt_id": prompts.prompt_version(tmpl), "model_id": cfg.bedrock_model_id, - "reason": "no_issues_found" if not inline_comments else "", }) return @@ -308,7 +385,11 @@ def act(): sanitized_comments.append({**ic, "comment": safe_comment}) inline_comments = sanitized_comments if is_pr and inline_comments: - gh.post_pr_review(number, response + footer, inline_comments) + gh.post_pr_review(number, response + footer, inline_comments, event="COMMENT") + elif is_pr and response and not inline_comments: + gh.post_pr_review(number, response + footer, [], event="COMMENT") + elif not response and not inline_comments: + logger.info(f"Skip #{number}: nothing to post after sanitization") else: gh.post_comment(number, response + footer) 
gh.add_labels(number, labels) @@ -354,7 +435,7 @@ def act(): elif action == "CLOSE" and not is_pr: msg = ( - "This issue may not be related to the PyDeequ data quality library. " + "This issue may not be related to the PyDeequ library. " "The maintainer team has been notified and will review." + footer ) gh.post_comment(number, msg) @@ -584,6 +665,16 @@ def _format_pr_feedback(issue_comments, review_comments): return "\n".join(parts) if parts else "(no existing feedback)" +def _extract_diff_files(diff_text): + """Extract the set of file paths touched in a unified diff.""" + files = set() + for line in diff_text.split("\n"): + m = re.match(r'^diff --git a/.+ b/(.+)$', line) + if m: + files.add(m.group(1)) + return files + + def _read_requested_files(gh, file_paths, cfg): snippets = [] for path in file_paths[:cfg.max_github_search_results]: diff --git a/scripts/issue_bot/prompts.py b/scripts/issue_bot/prompts.py index e2ff385..9fc97ea 100644 --- a/scripts/issue_bot/prompts.py +++ b/scripts/issue_bot/prompts.py @@ -1,21 +1,53 @@ import hashlib +import logging import os +import boto3 + +logger = logging.getLogger("issue_bot") + +_sm_client = None + + +def _get_sm_client(): + global _sm_client + if _sm_client is None: + _sm_client = boto3.client("secretsmanager") + return _sm_client + + +def _read_from_sm(secret_id): + if not secret_id: + return "" + try: + resp = _get_sm_client().get_secret_value(SecretId=secret_id) + return resp["SecretString"] + except Exception as e: + logger.error("Failed to read prompt from Secrets Manager: %s", type(e).__name__) + return "" + + +def _get_prompt(env_var, sm_env_var): + val = os.getenv(env_var, "") + if val: + return val + return _read_from_sm(os.getenv(sm_env_var, "")) + def get_issue_prompt(): - return os.getenv("ISSUE_CLASSIFY_PROMPT", "") + return _get_prompt("ISSUE_CLASSIFY_PROMPT", "SM_ISSUE_CLASSIFY_PROMPT") def get_issue_respond_prompt(): - return os.getenv("ISSUE_RESPOND_PROMPT", "") + return 
class TestGetCiStatus:
    """Behavioural checks for ``GitHubClient.get_ci_status``."""

    def _make_client(self):
        """Build a GitHubClient from a Config created under a fake environment."""
        from unittest import mock
        fake_env = {
            "GITHUB_TOKEN": "fake", "GITHUB_REPOSITORY": "awslabs/test",
            "ISSUE_NUMBER": "1", "EVENT_TYPE": "issues", "EVENT_ACTION": "opened",
            "GITHUB_WORKFLOW": "PyDeequ Bot",
        }
        with mock.patch.dict(os.environ, fake_env):
            # Imported while the environment is patched, as configuration is
            # derived from these variables.
            from issue_bot.config import Config
            from issue_bot.github_client import GitHubClient
            return GitHubClient(Config())

    @staticmethod
    def _run(name, status, conclusion):
        """Return a check-run payload entry."""
        return {"name": name, "status": status, "conclusion": conclusion}

    def _ci_status(self, combined, check_runs):
        """Call get_ci_status with canned status and check-run API responses."""
        from unittest import mock
        gh = self._make_client()
        gh._get = mock.MagicMock(side_effect=[combined, {"check_runs": check_runs}])
        return gh.get_ci_status("abc123")

    def test_all_checks_passed(self):
        passed, summary = self._ci_status({"state": "success"}, [
            self._run("Java CI", "completed", "success"),
            self._run("CodeQL", "completed", "success"),
        ])
        assert passed is True
        assert "passed" in summary.lower()

    def test_check_run_failed(self):
        passed, summary = self._ci_status({"state": "success"}, [
            self._run("Java CI", "completed", "failure"),
            self._run("CodeQL", "completed", "success"),
        ])
        assert passed is False
        assert "Java CI" in summary

    def test_check_run_pending(self):
        passed, summary = self._ci_status({"state": "pending"}, [
            self._run("Java CI", "in_progress", None),
        ])
        assert passed is None
        assert "pending" in summary.lower()

    def test_bot_check_filtered_out(self):
        passed, _ = self._ci_status({"state": "success"}, [
            self._run("Java CI", "completed", "success"),
            self._run("PyDeequ Bot / analyze", "completed", "success"),
            self._run("PyDeequ Bot / act", "completed", "success"),
        ])
        assert passed is True

    def test_non_bot_check_with_bot_in_name_not_filtered(self):
        passed, _ = self._ci_status({"state": "success"}, [
            self._run("robot-tests", "completed", "failure"),
        ])
        assert passed is False

    def test_skipped_and_neutral_count_as_passed(self):
        passed, _ = self._ci_status({"state": "success"}, [
            self._run("Optional Check", "completed", "skipped"),
            self._run("Info Check", "completed", "neutral"),
        ])
        assert passed is True

    def test_api_failure_returns_unknown(self):
        from unittest import mock
        gh = self._make_client()
        # Every API call fails, so the status is unknown rather than red.
        gh._get = mock.MagicMock(return_value=None)
        passed, summary = gh.get_ci_status("abc123")
        assert passed is None
class TestPrompts:
    """Prompt lookup order: environment variable first, Secrets Manager second."""

    def test_env_var_takes_precedence(self, monkeypatch):
        env = (
            ("PR_FILE_REVIEW_PROMPT", "from env"),
            ("SM_PR_FILE_REVIEW_PROMPT", "deequ-bot/pr-file-review-prompt"),
        )
        for key, value in env:
            monkeypatch.setenv(key, value)
        from issue_bot import prompts
        assert prompts.get_pr_file_review_prompt() == "from env"

    def test_empty_env_var_falls_through_to_sm(self, monkeypatch):
        from unittest import mock
        monkeypatch.setenv("PR_FILE_REVIEW_PROMPT", "")
        monkeypatch.setenv("SM_PR_FILE_REVIEW_PROMPT", "deequ-bot/pr-file-review-prompt")
        from issue_bot import prompts
        with mock.patch.object(prompts, "_read_from_sm", return_value="from sm") as reader:
            fetched = prompts.get_pr_file_review_prompt()
        assert fetched == "from sm"
        reader.assert_called_once_with("deequ-bot/pr-file-review-prompt")

    def test_no_sm_env_var_returns_empty(self, monkeypatch):
        for key in ("PR_FILE_REVIEW_PROMPT", "SM_PR_FILE_REVIEW_PROMPT"):
            monkeypatch.setenv(key, "")
        from issue_bot import prompts
        # Neither the prompt nor a secret name is configured.
        assert prompts.get_pr_file_review_prompt() == ""

    def test_sm_failure_returns_empty(self, monkeypatch):
        from unittest import mock
        monkeypatch.setenv("FOLLOWUP_PROMPT", "")
        monkeypatch.setenv("SM_FOLLOWUP_PROMPT", "deequ-bot/followup-prompt")
        failing_client = mock.MagicMock()
        failing_client.get_secret_value.side_effect = Exception("timeout")
        with mock.patch("issue_bot.prompts._get_sm_client", return_value=failing_client):
            from issue_bot.prompts import get_followup_prompt
            assert get_followup_prompt() == ""
class TestIncrementalFiltering:
    """Comments on files outside the incremental diff must be dropped."""

    @staticmethod
    def _keep(comments, allowed_files):
        """Mirror of the incremental file filter: keep comments on allowed files."""
        kept = []
        for entry in comments:
            if entry.get("file", "") in allowed_files:
                kept.append(entry)
        return kept

    def test_comments_filtered_to_incremental_files(self):
        touched = {"src/changed.py"}
        comments = [
            {"file": "src/changed.py", "line": 10, "comment": "new issue"},
            {"file": "src/untouched.py", "line": 5, "comment": "old issue re-raised"},
        ]
        survivors = self._keep(comments, touched)
        assert len(survivors) == 1
        assert survivors[0]["file"] == "src/changed.py"

    def test_empty_incremental_files_passes_all(self):
        touched = set()
        comments = [
            {"file": "src/any.py", "line": 1, "comment": "comment"},
        ]
        # An empty set means "full review": the filter is not applied at all.
        survivors = self._keep(comments, touched) if touched else comments
        assert len(survivors) == 1

    def test_all_comments_filtered_yields_empty(self):
        touched = {"src/only_this.py"}
        comments = [
            {"file": "src/other.py", "line": 1, "comment": "stale"},
            {"file": "src/another.py", "line": 2, "comment": "stale too"},
        ]
        assert self._keep(comments, touched) == []
mock.patch("issue_bot.bedrock_client.BedrockClient.__init__", return_value=None), \ + mock.patch("issue_bot.bedrock_client.BedrockClient.invoke") as mock_bedrock: + + mock_pr.return_value = { + "user": {"login": "dev"}, "title": "Fix", "body": "", + "state": "open", "html_url": "https://github.com/x", + "head": {"sha": "bbb"}, + } + mock_comments.return_value = [{"user": {"login": "github-actions[bot]"}, "body": "prior"}] + mock_rc.return_value = [] + mock_compare.return_value = incremental + mock_files.return_value = [{"filename": "f.py"}] + mock_content.return_value = "content" + mock_ci.return_value = (True, "CI passed") + mock_bedrock.return_value = json.dumps({"comments": [ + {"file": "f.py", "line": 1, "severity": "BUG", "comment": "real bug", + "evidence": "line 1 divides by zero"}, + {"file": "f.py", "line": 1, "severity": "NIT", "comment": "rename var", + "evidence": "x is not descriptive"}, + ]}) + + bot_main.analyze() + + with open(str(tmp_path / "result.json")) as f: + result = json.load(f) + + # NIT should be filtered, BUG should remain + assert result["action"] == "RESPOND" + assert len(result["inline_comments"]) == 1 + assert "real bug" in result["inline_comments"][0]["comment"] + + def test_nits_kept_on_first_review(self, tmp_path, monkeypatch): + import unittest.mock as mock + monkeypatch.setenv("GITHUB_TOKEN", "fake") + monkeypatch.setenv("GITHUB_REPOSITORY", "awslabs/test") + monkeypatch.setenv("ISSUE_NUMBER", "10") + monkeypatch.setenv("EVENT_TYPE", "pull_request_target") + monkeypatch.setenv("EVENT_ACTION", "opened") + monkeypatch.setenv("EVENT_BEFORE", "") + monkeypatch.setenv("EVENT_AFTER", "") + monkeypatch.setenv("KB_S3_BUCKET", "") + monkeypatch.setenv("KB_S3_KEY", "") + monkeypatch.setenv("PR_FILE_REVIEW_PROMPT", "Review. 
Date: {current_date}") + import issue_bot.main as bot_main + monkeypatch.setattr(bot_main, "ARTIFACT_PATH", str(tmp_path / "result.json")) + + with mock.patch("issue_bot.github_client.GitHubClient.get_pr") as mock_pr, \ + mock.patch("issue_bot.github_client.GitHubClient.get_comments") as mock_comments, \ + mock.patch("issue_bot.github_client.GitHubClient.get_pr_diff"), \ + mock.patch("issue_bot.github_client.GitHubClient.get_pr_review_comments") as mock_rc, \ + mock.patch("issue_bot.github_client.GitHubClient.get_pr_files") as mock_files, \ + mock.patch("issue_bot.github_client.GitHubClient.get_file_content") as mock_content, \ + mock.patch("issue_bot.github_client.GitHubClient.get_codebase_map"), \ + mock.patch("issue_bot.github_client.GitHubClient.get_ci_status") as mock_ci, \ + mock.patch("issue_bot.knowledge_base.KnowledgeBase.load"), \ + mock.patch("issue_bot.knowledge_base.KnowledgeBase.build_context", return_value=""), \ + mock.patch("issue_bot.bedrock_client.BedrockClient.__init__", return_value=None), \ + mock.patch("issue_bot.bedrock_client.BedrockClient.invoke") as mock_bedrock: + + mock_pr.return_value = { + "user": {"login": "dev"}, "title": "Fix", "body": "", + "state": "open", "html_url": "https://github.com/x", + "head": {"sha": "abc123"}, + } + mock_comments.return_value = [] + mock_rc.return_value = [] + mock_files.return_value = [{"filename": "f.py"}] + mock_content.return_value = "content" + mock_ci.return_value = (True, "CI passed") + mock_bedrock.return_value = json.dumps({"comments": [ + {"file": "f.py", "line": 1, "severity": "BUG", "comment": "bug", + "evidence": "evidence1"}, + {"file": "f.py", "line": 2, "severity": "NIT", "comment": "nit", + "evidence": "evidence2"}, + ]}) + + bot_main.analyze() + + with open(str(tmp_path / "result.json")) as f: + result = json.load(f) + + # Both BUG and NIT should be present on first review + assert len(result["inline_comments"]) == 2 + + def test_evidence_formatted_in_comment(self, tmp_path, 
monkeypatch): + import unittest.mock as mock + monkeypatch.setenv("GITHUB_TOKEN", "fake") + monkeypatch.setenv("GITHUB_REPOSITORY", "awslabs/test") + monkeypatch.setenv("ISSUE_NUMBER", "10") + monkeypatch.setenv("EVENT_TYPE", "pull_request_target") + monkeypatch.setenv("EVENT_ACTION", "opened") + monkeypatch.setenv("EVENT_BEFORE", "") + monkeypatch.setenv("EVENT_AFTER", "") + monkeypatch.setenv("KB_S3_BUCKET", "") + monkeypatch.setenv("KB_S3_KEY", "") + monkeypatch.setenv("PR_FILE_REVIEW_PROMPT", "Review. Date: {current_date}") + import issue_bot.main as bot_main + monkeypatch.setattr(bot_main, "ARTIFACT_PATH", str(tmp_path / "result.json")) + + with mock.patch("issue_bot.github_client.GitHubClient.get_pr") as mock_pr, \ + mock.patch("issue_bot.github_client.GitHubClient.get_comments") as mock_comments, \ + mock.patch("issue_bot.github_client.GitHubClient.get_pr_diff"), \ + mock.patch("issue_bot.github_client.GitHubClient.get_pr_review_comments") as mock_rc, \ + mock.patch("issue_bot.github_client.GitHubClient.get_pr_files") as mock_files, \ + mock.patch("issue_bot.github_client.GitHubClient.get_file_content") as mock_content, \ + mock.patch("issue_bot.github_client.GitHubClient.get_codebase_map"), \ + mock.patch("issue_bot.github_client.GitHubClient.get_ci_status") as mock_ci, \ + mock.patch("issue_bot.knowledge_base.KnowledgeBase.load"), \ + mock.patch("issue_bot.knowledge_base.KnowledgeBase.build_context", return_value=""), \ + mock.patch("issue_bot.bedrock_client.BedrockClient.__init__", return_value=None), \ + mock.patch("issue_bot.bedrock_client.BedrockClient.invoke") as mock_bedrock: + + mock_pr.return_value = { + "user": {"login": "dev"}, "title": "Fix", "body": "", + "state": "open", "html_url": "https://github.com/x", + "head": {"sha": "abc123"}, + } + mock_comments.return_value = [] + mock_rc.return_value = [] + mock_files.return_value = [{"filename": "f.py"}] + mock_content.return_value = "content" + mock_ci.return_value = (True, "CI passed") + 
# Tail of a test whose header lies above this chunk; statements are kept verbatim.
# NOTE(review): the original indentation is not recoverable from this chunk —
# re-indent to match the enclosing test when reassembling the file.
mock_bedrock.return_value = json.dumps({"comments": [
    {"file": "f.py", "line": 5, "severity": "BUG",
     "comment": "division by zero",
     "evidence": "line 3 sets count=0, line 5 divides by count"},
]})

bot_main.analyze()

with open(str(tmp_path / "result.json")) as f:
    result = json.load(f)

comment_text = result["inline_comments"][0]["comment"]
assert comment_text.startswith("**BUG**: ")
assert "division by zero" in comment_text
assert "line 3 sets count=0" in comment_text


_GITHUB_CLIENT = "issue_bot.github_client.GitHubClient"


def _configure_bot_env(tmp_path, monkeypatch, *, issue_number, event_action,
                       event_before, event_after, review_prompt):
    """Set the bot's environment variables and point ARTIFACT_PATH into tmp_path."""
    env = {
        "GITHUB_TOKEN": "fake",
        "GITHUB_REPOSITORY": "awslabs/test",
        "ISSUE_NUMBER": issue_number,
        "EVENT_TYPE": "pull_request_target",
        "EVENT_ACTION": event_action,
        "EVENT_BEFORE": event_before,
        "EVENT_AFTER": event_after,
        "GITHUB_ACTOR": "contributor",
        "KB_S3_BUCKET": "",
        "KB_S3_KEY": "",
        "PR_FILE_REVIEW_PROMPT": review_prompt,
    }
    for name, value in env.items():
        monkeypatch.setenv(name, value)
    import issue_bot.main as bot_main
    monkeypatch.setattr(bot_main, "ARTIFACT_PATH", str(tmp_path / "result.json"))


def _pr_payload(title, body, head_sha=None):
    """Build the dict returned by the patched GitHubClient.get_pr.

    The "head" entry is only present when head_sha is given, so tests can
    exercise the path where the PR object carries no head SHA.
    """
    payload = {
        "user": {"login": "contributor"}, "title": title,
        "body": body, "state": "open", "html_url": "https://github.com/x",
    }
    if head_sha is not None:
        payload["head"] = {"sha": head_sha}
    return payload


def _analyze_with_mocks(*, pr, comments, diff, files, content, model_comments,
                        compare=None, patch_post=False):
    """Run issue_bot.main.analyze() with GitHub, knowledge-base and Bedrock patched.

    Args:
        pr, comments, diff, files, content: return values for the patched
            GitHubClient.get_pr / get_comments / get_pr_diff / get_pr_files /
            get_file_content.
        model_comments: list placed under "comments" in the JSON string that
            the patched BedrockClient.invoke returns.
        compare: return value for GitHubClient.get_compare_diff. ``None`` leaves
            that method unpatched; an empty string is still patched.
        patch_post: when true, GitHubClient.post_pr_review is patched to return True.

    Returns:
        dict mapping each patched GitHubClient method name to its mock. Call
        records stay inspectable after the patches are undone.
    """
    import contextlib
    import unittest.mock as mock

    github_returns = {
        "get_pr": pr,
        "get_comments": comments,
        "get_pr_diff": diff,
        "get_pr_review_comments": [],
        "get_pr_files": files,
        "get_file_content": content,
        "get_codebase_map": "",
    }
    if compare is not None:
        github_returns["get_compare_diff"] = compare
    if patch_post:
        github_returns["post_pr_review"] = True

    mocks = {}
    with contextlib.ExitStack() as stack:
        for method, value in github_returns.items():
            mocks[method] = stack.enter_context(
                mock.patch(f"{_GITHUB_CLIENT}.{method}", return_value=value)
            )
        stack.enter_context(mock.patch("issue_bot.knowledge_base.KnowledgeBase.load"))
        stack.enter_context(
            mock.patch("issue_bot.knowledge_base.KnowledgeBase.build_context", return_value="")
        )
        stack.enter_context(
            mock.patch("issue_bot.bedrock_client.BedrockClient.__init__", return_value=None)
        )
        stack.enter_context(
            mock.patch(
                "issue_bot.bedrock_client.BedrockClient.invoke",
                return_value=json.dumps({"comments": model_comments}),
            )
        )

        from issue_bot.main import analyze
        analyze()
    return mocks


def _load_artifact(tmp_path):
    """Read back the JSON artifact that ARTIFACT_PATH was redirected to."""
    with open(str(tmp_path / "result.json")) as f:
        return json.load(f)


class TestIncrementalReviewIntegration:
    """End-to-end tests for the incremental review path through analyze()."""

    def _setup_env(self, tmp_path, monkeypatch, event_before="abc123", event_after="def456"):
        _configure_bot_env(
            tmp_path, monkeypatch,
            issue_number="99",
            event_action="synchronize",
            event_before=event_before,
            event_after=event_after,
            review_prompt="Review this PR. Date: {current_date}",
        )

    def test_incremental_review_filters_stale_comments(self, tmp_path, monkeypatch):
        self._setup_env(tmp_path, monkeypatch)

        incremental_diff = (
            "diff --git a/src/fixed.py b/src/fixed.py\n"
            "--- a/src/fixed.py\n+++ b/src/fixed.py\n"
            "@@ -1 +1 @@\n-old\n+new\n"
        )

        _analyze_with_mocks(
            pr=_pr_payload("Fix bug", "Fixes the thing"),
            comments=[{"user": {"login": "github-actions[bot]"}, "body": "prior review"}],
            diff="full diff here",
            compare=incremental_diff,
            files=[{"filename": "src/fixed.py"}, {"filename": "src/untouched.py"}],
            content="file content",
            model_comments=[
                {"file": "src/fixed.py", "line": 1, "comment": "new issue in changed file"},
                {"file": "src/untouched.py", "line": 5, "comment": "stale comment on unchanged file"},
            ],
        )
        result = _load_artifact(tmp_path)

        # Only the comment on the file present in the incremental diff survives.
        assert result["action"] == "RESPOND"
        assert result["is_incremental"] is True
        assert len(result["inline_comments"]) == 1
        assert result["inline_comments"][0]["file"] == "src/fixed.py"

    def test_force_push_falls_back_to_full_review(self, tmp_path, monkeypatch):
        self._setup_env(tmp_path, monkeypatch)

        _analyze_with_mocks(
            pr=_pr_payload("Fix bug", "Fixes the thing"),
            comments=[{"user": {"login": "github-actions[bot]"}, "body": "prior review"}],
            diff="full diff here",
            compare="",  # Force push — compare fails
            files=[{"filename": "src/a.py"}],
            content="content",
            model_comments=[{"file": "src/a.py", "line": 1, "comment": "issue found"}],
        )
        result = _load_artifact(tmp_path)

        # Falls back to full review — no filtering, not marked incremental
        assert result["action"] == "RESPOND"
        assert result["is_incremental"] is False
        assert len(result["inline_comments"]) == 1

    def test_no_before_sha_skips_incremental(self, tmp_path, monkeypatch):
        self._setup_env(tmp_path, monkeypatch, event_before="", event_after="def456")

        mocks = _analyze_with_mocks(
            pr=_pr_payload("Fix", "Fix"),
            comments=[{"user": {"login": "github-actions[bot]"}, "body": "review"}],
            diff="full diff",
            compare="should not be called",
            files=[{"filename": "src/a.py"}],
            content="content",
            model_comments=[{"file": "src/a.py", "line": 1, "comment": "issue"}],
        )

        # Should NOT have called compare because event_before is empty
        mocks["get_compare_diff"].assert_not_called()

        result = _load_artifact(tmp_path)
        assert result["is_incremental"] is False


class TestFileContentUsesHeadSha:
    """Verify get_file_content is called with PR head SHA, not default branch."""

    def _setup_env(self, tmp_path, monkeypatch):
        _configure_bot_env(
            tmp_path, monkeypatch,
            issue_number="42",
            event_action="opened",
            event_before="",
            event_after="",
            review_prompt="Review this PR. Date: {current_date}",
        )

    def test_file_content_fetched_with_head_sha(self, tmp_path, monkeypatch):
        self._setup_env(tmp_path, monkeypatch)
        head_sha = "abc123deadbeef"

        mocks = _analyze_with_mocks(
            pr=_pr_payload("Add feature", "New file", head_sha=head_sha),
            comments=[],
            diff="diff content",
            files=[{"filename": "src/new_file.py"}, {"filename": "src/existing.py"}],
            content="file content",
            model_comments=[],
        )

        calls = mocks["get_file_content"].call_args_list
        # Without this guard the loop below passes vacuously when nothing is fetched.
        assert calls, "get_file_content was never called"

        # Every get_file_content call must include ref=head_sha
        for call in calls:
            args, kwargs = call
            assert kwargs.get("ref") == head_sha or \
                (len(args) > 1 and args[1] == head_sha), \
                f"get_file_content called without head SHA: {call}"

    def test_missing_head_sha_falls_back_gracefully(self, tmp_path, monkeypatch):
        self._setup_env(tmp_path, monkeypatch)

        # PR object without head.sha (shouldn't happen, but defensive)
        _analyze_with_mocks(
            pr=_pr_payload("Fix", "Fix"),
            comments=[],
            diff="diff",
            files=[{"filename": "src/a.py"}],
            content="content",
            model_comments=[],
        )

        # Should still work — falls back to no ref (default branch)
        result = _load_artifact(tmp_path)
        # First review with 0 comments → RESPOND with CI-aware message
        assert result["action"] == "RESPOND"
        assert "No issues found" in result["response"]


class TestReviewEventType:
    """Verify bot always uses COMMENT event type, never REQUEST_CHANGES.

    NOTE(review): the tests below only inspect the artifact written by
    analyze(); post_pr_review is patched but never exercised, so the event
    type itself is not asserted anywhere in this class.
    """

    def _setup_env(self, tmp_path, monkeypatch, event_action="opened"):
        is_sync = event_action == "synchronize"
        _configure_bot_env(
            tmp_path, monkeypatch,
            issue_number="50",
            event_action=event_action,
            event_before="aaa111" if is_sync else "",
            event_after="bbb222" if is_sync else "",
            review_prompt="Review. Date: {current_date}",
        )

    def _run_and_get_artifact(self, tmp_path, monkeypatch, mock, event_action="opened"):
        """Run analyze() for the given event action; return (artifact, post_pr_review mock).

        The ``mock`` parameter is unused and kept only so existing callers
        keep working.
        """
        self._setup_env(tmp_path, monkeypatch, event_action=event_action)
        is_sync = event_action == "synchronize"

        mocks = _analyze_with_mocks(
            pr=_pr_payload("Fix", "Fix", head_sha="abc123"),
            comments=(
                [{"user": {"login": "github-actions[bot]"}, "body": "prior"}]
                if is_sync else []
            ),
            diff="diff",
            compare=(
                "diff --git a/f.py b/f.py\n--- a/f.py\n+++ b/f.py\n@@ -1 +1 @@\n-x\n+y\n"
                if is_sync else ""
            ),
            files=[{"filename": "f.py"}],
            content="content",
            model_comments=[{"file": "f.py", "line": 1, "comment": "issue"}],
            patch_post=True,
        )
        return _load_artifact(tmp_path), mocks["post_pr_review"]

    def test_first_review_uses_comment_event(self, tmp_path, monkeypatch):
        import unittest.mock as mock
        result, _ = self._run_and_get_artifact(tmp_path, monkeypatch, mock, "opened")
        assert result["action"] == "RESPOND"
        assert result.get("is_incremental") is False
        assert len(result["inline_comments"]) > 0

    def test_incremental_review_uses_comment_event(self, tmp_path, monkeypatch):
        import unittest.mock as mock
        result, _ = self._run_and_get_artifact(tmp_path, monkeypatch, mock, "synchronize")
        assert result["action"] == "RESPOND"
        assert result.get("is_incremental") is True


# Patch metadata preserved from the original diff (start of the next file):
# diff --git a/tests/test_verification.py b/tests/test_verification.py
# new file mode 100644
# index 0000000..682cbd9
# --- /dev/null
# +++ b/tests/test_verification.py
# @@ -0,0 +1,164
# -*- coding: utf-8 -*-
import unittest

import pandas as pd
from pyspark.sql import Row
from pyspark.sql.types import BooleanType

from pydeequ.checks import Check, CheckLevel
from pydeequ.verification import VerificationResult, VerificationSuite
from tests.conftest import setup_pyspark


class TestRowLevelResults(unittest.TestCase):
    """Tests for VerificationResult.rowLevelResultsAsDataFrame on a 3-row fixture."""

    @classmethod
    def setUpClass(cls):
        """Start a Spark session and build the shared fixture DataFrame.

        Fixture rows: (a="foo", b=1, c=5), (a="bar", b=2, c=6), (a="baz", b=3, c=None).
        """
        cls.spark = setup_pyspark().appName("test-row-level-results-local").getOrCreate()
        cls.sc = cls.spark.sparkContext
        cls.df = cls.sc.parallelize(
            [
                Row(a="foo", b=1, c=5),
                Row(a="bar", b=2, c=6),
                Row(a="baz", b=3, c=None),
            ]
        ).toDF()

    @classmethod
    def tearDownClass(cls):
        # Must shutdown callback for tests to stop
        # TODO Document this call to users or encapsulate in PyDeequSession
        cls.spark.sparkContext._gateway.shutdown_callback_server()
        cls.spark.stop()

    def _row_level_results(self, *checks, **kwargs):
        """Run the given checks against the fixture and return the row-level results.

        Extra keyword arguments (e.g. ``pandas=True``) are forwarded to
        rowLevelResultsAsDataFrame.
        """
        builder = VerificationSuite(self.spark).onData(self.df)
        for check in checks:
            builder = builder.addCheck(check)
        result = builder.run()
        return VerificationResult.rowLevelResultsAsDataFrame(
            self.spark, result, self.df, **kwargs
        )

    @staticmethod
    def _ordered_values(df, order_by, column):
        """Collect ``column`` as a list after ordering by ``order_by`` (deterministic order)."""
        return [row[column] for row in df.orderBy(order_by).select(column).collect()]

    def test_row_level_results_with_completeness(self):
        """Test that isComplete produces a Boolean column with correct per-row values."""
        check = Check(self.spark, CheckLevel.Error, "completeness_check").isComplete("c")
        row_level_df = self._row_level_results(check)

        # Should have same row count as original DataFrame
        self.assertEqual(row_level_df.count(), self.df.count())

        # Should have original columns (a, b, c) plus one Boolean column for the check
        self.assertIn("completeness_check", row_level_df.columns)
        self.assertIsInstance(row_level_df.schema["completeness_check"].dataType, BooleanType)

        # Order by b to ensure deterministic row ordering
        # b=1: c=5 (complete), b=2: c=6 (complete), b=3: c=None (incomplete)
        values = self._ordered_values(row_level_df, "b", "completeness_check")
        self.assertEqual(values, [True, True, False])

    def test_row_level_results_with_contained_in(self):
        """Test that isContainedIn produces correct row-level results."""
        check = Check(self.spark, CheckLevel.Error, "contained_check")
        check = check.isContainedIn("a", ["foo", "bar"])
        row_level_df = self._row_level_results(check)

        self.assertIn("contained_check", row_level_df.columns)

        # Order by a to ensure deterministic row ordering
        # a="bar" (contained), a="baz" (not contained), a="foo" (contained)
        values = self._ordered_values(row_level_df, "a", "contained_check")
        self.assertEqual(values, [True, False, True])

    def test_row_level_results_multiple_constraints_anded(self):
        """Test that multiple constraints in one Check are ANDed into a single column."""
        check = Check(self.spark, CheckLevel.Error, "multi_check")
        check = check.isContainedIn("a", ["foo", "baz"]).isComplete("c")
        row_level_df = self._row_level_results(check)

        self.assertIn("multi_check", row_level_df.columns)

        # Order by b to ensure deterministic row ordering
        # b=1: a=foo (contained), c=5 (complete) -> True AND True = True
        # b=2: a=bar (NOT contained), c=6 (complete) -> False AND True = False
        # b=3: a=baz (contained), c=None (NOT complete) -> True AND False = False
        values = self._ordered_values(row_level_df, "b", "multi_check")
        self.assertEqual(values, [True, False, False])

    def test_row_level_results_aggregate_only_check(self):
        """Test that aggregate-only checks (hasSize) don't add columns."""
        check = Check(self.spark, CheckLevel.Warning, "size_check")
        check = check.hasSize(lambda x: x >= 3)
        row_level_df = self._row_level_results(check)

        # hasSize is aggregate-only, so no new column should be added
        self.assertEqual(sorted(row_level_df.columns), sorted(self.df.columns))

    def test_row_level_results_preserves_original_columns(self):
        """Test that the original DataFrame columns are preserved."""
        check = Check(self.spark, CheckLevel.Error, "preserve_check").isComplete("c")
        row_level_df = self._row_level_results(check)

        original_columns = self.df.columns
        for col in original_columns:
            self.assertIn(col, row_level_df.columns)

        # Verify original data is unchanged (ordered for deterministic comparison).
        # Every original column is compared, including the nullable "c".
        original_values = self.df.orderBy("b").select(*original_columns).collect()
        result_values = row_level_df.orderBy("b").select(*original_columns).collect()
        self.assertEqual(original_values, result_values)

    def test_row_level_results_multiple_checks(self):
        """Test that multiple separate Check objects produce multiple Boolean columns."""
        check1 = Check(self.spark, CheckLevel.Error, "completeness_check").isComplete("c")
        check2 = Check(self.spark, CheckLevel.Error, "value_check")
        check2 = check2.isContainedIn("a", ["foo", "bar"])

        row_level_df = self._row_level_results(check1, check2)

        # Each Check should produce its own Boolean column
        self.assertIn("completeness_check", row_level_df.columns)
        self.assertIn("value_check", row_level_df.columns)
        self.assertEqual(row_level_df.count(), 3)

        # Verify values: c is null for row 3, and "baz" is not in ["foo", "bar"]
        results = row_level_df.orderBy("b").select("completeness_check", "value_check").collect()
        # Row 1 (a=foo, c=5): complete=True, contained=True
        self.assertTrue(results[0]["completeness_check"])
        self.assertTrue(results[0]["value_check"])
        # Row 2 (a=bar, c=6): complete=True, contained=True
        self.assertTrue(results[1]["completeness_check"])
        self.assertTrue(results[1]["value_check"])
        # Row 3 (a=baz, c=None): complete=False, contained=False
        self.assertFalse(results[2]["completeness_check"])
        self.assertFalse(results[2]["value_check"])

    def test_row_level_results_as_pandas(self):
        """Test the pandas=True option returns a Pandas DataFrame."""
        check = Check(self.spark, CheckLevel.Error, "pandas_check").isComplete("c")
        row_level_df = self._row_level_results(check, pandas=True)

        self.assertIsInstance(row_level_df, pd.DataFrame)
        self.assertIn("pandas_check", row_level_df.columns)


if __name__ == "__main__":
    unittest.main()