billpratt · Copilot · May 18, 2026 · May 18, 2026 · May 29, 2026 · Jun 4, 2026
diff --git a/.github/workflows/auto-approve.yml b/.github/workflows/auto-approve.yml
@@ -0,0 +1,116 @@
+name: Auto-Approve Clean PRs
+
+# Approves a PR once BOTH the CI workflow and the review bot report a clean
+# result for the PR's current head SHA. A human still performs the merge.
+on:
+  workflow_run:
+    workflows: [".github/workflows/base.yml", "PyDeequ Bot"]
+    types: [completed]
+
+permissions:
+  pull-requests: write
+  actions: read
+
+jobs:
+  approve:
+    runs-on: ubuntu-latest
+    if: github.event.workflow_run.event == 'pull_request' || github.event.workflow_run.event == 'pull_request_target'
+    timeout-minutes: 2
+    # Both trigger workflows can complete at nearly the same time; serialize
+    # runs per commit so the "already approved" check below cannot race.
+    concurrency:
+      group: auto-approve-${{ github.event.workflow_run.head_sha }}
+      cancel-in-progress: false
+
+    steps:
+      - name: Find PR and check both conditions
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        with:
+          script: |
+            const sha = context.payload.workflow_run.head_sha;
+            const owner = context.repo.owner;
+            const repo = context.repo.repo;
+            const BOT_LOGIN = 'github-actions[bot]';
+
+            // Find the PR for this SHA
+            let prNumber = null;
+            const prs = context.payload.workflow_run.pull_requests;
+            if (prs && prs.length > 0) {
+              prNumber = prs[0].number;
+            } else {
+              // The payload listed no PR: scan every open PR (all pages, not just
+              // the first 30) for one whose head is the triggering commit.
+              const openPrs = await github.paginate(github.rest.pulls.list, {
+                owner, repo, state: 'open', sort: 'updated', direction: 'desc', per_page: 100
+              });
+              const match = openPrs.find(pr => pr.head.sha === sha);
+              if (match) {
+                prNumber = match.number;
+              }
+            }
+
+            if (!prNumber) {
+              core.info(`No open PR found for SHA ${sha}, skipping`);
+              return;
+            }
+
+            core.info(`Found PR #${prNumber} for SHA ${sha}`);
+
+            // Verify the PR head SHA still matches (no new push since trigger)
+            const {data: pr} = await github.rest.pulls.get({
+              owner, repo, pull_number: prNumber
+            });
+            if (pr.head.sha !== sha) {
+              core.info(`PR head ${pr.head.sha} differs from trigger SHA ${sha} — new push arrived, skipping`);
+              return;
+            }
+
+            // Condition 1: CI must have passed for this SHA
+            const workflowRuns = await github.paginate(github.rest.actions.listWorkflowRunsForRepo, {
+              owner, repo, head_sha: sha, status: 'completed', per_page: 100
+            });
+            const ciRun = workflowRuns.find(r =>
+              r.name === '.github/workflows/base.yml' && r.conclusion === 'success'
+            );
+            if (!ciRun) {
+              core.info(`CI has not passed for SHA ${sha}, skipping`);
+              return;
+            }
+
+            // Condition 2: Bot must have posted a clean review for this SHA.
+            // Reviews come back oldest-first, so fetch every page; otherwise the
+            // newest review is missed once a PR has more than one page of them.
+            const reviews = await github.paginate(github.rest.pulls.listReviews, {
+              owner, repo, pull_number: prNumber, per_page: 100
+            });
+
+            const CLEAN_MARKER = '<!-- deequ-bot:clean -->';
+
+            // `user` may be null (deleted account) and `body` may be null/empty.
+            const latestBot = reviews
+              .filter(r => r.user?.login === BOT_LOGIN)
+              .sort((a, b) => new Date(b.submitted_at) - new Date(a.submitted_at))[0];
+
+            if (!latestBot || !(latestBot.body || '').includes(CLEAN_MARKER) || latestBot.commit_id !== sha) {
+              core.info('Bot has not posted a clean review for this SHA, skipping');
+              return;
+            }
+
+            // Both conditions met — check for existing approval to prevent doubles
+            const botApprovals = reviews.filter(r =>
+              r.user?.login === BOT_LOGIN && r.state === 'APPROVED'
+            );
+            if (botApprovals.length > 0) {
+              core.info('Bot already approved this PR, skipping');
+              return;
+            }
+
+            // Approve, pinned to the verified commit so a push that lands after
+            // the head check above cannot receive this approval.
+            core.info(`Approving PR #${prNumber}: bot review clean + CI passed for SHA ${sha}`);
+            await github.rest.pulls.createReview({
+              owner, repo, pull_number: prNumber,
+              commit_id: sha,
+              event: 'APPROVE',
+              body: `No issues found and CI is passing. Auto-approved.\n\n---\n*Generated by AI — human merge required.*`
+            });
diff --git a/.github/workflows/issue-bot.yml b/.github/workflows/issue-bot.yml
@@ -61,16 +61,20 @@ jobs:
           ISSUE_NUMBER: ${{ github.event.issue.number || github.event.pull_request.number || inputs.issue_number }}
           EVENT_TYPE: ${{ github.event_name }}
           EVENT_ACTION: ${{ github.event.action }}
+          EVENT_BEFORE: ${{ github.event.before }}
+          EVENT_AFTER: ${{ github.event.pull_request.head.sha || github.event.after }}
           GITHUB_ACTOR: ${{ github.actor }}
           KB_S3_BUCKET: ${{ secrets.KB_S3_BUCKET }}
           KB_S3_KEY: ${{ secrets.KB_S3_KEY }}
           BEDROCK_MODEL_ID: ${{ secrets.BEDROCK_MODEL_ID }}
           GUARDRAIL_ID: ${{ secrets.GUARDRAIL_ID }}
           GUARDRAIL_VERSION: ${{ secrets.GUARDRAIL_VERSION }}
-          ISSUE_CLASSIFY_PROMPT: ${{ secrets.ISSUE_CLASSIFY_PROMPT }}
-          ISSUE_RESPOND_PROMPT: ${{ secrets.ISSUE_RESPOND_PROMPT }}
-          PR_FILE_REVIEW_PROMPT: ${{ secrets.PR_FILE_REVIEW_PROMPT }}
-          FOLLOWUP_PROMPT: ${{ secrets.FOLLOWUP_PROMPT }}
+          SM_ISSUE_CLASSIFY_PROMPT: pydeequ-bot/issue-classify-prompt
+          SM_ISSUE_RESPOND_PROMPT: pydeequ-bot/issue-respond-prompt
+          SM_PR_FILE_REVIEW_PROMPT: pydeequ-bot/pr-file-review-prompt
+          SM_FOLLOWUP_PROMPT: pydeequ-bot/followup-prompt
+          CODEBASE_SRC_DIR: pydeequ
+          CODEBASE_FILE_EXT: .py
           DRY_RUN: ${{ inputs.dry_run || 'false' }}
           ARTIFACT_PATH: ${{ runner.temp }}/bot_result.json
         run: python -m issue_bot.main analyze

diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
@@ -0,0 +1,44 @@
+name: Manage Stale Issues and PRs
+
+# Runs every Monday at 09:00 (GitHub evaluates cron in UTC) and on manual dispatch.
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 9 * * MON"
+
+permissions:
+  pull-requests: write
+  issues: write
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0
+        with:
+          # Timing: mark after 60 idle days, close 14 days after marking.
+          days-before-stale: 60
+          days-before-close: 14
+          operations-per-run: 50
+          # Labels applied when stale, and labels that opt an item out.
+          stale-issue-label: "stale"
+          stale-pr-label: "stale"
+          exempt-issue-labels: "bug,enhancement,help-wanted"
+          exempt-pr-labels: "help-wanted"
+          # Issue messages.
+          stale-issue-message: >
+            This issue has been inactive for 60 days.
+            It will be closed in 14 days if there is no further activity.
+            If this is still relevant, please comment to keep it open.
+          close-issue-message: >
+            Closed due to inactivity.
+            Feel free to reopen if this is still relevant.
+          # PR messages.
+          stale-pr-message: >
+            This PR has been inactive for 60 days.
+            It will be closed in 14 days if there is no further activity.
+            If you are still working on this, please push an update or comment
+            to keep it open.
+          close-pr-message: >
+            Closed due to inactivity.
+            Feel free to reopen if you'd like to continue this work.
diff --git a/README.md b/README.md
@@ -24,7 +24,6 @@ There are 4 main components of Deequ, and they are:
 - With PyDeequ v0.1.8+, we now officially support Spark3 ! Just make sure you have an environment variable `SPARK_VERSION` to specify your Spark version! 
 - We've release a blogpost on integrating PyDeequ onto AWS leveraging services such as AWS Glue, Athena, and SageMaker! Check it out: [Monitor data quality in your data lake using PyDeequ and AWS Glue](https://aws.amazon.com/blogs/big-data/monitor-data-quality-in-your-data-lake-using-pydeequ-and-aws-glue/).
 - Check out the [PyDeequ Release Announcement Blogpost](https://aws.amazon.com/blogs/big-data/testing-data-quality-at-scale-with-pydeequ/) with a tutorial walkthrough the Amazon Reviews dataset!
-- Join the PyDeequ community on [PyDeequ Slack](https://join.slack.com/t/pydeequ/shared_invite/zt-te6bntpu-yaqPy7bhiN8Lu0NxpZs47Q) to chat with the devs!
 
 ## Quickstart
 
@@ -120,6 +119,17 @@ checkResult_df = VerificationResult.checkResultsAsDataFrame(spark, checkResult)
 checkResult_df.show()
 ```
 
+#### Row-Level Results
+
+You can also get row-level results to see which individual rows passed or failed each check. This is useful for quarantining rows with data quality issues:
+
+```python
+rowLevelResult_df = VerificationResult.rowLevelResultsAsDataFrame(spark, checkResult, df)
+rowLevelResult_df.show()
+```
+
+Each check produces a Boolean column (named after the check description) indicating pass/fail per row. When a single Check contains multiple constraints, they are ANDed together into one Boolean column — the row passes only if all constraints in that Check pass. Only checks with row-level-capable constraints (e.g., `isComplete`, `isContainedIn`, `hasPattern`, `isUnique`) will produce output columns.
+
 ### Repository
 
 Save to a Metrics Repository by adding the `useRepository()` and `saveOrAppendResult()` calls to your Analysis Runner.