Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions .github/workflows/issue-bot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# Workflow header: display name, triggering events, and run serialization.
name: PyDeequ Bot

# Runs when an issue or pull request is opened/reopened, when a pull request
# receives new commits (synchronize), when an issue comment is created, or
# when started manually via workflow_dispatch.
on:
issues:
types: [opened, reopened]
pull_request:
types: [opened, reopened, synchronize]
issue_comment:
types: [created]
workflow_dispatch:
inputs:
# Manual runs must name the issue/PR to process.
issue_number:
description: "Issue/PR number to process"
required: true
# Manual runs default to dry-run mode.
dry_run:
description: "Dry run (no writes)"
type: boolean
default: true

# Serialize per issue/PR to prevent duplicate comments
# The group key is the issue number, the PR number, or the manually supplied
# issue_number, whichever is present. cancel-in-progress is false, so a newer
# run does not cancel one that is already executing for the same number.
concurrency:
group: bot-${{ github.event.issue.number || github.event.pull_request.number || inputs.issue_number }}
cancel-in-progress: false

jobs:
# Job "analyze": runs `python -m issue_bot.main analyze` from the scripts
# directory and uploads the file at ARTIFACT_PATH as the `bot-result` artifact.
analyze:
runs-on: ubuntu-latest
timeout-minutes: 10
# Gate: manual dispatches always run. Any other event runs only when
#   - the actor is not github-actions[bot],
#   - for pull_request events, the head repo is this repository, and
#   - for issue/issue_comment events, the issue is not a pull request.
if: >-
(github.event_name == 'workflow_dispatch') ||
(github.actor != 'github-actions[bot]' &&
(github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request') &&
(github.event.issue.pull_request == null || github.event_name == 'pull_request'))
# This job is granted no issues/pull-requests write permission.
# id-token: write is presumably for OIDC role assumption in the
# "Configure AWS credentials" step — confirm.
permissions:
contents: read
id-token: write

steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
aws-region: us-east-1

- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.12"

- name: Install dependencies
run: pip install requests==2.33.1 boto3==1.42.94

- name: Run analysis
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_REPOSITORY: ${{ github.repository }}
# Same number selection as the concurrency group key.
ISSUE_NUMBER: ${{ github.event.issue.number || github.event.pull_request.number || inputs.issue_number }}
EVENT_TYPE: ${{ github.event_name }}
EVENT_ACTION: ${{ github.event.action }}
GITHUB_ACTOR: ${{ github.actor }}
# NOTE: a secret that is not configured expands to an empty string, so each
# of the variables below is always set, possibly to "".
KB_S3_BUCKET: ${{ secrets.KB_S3_BUCKET }}
KB_S3_KEY: ${{ secrets.KB_S3_KEY }}
BEDROCK_MODEL_ID: ${{ secrets.BEDROCK_MODEL_ID }}
GUARDRAIL_ID: ${{ secrets.GUARDRAIL_ID }}
GUARDRAIL_VERSION: ${{ secrets.GUARDRAIL_VERSION }}
ISSUE_CLASSIFY_PROMPT: ${{ secrets.ISSUE_CLASSIFY_PROMPT }}
ISSUE_RESPOND_PROMPT: ${{ secrets.ISSUE_RESPOND_PROMPT }}
PR_FILE_REVIEW_PROMPT: ${{ secrets.PR_FILE_REVIEW_PROMPT }}
FOLLOWUP_PROMPT: ${{ secrets.FOLLOWUP_PROMPT }}
# 'false' for every non-manual event, where inputs.dry_run is empty.
DRY_RUN: ${{ inputs.dry_run || 'false' }}
# Must match the `path` of the "Upload artifact" step below.
ARTIFACT_PATH: ${{ runner.temp }}/bot_result.json
run: python -m issue_bot.main analyze
working-directory: scripts

- name: Upload artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: bot-result
path: ${{ runner.temp }}/bot_result.json
retention-days: 30

# Job "act": downloads the `bot-result` artifact produced by `analyze` and runs
# `python -m issue_bot.main act` with write access to issues and pull requests.
# This job receives no AWS or prompt secrets; the only secret besides the
# GitHub token is the Slack webhook URL.
act:
  runs-on: ubuntu-latest
  # NOTE(review): one minute has to cover checkout, Python setup, pip install,
  # artifact download and the act step itself — confirm this is enough headroom.
  timeout-minutes: 1
  needs: analyze
  permissions:
    contents: read
    issues: write
    pull-requests: write

  steps:
    - name: Checkout repository
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      with:
        persist-credentials: false

    - name: Set up Python
      uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
      with:
        python-version: "3.12"

    - name: Install dependencies
      run: pip install requests==2.33.1 boto3==1.42.94

    - name: Download artifact
      uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
      with:
        name: bot-result
        path: ${{ runner.temp }}

    - name: Execute actions
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        GITHUB_REPOSITORY: ${{ github.repository }}
        ISSUE_NUMBER: ${{ github.event.issue.number || github.event.pull_request.number || inputs.issue_number }}
        EVENT_TYPE: ${{ github.event_name }}
        EVENT_ACTION: ${{ github.event.action }}
        SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
        DRY_RUN: ${{ inputs.dry_run || 'false' }}
        # The artifact is downloaded into runner.temp, so the file lands here.
        ARTIFACT_PATH: ${{ runner.temp }}/bot_result.json
      run: python -m issue_bot.main act
      working-directory: scripts
1 change: 1 addition & 0 deletions scripts/issue_bot/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__/
1 change: 1 addition & 0 deletions scripts/issue_bot/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

105 changes: 105 additions & 0 deletions scripts/issue_bot/bedrock_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import logging

import boto3
from botocore.config import Config as BotoConfig
from botocore.exceptions import ClientError, BotoCoreError

logger = logging.getLogger("issue_bot")

_CIRCUIT_BREAKER_THRESHOLD = 3


class BedrockClient:
    """Wrapper around the Bedrock Converse API with a simple circuit breaker.

    Each failed call increments a counter that is reset by the next successful
    call. Once the counter reaches ``_CIRCUIT_BREAKER_THRESHOLD`` the breaker
    opens and every later ``invoke`` returns ``None`` without calling Bedrock.
    """

    def __init__(self, cfg):
        """Create the ``bedrock-runtime`` client from *cfg*.

        Reads ``bedrock_model_id``, ``bedrock_timeout``, ``guardrail_id`` and
        ``guardrail_version`` from *cfg*.
        """
        self._model_id = cfg.bedrock_model_id
        self._client = boto3.client(
            "bedrock-runtime",
            config=BotoConfig(
                read_timeout=cfg.bedrock_timeout,
                connect_timeout=cfg.bedrock_timeout,
                retries={"max_attempts": 3, "mode": "adaptive"},
            ),
        )
        self._guardrail_id = cfg.guardrail_id
        self._guardrail_version = cfg.guardrail_version
        self._failures = 0
        self._circuit_open = False  # Resets per-process; GHA runs are ephemeral

    @property
    def available(self):
        """Return ``True`` while the circuit breaker is still closed."""
        return not self._circuit_open

    def invoke(self, system_prompt, user_prompt, max_tokens=4096,
               temperature=0.3, json_schema=None):
        """Invoke Bedrock Converse API with guardrail on user message only.

        Follows the GlueML pattern (BedrockModelHelper.java):
        - system_prompt: Instructions + trusted context (KB, diffs, codebase).
          Passed as plain text SystemContentBlock with cachePoint. The
          guardrail does NOT assess system prompts without guardContent.
        - user_prompt: Untrusted user input (issue title/body, PR title/body,
          comments). When guardrail is configured, wrapped in guardContent
          so the guardrail scans it for prompt injection.

        Args:
            system_prompt: System text; omitted from the request when falsy.
            user_prompt: Text of the single user message.
            max_tokens: Value sent as ``inferenceConfig.maxTokens``.
            temperature: Value sent as ``inferenceConfig.temperature``.
            json_schema: When truthy, sent as a ``json_schema`` output format
                named ``bot_response``.

        Returns:
            The stripped text of the first text block in the response, or
            ``None`` when the breaker is open, the guardrail intervened, or
            the call failed.
        """
        if self._circuit_open:
            logger.warning("Circuit breaker open, skipping Bedrock call")
            return None
        try:
            if self._guardrail_id:
                user_content = [{"guardContent": {"text": {"text": user_prompt}}}]
            else:
                user_content = [{"text": user_prompt}]

            kwargs = {
                "modelId": self._model_id,
                "messages": [{"role": "user", "content": user_content}],
                "inferenceConfig": {"maxTokens": max_tokens, "temperature": temperature},
            }

            if system_prompt:
                kwargs["system"] = [
                    {"text": system_prompt},
                    {"cachePoint": {"type": "default"}},
                ]

            if json_schema:
                # NOTE(review): confirm whether the API expects ``schema`` as a
                # dict or as a JSON-encoded string; it is passed through as-is.
                kwargs["outputConfig"] = {
                    "textFormat": {
                        "type": "json_schema",
                        "structure": {"jsonSchema": {
                            "schema": json_schema,
                            "name": "bot_response",
                        }},
                    }
                }

            if self._guardrail_id:
                kwargs["guardrailConfig"] = {
                    "guardrailIdentifier": self._guardrail_id,
                    "guardrailVersion": self._guardrail_version,
                    "trace": "enabled",
                }

            resp = self._client.converse(**kwargs)

            # A guardrail block is an expected outcome, not a service failure,
            # so it does not touch the failure counter.
            if resp.get("stopReason") == "guardrail_intervened":
                logger.warning("Guardrail intervened: %s", resp.get("trace", ""))
                return None

            output = resp.get("output", {}).get("message", {}).get("content", [])
            if not output:
                raise ValueError("Empty Bedrock response")

            # The first content block is not guaranteed to be a text block, and
            # indexing output[0]["text"] would raise an unhandled KeyError.
            # Take the first block that carries text instead.
            text = next(
                (block["text"] for block in output
                 if isinstance(block, dict) and "text" in block),
                None,
            )
            if text is None:
                raise ValueError("Bedrock response contains no text block")

            self._failures = 0
            usage = resp.get("usage", {})
            logger.info("Bedrock: input=%s, output=%s, cacheRead=%s, cacheWrite=%s",
                        usage.get("inputTokens"), usage.get("outputTokens"),
                        usage.get("cacheReadInputTokens"), usage.get("cacheWriteInputTokens"))
            return text.strip()
        except (ClientError, BotoCoreError, ValueError, ConnectionError) as e:
            self._failures += 1
            logger.error("Bedrock failed (%s/%s): %s",
                         self._failures, _CIRCUIT_BREAKER_THRESHOLD, e)
            if self._failures >= _CIRCUIT_BREAKER_THRESHOLD:
                self._circuit_open = True
                logger.error("Circuit breaker OPEN")
            return None
51 changes: 51 additions & 0 deletions scripts/issue_bot/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import os
import sys
import logging

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger("issue_bot")


class Config:
    """Bot settings read from environment variables, plus fixed limits.

    Construction exits the process with status 1 when a required variable is
    missing or ``ISSUE_NUMBER`` is not a plain decimal number.
    """

    def __init__(self):
        """Read all settings from the environment."""
        self.github_token = _require("GITHUB_TOKEN")
        self.event_type = _require("EVENT_TYPE")
        self.event_action = os.getenv("EVENT_ACTION", "")
        self.issue_number = _require("ISSUE_NUMBER")
        # str.isdigit() alone accepts non-ASCII digit characters such as "²",
        # which are not valid issue numbers; require ASCII digits only.
        if not (self.issue_number.isascii() and self.issue_number.isdigit()):
            logger.error(f"ISSUE_NUMBER must be numeric: {self.issue_number}")
            sys.exit(1)
        self.repo = _require("GITHUB_REPOSITORY")
        self.actor = os.getenv("GITHUB_ACTOR", "")

        # `or` rather than a getenv default: a variable that is set but empty
        # (e.g. populated from an unconfigured secret) must still fall back.
        self.bedrock_model_id = (
            os.getenv("BEDROCK_MODEL_ID") or "us.anthropic.claude-opus-4-6-v1"
        )

        self.kb_s3_bucket = os.getenv("KB_S3_BUCKET", "")
        self.kb_s3_key = os.getenv("KB_S3_KEY", "")

        self.slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", "")
        self.guardrail_id = os.getenv("GUARDRAIL_ID", "")
        self.guardrail_version = os.getenv("GUARDRAIL_VERSION") or "DRAFT"

        # Feature switches: only the literal "true" (case-insensitive) enables.
        self.dry_run = os.getenv("DRY_RUN", "false").lower() == "true"
        self.enable_slack = bool(self.slack_webhook_url)
        self.enable_repo_search = os.getenv("ENABLE_REPO_SEARCH", "true").lower() == "true"

        self.upstream_repo = os.getenv("UPSTREAM_REPO") or "awslabs/python-deequ"

        # Fixed limits; not configurable through the environment.
        self.bedrock_timeout = 120
        self.max_context_chars = 200000
        self.max_github_search_results = 8
        self.github_api_timeout = 10
        self.allowed_labels = {
            "bug", "enhancement", "question", "documentation",
            "help-wanted", "analyzer", "check", "spark-compatibility", "installation",
        }


def _require(name):
    """Return the value of environment variable *name*.

    Logs an error and exits the process with status 1 when the variable is
    unset or set to an empty string.
    """
    value = os.environ.get(name)
    if value:
        return value
    logger.error(f"Missing required env var: {name}")
    sys.exit(1)
Loading
Loading