-
Notifications
You must be signed in to change notification settings - Fork 153
fix: incremental PR review, auto-approve, and bot operational improvements #263
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,101 @@ | ||
| name: Auto-Approve Clean PRs | ||
|
|
||
| on: | ||
| workflow_run: | ||
| workflows: [".github/workflows/base.yml", "PyDeequ Bot"] | ||
| types: [completed] | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BUG: The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BUG: The |
||
| permissions: | ||
| pull-requests: write | ||
| actions: read | ||
|
|
||
| jobs: | ||
| approve: | ||
| runs-on: ubuntu-latest | ||
| if: github.event.workflow_run.event == 'pull_request' || github.event.workflow_run.event == 'pull_request_target' | ||
| timeout-minutes: 2 | ||
|
|
||
| steps: | ||
| - name: Find PR and check both conditions | ||
| uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 | ||
| with: | ||
| script: | | ||
| const sha = context.payload.workflow_run.head_sha; | ||
| const owner = context.repo.owner; | ||
| const repo = context.repo.repo; | ||
|
|
||
| // Find the PR for this SHA | ||
| let prNumber = null; | ||
| const prs = context.payload.workflow_run.pull_requests; | ||
| if (prs && prs.length > 0) { | ||
| prNumber = prs[0].number; | ||
| } else { | ||
| const {data: searchResult} = await github.rest.pulls.list({ | ||
| owner, repo, state: 'open', sort: 'updated', direction: 'desc', per_page: 30 | ||
| }); | ||
| const match = searchResult.find(pr => pr.head.sha === sha); | ||
| if (match) { | ||
| prNumber = match.number; | ||
| } | ||
| } | ||
|
|
||
| if (!prNumber) { | ||
| core.info(`No open PR found for SHA ${sha}, skipping`); | ||
| return; | ||
| } | ||
|
|
||
| core.info(`Found PR #${prNumber} for SHA ${sha}`); | ||
|
|
||
| // Verify the PR head SHA still matches (no new push since trigger) | ||
| const {data: pr} = await github.rest.pulls.get({ | ||
| owner, repo, pull_number: prNumber | ||
| }); | ||
| if (pr.head.sha !== sha) { | ||
| core.info(`PR head ${pr.head.sha} differs from trigger SHA ${sha} — new push arrived, skipping`); | ||
| return; | ||
| } | ||
|
|
||
| // Condition 1: CI must have passed for this SHA | ||
| const {data: workflowRuns} = await github.rest.actions.listWorkflowRunsForRepo({ | ||
| owner, repo, head_sha: sha, status: 'completed' | ||
| }); | ||
| const ciRun = workflowRuns.workflow_runs.find(r => | ||
| r.name === '.github/workflows/base.yml' && r.conclusion === 'success' | ||
| ); | ||
| if (!ciRun) { | ||
| core.info(`CI has not passed for SHA ${sha}, skipping`); | ||
| return; | ||
| } | ||
|
|
||
| // Condition 2: Bot must have posted a clean review for this SHA | ||
| const {data: reviews} = await github.rest.pulls.listReviews({ | ||
| owner, repo, pull_number: prNumber | ||
| }); | ||
|
|
||
| const CLEAN_MARKER = '<!-- deequ-bot:clean -->'; | ||
|
|
||
| const latestBot = reviews | ||
| .filter(r => r.user.login === 'github-actions[bot]') | ||
| .sort((a, b) => new Date(b.submitted_at) - new Date(a.submitted_at))[0]; | ||
|
|
||
| if (!latestBot || !latestBot.body.includes(CLEAN_MARKER) || latestBot.commit_id !== sha) { | ||
| core.info('Bot has not posted a clean review for this SHA, skipping'); | ||
| return; | ||
| } | ||
|
|
||
| // Both conditions met — check for existing approval to prevent doubles | ||
| const botApprovals = reviews.filter(r => | ||
| r.user.login === 'github-actions[bot]' && r.state === 'APPROVED' | ||
| ); | ||
| if (botApprovals.length > 0) { | ||
| core.info('Bot already approved this PR, skipping'); | ||
| return; | ||
| } | ||
|
|
||
| // Approve | ||
| core.info(`Approving PR #${prNumber}: bot review clean + CI passed for SHA ${sha}`); | ||
| await github.rest.pulls.createReview({ | ||
| owner, repo, pull_number: prNumber, | ||
| event: 'APPROVE', | ||
| body: `No issues found and CI is passing. Auto-approved.\n\n---\n*Generated by AI — human merge required.*` | ||
| }); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| name: Manage Stale Issues and PRs | ||
|
|
||
| on: | ||
| schedule: | ||
| - cron: '0 9 * * MON' | ||
| workflow_dispatch: | ||
|
|
||
| permissions: | ||
| issues: write | ||
| pull-requests: write | ||
|
|
||
| jobs: | ||
| stale: | ||
| runs-on: ubuntu-latest | ||
| steps: | ||
| - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0 | ||
| with: | ||
| days-before-stale: 60 | ||
| days-before-close: 14 | ||
| stale-issue-label: 'stale' | ||
| stale-pr-label: 'stale' | ||
| stale-issue-message: > | ||
| This issue has been inactive for 60 days. It will be closed in 14 days | ||
| if there is no further activity. If this is still relevant, please comment | ||
| to keep it open. | ||
| stale-pr-message: > | ||
| This PR has been inactive for 60 days. It will be closed in 14 days | ||
| if there is no further activity. If you are still working on this, | ||
| please push an update or comment to keep it open. | ||
| close-issue-message: > | ||
| Closed due to inactivity. Feel free to reopen if this is still relevant. | ||
| close-pr-message: > | ||
| Closed due to inactivity. Feel free to reopen if you'd like to continue this work. | ||
| exempt-issue-labels: 'bug,enhancement,help-wanted' | ||
| exempt-pr-labels: 'help-wanted' | ||
| operations-per-run: 50 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,8 @@ def __init__(self): | |
| sys.exit(1) | ||
| self.repo = _require("GITHUB_REPOSITORY") | ||
| self.actor = os.getenv("GITHUB_ACTOR", "") | ||
| self.event_before = os.getenv("EVENT_BEFORE", "") | ||
| self.event_after = os.getenv("EVENT_AFTER", "") | ||
|
|
||
| self.bedrock_model_id = os.getenv("BEDROCK_MODEL_ID", "us.anthropic.claude-opus-4-6-v1") | ||
|
|
||
|
|
@@ -32,14 +34,16 @@ def __init__(self): | |
| self.enable_repo_search = os.getenv("ENABLE_REPO_SEARCH", "true").lower() == "true" | ||
|
|
||
| self.upstream_repo = os.getenv("UPSTREAM_REPO", "awslabs/python-deequ") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. DESIGN: The |
||
| self.codebase_src_dir = os.getenv("CODEBASE_SRC_DIR", "pydeequ") | ||
| self.codebase_file_ext = os.getenv("CODEBASE_FILE_EXT", ".py") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. DESIGN: |
||
|
|
||
| self.bedrock_timeout = 120 | ||
| self.max_context_chars = 200000 | ||
| self.bedrock_timeout = 240 | ||
| self.max_context_chars = 800000 | ||
| self.max_github_search_results = 8 | ||
| self.github_api_timeout = 10 | ||
| self.allowed_labels = { | ||
| "bug", "enhancement", "question", "documentation", | ||
| "help-wanted", "analyzer", "check", "spark-compatibility", "installation", | ||
| "help wanted", "python", | ||
| } | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,7 @@ def __init__(self, cfg): | |
| self._repo = cfg.repo | ||
| self._timeout = cfg.github_api_timeout | ||
| self._dry_run = cfg.dry_run | ||
| self._cfg = cfg | ||
| self._repo_root = os.getenv("GITHUB_WORKSPACE", os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) | ||
| self._headers = { | ||
| "Authorization": f"token {self._token}", | ||
|
|
@@ -48,6 +49,59 @@ def get_pr_diff(self, number): | |
| logger.error(f"PR diff fetch failed: {e}") | ||
| return "" | ||
|
|
||
| def get_compare_diff(self, base_sha, head_sha): | ||
| """Fetch the diff between two commits using the Compare API. | ||
| Returns the diff text, or empty string on failure (e.g. force-push | ||
| where base_sha no longer exists).""" | ||
| headers = {**self._headers, "Accept": "application/vnd.github.v3.diff"} | ||
| try: | ||
| resp = requests.get( | ||
| f"https://api.github.com/repos/{self._repo}/compare/{base_sha}...{head_sha}", | ||
| headers=headers, timeout=self._timeout, | ||
| ) | ||
| if resp.status_code == 200: | ||
| return resp.text | ||
| logger.warning(f"Compare API {base_sha[:7]}...{head_sha[:7]}: {resp.status_code}") | ||
| return "" | ||
| except Exception as e: | ||
| logger.error(f"Compare diff failed: {e}") | ||
| return "" | ||
|
|
||
| def get_ci_status(self, sha): | ||
| """Check commit statuses and check runs. Returns (passed, summary). | ||
| passed: True (all green), False (something failed), None (pending/unknown).""" | ||
| status = self._get(f"/repos/{self._repo}/commits/{sha}/status") | ||
| if status is None: | ||
| return None, "CI status unavailable" | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. EDGE_CASE: |
||
| combined_state = status.get("state", "pending") | ||
|
|
||
| check_data = self._get(f"/repos/{self._repo}/commits/{sha}/check-runs") | ||
| runs = check_data.get("check_runs", []) if check_data else [] | ||
|
|
||
| def _is_own_check(name): | ||
| lower = name.lower() | ||
| return "bot" in lower and ("analyze" in lower or "/ act" in lower) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. DESIGN: The |
||
|
|
||
| external_runs = [r for r in runs if not _is_own_check(r.get("name", ""))] | ||
|
|
||
| failed = [] | ||
| pending = [] | ||
| for r in external_runs: | ||
| if r.get("status") != "completed": | ||
| pending.append(r["name"]) | ||
| elif r.get("conclusion") not in ("success", "neutral", "skipped"): | ||
| failed.append(r["name"]) | ||
|
|
||
| if failed: | ||
| return False, f"CI failing: {', '.join(failed)}" | ||
| if pending: | ||
| return None, f"CI pending: {', '.join(pending)}" | ||
| if combined_state == "failure": | ||
| return False, "CI failing (status checks)" | ||
| if combined_state == "pending": | ||
| return None, "CI pending (status checks)" | ||
| return True, "CI passed" | ||
|
|
||
| def get_pr_files(self, number): | ||
| return self._get(f"/repos/{self._repo}/pulls/{number}/files") or [] | ||
|
|
||
|
|
@@ -64,16 +118,18 @@ def get_pr_review_comments(self, number, max_pages=10): | |
| page += 1 | ||
| return comments | ||
|
|
||
| def get_codebase_map(self, src_dir="pydeequ"): | ||
| """List all Python source files (excluding tests) as relative paths.""" | ||
| def get_codebase_map(self): | ||
| """List source files (excluding tests) as relative paths.""" | ||
| src_dir = self._cfg.codebase_src_dir | ||
| file_ext = self._cfg.codebase_file_ext | ||
| full_dir = os.path.join(self._repo_root, src_dir) | ||
| prefix = self._repo_root.rstrip("/") + "/" | ||
| try: | ||
| paths = [] | ||
| for root, dirs, files in os.walk(full_dir): | ||
| dirs[:] = [d for d in dirs if d not in ("tests", "__pycache__", ".git")] | ||
| dirs[:] = [d for d in dirs if d not in ("examples", "__pycache__", ".git", "tests", "test")] | ||
| for f in files: | ||
| if f.endswith(".py"): | ||
| if f.endswith(file_ext): | ||
| full = os.path.join(root, f) | ||
| rel = full[len(prefix):] if full.startswith(prefix) else full | ||
| paths.append(rel) | ||
|
|
@@ -116,9 +172,9 @@ def post_comment(self, number, body): | |
| return True | ||
| return self._post(f"/repos/{self._repo}/issues/{number}/comments", {"body": body}) | ||
|
|
||
| def post_pr_review(self, number, summary, inline_comments): | ||
| def post_pr_review(self, number, summary, inline_comments, event="COMMENT"): | ||
| if self._dry_run: | ||
| logger.info(f"[DRY RUN] PR review on #{number}: {len(inline_comments)} inline comments") | ||
| logger.info(f"[DRY RUN] PR review on #{number}: {len(inline_comments)} inline comments, event={event}") | ||
| return True | ||
|
|
||
| # Get valid diff lines per file from the PR | ||
|
|
@@ -134,31 +190,34 @@ def post_pr_review(self, number, summary, inline_comments): | |
| else: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BUG: When There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. EDGE_CASE: When There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. EDGE_CASE: If |
||
| invalid_comments.append(ic) | ||
|
|
||
| body = summary | ||
| if invalid_comments: | ||
| body += "\n\n**Additional feedback:**\n" | ||
| for ic in invalid_comments: | ||
| line_ref = f":{ic['line']}" if ic.get('line') else "" | ||
| body += f"\n`{ic['file']}{line_ref}` — {ic['comment']}\n" | ||
|
|
||
| payload = {"body": body, "event": event} | ||
| if valid_comments: | ||
| body = summary | ||
| if invalid_comments: | ||
| body += "\n\n**Additional feedback:**\n" | ||
| for ic in invalid_comments: | ||
| line_ref = f":{ic['line']}" if ic.get('line') else "" | ||
| body += f"\n`{ic['file']}{line_ref}` — {ic['comment']}\n" | ||
| payload = {"body": body, "event": "REQUEST_CHANGES", "comments": valid_comments} | ||
| try: | ||
| resp = requests.post( | ||
| f"https://api.github.com/repos/{self._repo}/pulls/{number}/reviews", | ||
| headers=self._headers, json=payload, timeout=self._timeout, | ||
| ) | ||
| if resp.status_code in (200, 201): | ||
| return True | ||
| logger.error(f"PR review API failed: {resp.status_code}, falling back to comment") | ||
| except Exception as e: | ||
| logger.error(f"PR review API failed: {e}, falling back to comment") | ||
|
|
||
| # Fallback: post all as regular comment | ||
| all_comments = inline_comments | ||
| payload["comments"] = valid_comments | ||
|
|
||
| try: | ||
| resp = requests.post( | ||
| f"https://api.github.com/repos/{self._repo}/pulls/{number}/reviews", | ||
| headers=self._headers, json=payload, timeout=self._timeout, | ||
| ) | ||
| if resp.status_code in (200, 201): | ||
| return True | ||
| logger.error(f"PR review API failed: {resp.status_code}, falling back to comment") | ||
| logger.error(f"Response: {resp.text[:500]}") | ||
| except Exception as e: | ||
| logger.error(f"PR review API failed: {e}, falling back to comment") | ||
|
|
||
| # Fallback: post as regular comment if review API fails | ||
| body = summary | ||
| if all_comments: | ||
| if inline_comments: | ||
| body += "\n\n**Inline feedback:**\n" | ||
| for ic in all_comments: | ||
| for ic in inline_comments: | ||
| line_ref = f":{ic['line']}" if ic.get('line') else "" | ||
| body += f"\n`{ic['file']}{line_ref}` — {ic['comment']}\n" | ||
| return self._post(f"/repos/{self._repo}/issues/{number}/comments", {"body": body}) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BUG: The
workflowsfield inworkflow_runtrigger must match the workflow name (thename:key in the YAML), not the file path.".github/workflows/base.yml"is a file path, not a workflow name. This workflow will never trigger unless base.yml hasname: .github/workflows/base.yml. Change this to the actualname:value from base.yml (e.g.,"CI"or whatever it's named).