From 724f7ca6cfe746ed7d6a4e94cf79cc78f02ce365 Mon Sep 17 00:00:00 2001
From: janhesters
Date: Tue, 14 Apr 2026 21:25:38 +0200
Subject: [PATCH 1/3] Move AI eval to daily cron and reduce runs to 1

AI evals were running per-PR and hitting Claude rate limits, blocking
merges. Move to a dedicated daily workflow (midnight UTC) with manual
dispatch, and reduce --runs from 4 to 1 to stay within rate limits.
---
Review notes (text in this area is not part of the commit):
- The line structure of this series was restored from a
  whitespace-collapsed copy. Indentation inside the hunks is
  reconstructed from the GitHub Actions layout and the hunk line
  counts; TODO confirm against the real blobs before applying.
- The From: header carries no e-mail address, so `git am` may reject
  the series as-is; `git apply` does not read that header.
- The new job declares neither `timeout-minutes` nor `permissions`.
  Consider an explicit job timeout (the eval passes --timeout 600000,
  presumably milliseconds) and `permissions: contents: read`.

 .github/workflows/ai-eval.yml | 46 +++++++++++++++++++++++++++++++++++
 .github/workflows/test.yml    | 40 ------------------------------
 package.json                  |  2 +-
 3 files changed, 47 insertions(+), 41 deletions(-)
 create mode 100644 .github/workflows/ai-eval.yml

diff --git a/.github/workflows/ai-eval.yml b/.github/workflows/ai-eval.yml
new file mode 100644
index 00000000..daf3bb4a
--- /dev/null
+++ b/.github/workflows/ai-eval.yml
@@ -0,0 +1,46 @@
+name: AI Eval
+
+on:
+  schedule:
+    - cron: "0 0 * * *"
+  workflow_dispatch:
+
+jobs:
+  ai-eval:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js 22
+        uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: 'npm'
+
+      - name: Install dependencies
+        run: npm install
+
+      - name: Install Claude Code
+        run: npm install -g @anthropic-ai/claude-code
+
+      - name: Check Claude authentication
+        id: claude-auth
+        run: node scripts/check-claude-ai-eval-gate.js
+        env:
+          CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+
+      - name: Run AI prompt evaluations
+        if: steps.claude-auth.outputs.available == 'true'
+        env:
+          CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+        run: npm run test:ai-eval
+
+      - name: Upload AI eval responses
+        if: always() && steps.claude-auth.outputs.available == 'true'
+        uses: actions/upload-artifact@v4
+        with:
+          name: ai-eval-responses
+          path: ai-evals/*.responses.md
+          retention-days: 14
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 75008374..27a96b30 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -32,43 +32,3 @@ jobs:
 
       - name: Run tests
         run: npm test
-
-  ai-eval:
-    runs-on: ubuntu-latest
-    needs: test
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Setup Node.js 22
-        uses: actions/setup-node@v4
-        with:
-          node-version: 22
-          cache: 'npm'
-
-      - name: Install dependencies
-        run: npm install
-
-      - name: Install Claude Code
-        run: npm install -g @anthropic-ai/claude-code
-
-      - name: Check Claude authentication
-        id: claude-auth
-        run: node scripts/check-claude-ai-eval-gate.js
-        env:
-          CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
-
-      - name: Run AI prompt evaluations
-        if: steps.claude-auth.outputs.available == 'true'
-        env:
-          CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
-        run: npm run test:ai-eval
-
-      - name: Upload AI eval responses
-        if: always() && steps.claude-auth.outputs.available == 'true'
-        uses: actions/upload-artifact@v4
-        with:
-          name: ai-eval-responses
-          path: ai-evals/*.responses.md
-          retention-days: 14
diff --git a/package.json b/package.json
index 53d54bfa..3eae00af 100644
--- a/package.json
+++ b/package.json
@@ -105,7 +105,7 @@
     "prepare": "husky",
     "release": "node release.js",
     "test": "vitest run && echo 'Test complete.' && npm run -s lint && npm run -s typecheck",
-    "test:ai-eval": "riteway ai ai-evals/aidd-review/review-skill-test.sudo --runs 4 --threshold 75 --timeout 600000 --agent claude --color --save-responses",
+    "test:ai-eval": "riteway ai ai-evals/aidd-review/review-skill-test.sudo --runs 1 --threshold 75 --timeout 600000 --agent claude --color --save-responses",
     "test:e2e": "vitest run **/*-e2e.test.js && echo 'E2E tests complete.'",
     "test:unit": "vitest run --exclude '**/*-e2e.test.js' && echo 'Unit tests complete.' && npm run -s lint && npm run -s typecheck",
     "toc": "doctoc README.md",

From 8e6c1f548fcadc8db3aa2299f25cbdb026b53723 Mon Sep 17 00:00:00 2001
From: janhesters
Date: Tue, 14 Apr 2026 21:29:55 +0200
Subject: [PATCH 2/3] Notify #ai-test-reports Slack channel on AI eval failure

Posts a message with a link to the failed run when the daily AI eval
fails. Requires SLACK_BOT_TOKEN secret with chat:write scope.
---
Review notes (text in this area is not part of the commit):
- The Slack step is gated on
  steps.claude-auth.outputs.available == 'true'. If the
  "Check Claude authentication" step itself fails before setting that
  output, the condition is false and no message is posted.
  NOTE(review): confirm that silent case is intended.

 .github/workflows/ai-eval.yml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/.github/workflows/ai-eval.yml b/.github/workflows/ai-eval.yml
index daf3bb4a..78567428 100644
--- a/.github/workflows/ai-eval.yml
+++ b/.github/workflows/ai-eval.yml
@@ -44,3 +44,13 @@ jobs:
           name: ai-eval-responses
           path: ai-evals/*.responses.md
           retention-days: 14
+
+      - name: Notify Slack on failure
+        if: failure() && steps.claude-auth.outputs.available == 'true'
+        uses: slackapi/slack-github-action@v2.1.0
+        with:
+          method: chat.postMessage
+          token: ${{ secrets.SLACK_BOT_TOKEN }}
+          payload: |
+            channel: "C0A5ZRP7XR5"
+            text: "🔴 AI Eval failed on `main` — <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View run>"

From 64562a1df797e8b6b9e292e3d4dc9427a4bdcd22 Mon Sep 17 00:00:00 2001
From: janhesters
Date: Tue, 14 Apr 2026 23:57:35 +0200
Subject: [PATCH 3/3] Restrict AI eval to main branch and fix Slack message

Add job-level guard so workflow_dispatch on non-main branches is a
no-op. Use github.ref_name in Slack message instead of hardcoded main.
---
Review notes (text in this area is not part of the commit):
- With the job-level guard `github.ref == 'refs/heads/main'`, every
  step of this job only runs on main, so `github.ref_name` in the
  Slack text resolves to `main` whenever the message is sent. The
  substitution is harmless and keeps the text correct if the guard is
  relaxed later.

 .github/workflows/ai-eval.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ai-eval.yml b/.github/workflows/ai-eval.yml
index 78567428..5f9aa679 100644
--- a/.github/workflows/ai-eval.yml
+++ b/.github/workflows/ai-eval.yml
@@ -7,6 +7,7 @@ on:
 
 jobs:
   ai-eval:
+    if: github.ref == 'refs/heads/main'
     runs-on: ubuntu-latest
 
     steps:
@@ -53,4 +54,4 @@ jobs:
           token: ${{ secrets.SLACK_BOT_TOKEN }}
           payload: |
             channel: "C0A5ZRP7XR5"
-            text: "🔴 AI Eval failed on `main` — <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View run>"
+            text: "🔴 AI Eval failed on `${{ github.ref_name }}` — <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View run>"