Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions .github/workflows/ai-eval.yml
Comment thread
ianwhitedeveloper marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Runs the AI prompt evaluation suite on a daily schedule or on manual
# dispatch, uploads the saved responses, and posts to Slack on failure.
name: AI Eval

on:
  schedule:
    # Once a day at 00:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 0 * * *"
  workflow_dispatch:

# Least privilege: restrict the default GITHUB_TOKEN to read-only repository
# contents. No step in this job writes to the repository; Claude and Slack
# authenticate with their own secrets.
permissions:
  contents: read

jobs:
  ai-eval:
    # Skip the job when the workflow is dispatched from any ref except main.
    if: github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Node.js 22
        uses: actions/setup-node@v4
        with:
          node-version: 22
          cache: 'npm'

      - name: Install dependencies
        run: npm install

      - name: Install Claude Code
        run: npm install -g @anthropic-ai/claude-code

      # Gate step: later steps read its `available` output and run only when
      # it equals 'true'. NOTE(review): the script is not visible here; it is
      # presumably what sets that output based on the token - confirm.
      - name: Check Claude authentication
        id: claude-auth
        run: node scripts/check-claude-ai-eval-gate.js
        env:
          CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}

      - name: Run AI prompt evaluations
        if: steps.claude-auth.outputs.available == 'true'
        env:
          CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
        run: npm run test:ai-eval

      # always() keeps the upload running even when the evaluation step
      # failed, so the saved responses remain available for inspection.
      - name: Upload AI eval responses
        if: always() && steps.claude-auth.outputs.available == 'true'
        uses: actions/upload-artifact@v4
        with:
          name: ai-eval-responses
          path: ai-evals/*.responses.md
          retention-days: 14

      # Only fires when an earlier step failed AND the auth gate reported
      # available == 'true'; failures before or inside the gate step do not
      # notify.
      - name: Notify Slack on failure
        if: failure() && steps.claude-auth.outputs.available == 'true'
        uses: slackapi/slack-github-action@v2.1.0
        with:
          method: chat.postMessage
          token: ${{ secrets.SLACK_BOT_TOKEN }}
          payload: |
            channel: "C0A5ZRP7XR5"
            text: "🔴 AI Eval failed on `${{ github.ref_name }}` — <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View run>"
40 changes: 0 additions & 40 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,43 +32,3 @@ jobs:

- name: Run tests
run: npm test

# AI prompt evaluation job; starts only after the `test` job has succeeded.
ai-eval:
  needs: test
  runs-on: ubuntu-latest

  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Setup Node.js 22
      uses: actions/setup-node@v4
      with:
        cache: npm
        node-version: 22

    - name: Install dependencies
      run: npm install

    - name: Install Claude Code
      run: npm install -g @anthropic-ai/claude-code

    # Gate step: the steps below run only when its `available` output
    # equals 'true'.
    - name: Check Claude authentication
      id: claude-auth
      env:
        CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
      run: node scripts/check-claude-ai-eval-gate.js

    - name: Run AI prompt evaluations
      if: ${{ steps.claude-auth.outputs.available == 'true' }}
      run: npm run test:ai-eval
      env:
        CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}

    # always() lets the artifact upload run even after a failed evaluation.
    - name: Upload AI eval responses
      if: ${{ always() && steps.claude-auth.outputs.available == 'true' }}
      uses: actions/upload-artifact@v4
      with:
        name: ai-eval-responses
        path: ai-evals/*.responses.md
        retention-days: 14
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@
"prepare": "husky",
"release": "node release.js",
"test": "vitest run && echo 'Test complete.' && npm run -s lint && npm run -s typecheck",
"test:ai-eval": "riteway ai ai-evals/aidd-review/review-skill-test.sudo --runs 4 --threshold 75 --timeout 600000 --agent claude --color --save-responses",
"test:ai-eval": "riteway ai ai-evals/aidd-review/review-skill-test.sudo --runs 1 --threshold 75 --timeout 600000 --agent claude --color --save-responses",
"test:e2e": "vitest run **/*-e2e.test.js && echo 'E2E tests complete.'",
"test:unit": "vitest run --exclude '**/*-e2e.test.js' && echo 'Unit tests complete.' && npm run -s lint && npm run -s typecheck",
"toc": "doctoc README.md",
Expand Down
Loading