Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 70 additions & 1 deletion __tests__/unit/ai-review-prompt.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,28 @@ describe('computeVerdict (deterministic, ignores model)', () => {
expect(computeVerdict(null)).toBe('approve');
});

it('returns comment when findings exist', () => {
// Model-only findings (no `source` field) can at most yield 'comment'.
it('returns comment when only model findings exist', () => {
expect(computeVerdict([{ file: 'a', line: 1, quoted_line: 'x', claim: 'y' }])).toBe('comment');
});

// Oracle findings below 'error' severity do not escalate the verdict.
it('returns comment when oracle findings are warning/info only', () => {
expect(computeVerdict([
{ source: 'oracle:rivet-validate', severity: 'warning', artifact_id: 'X', claim: 'y' }
])).toBe('comment');
});

// A single oracle 'error' among any mix of findings forces escalation.
it('promotes to request_changes when ANY oracle finding has severity error', () => {
expect(computeVerdict([
{ source: 'oracle:rivet-validate', severity: 'warning', artifact_id: 'X', claim: 'a' },
{ source: 'oracle:rivet-validate', severity: 'error', artifact_id: 'Y', claim: 'b' }
])).toBe('request_changes');
});

// severity 'error' without an oracle `source` is still only a model claim,
// so it must not escalate past 'comment'.
it('does NOT promote to request_changes for model findings (no source) at "error" — only oracle has evidence-grade errors', () => {
expect(computeVerdict([
{ file: 'a', line: 1, quoted_line: 'x', claim: 'y', severity: 'error' }
])).toBe('comment');
});
});

describe('renderReviewMarkdown', () => {
Expand Down Expand Up @@ -245,6 +264,56 @@ describe('renderReviewMarkdown', () => {
expect(out).not.toContain('✅ Approve');
});

// Oracle findings render as: severity emoji + backticked artifact_id + oracle
// name — never as file:line, since they are artifact-anchored.
it('renders oracle findings with severity badge and artifact_id (not file:line)', () => {
const out = renderReviewMarkdown(
{
verdict: 'comment',
summary: 'Run includes rivet validate.',
findings: [
{
source: 'oracle:rivet-validate',
severity: 'error',
artifact_id: 'REQ-001',
claim: 'REQ-001: missing required field x'
},
{
source: 'oracle:rivet-impact',
severity: 'warning',
artifact_id: 'OLD-1',
claim: 'Artifact removed: OLD-1.'
}
]
},
99, 'def5678', meta
);
expect(out).toContain('🔴 `REQ-001`');
expect(out).toContain('rivet-validate');
expect(out).toContain('🟡 `OLD-1`');
expect(out).toContain('rivet-impact');
expect(out).toContain('🔴 Request changes'); // because severity:error oracle finding
});

// Mixed lists keep both visual styles side by side: artifact badge for the
// oracle entry, backticked file:line for the model entry.
it('renders mixed oracle + model findings in two visual styles', () => {
const out = renderReviewMarkdown(
{
verdict: 'comment',
summary: 'Mixed.',
findings: [
{
source: 'oracle:rivet-validate',
severity: 'warning',
artifact_id: 'REQ-100',
claim: 'REQ-100: lifecycle gap'
},
{ file: 'a.js', line: 5, quoted_line: '+x', claim: 'no test for x at a.js:5' }
]
},
99, 'def5678', meta
);
expect(out).toContain('🟡 `REQ-100`');
expect(out).toContain('`a.js:5`');
});

it('still posts when caller opts out of skip-approve-empty', () => {
const out = renderReviewMarkdown(
{ verdict: 'approve', summary: 'Doc only.', findings: [] },
Expand Down
113 changes: 113 additions & 0 deletions __tests__/unit/rivet-fetch.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import { EventEmitter } from 'node:events';
import {
hasRivetYaml,
extractTarballBuffer,
fetchAndExtractTarball
} from '../../src/rivet-fetch.js';

/**
 * Build a minimal Octokit stub covering the two routes rivet-fetch touches.
 *
 * @param {object} [opts]
 * @param {object|null} [opts.contents] - payload resolved by the contents
 *   route when `contentsStatus` is 200.
 * @param {number} [opts.contentsStatus] - simulated HTTP status for the
 *   contents route. Any non-200 value now rejects with an Error carrying
 *   `.status`, so auth failures (401/403) fail the same way 404/5xx do
 *   instead of silently resolving as success.
 * @param {Buffer|ArrayBuffer|null} [opts.tarball] - payload for the tarball
 *   route; defaults to a small fake buffer.
 * @returns {{ request: Function }} object mimicking `octokit.request`.
 */
function mockOctokit({ contents = null, contentsStatus = 200, tarball = null } = {}) {
return {
request: jest.fn().mockImplementation((route /*, params */) => {
if (route === 'GET /repos/{owner}/{repo}/contents/{path}') {
if (contentsStatus !== 200) {
// Keep the exact messages the original 404/5xx cases used; every
// other non-200 status (401, 403, 429, ...) also rejects rather
// than falling through to a bogus 200 response.
const message =
contentsStatus === 404 ? 'Not Found'
: contentsStatus >= 500 ? 'Server'
: 'Request failed';
const e = new Error(message);
e.status = contentsStatus;
return Promise.reject(e);
}
return Promise.resolve({ status: 200, data: contents });
}
if (route === 'GET /repos/{owner}/{repo}/tarball/{ref}') {
return Promise.resolve({ status: 200, data: tarball || Buffer.from('fake-tarball') });
}
// Any other route: benign empty success.
return Promise.resolve({ status: 200, data: {} });
})
};
}

/**
 * Build a jest mock standing in for `child_process.spawn` of `tar`.
 *
 * The returned fake process emits 'exit' with `exitCode` once stdin is
 * closed, emits 'error' instead when `errorOnSpawn` is set, and emits a
 * single stderr 'data' chunk when `stderr` text is provided.
 *
 * @param {object} [opts]
 * @param {number} [opts.exitCode] - exit code emitted after stdin.end().
 * @param {string|null} [opts.errorOnSpawn] - message for a spawn-time 'error'.
 * @param {string} [opts.stderr] - text emitted on the fake stderr stream.
 * @returns {Function} jest.fn that yields a fresh fake process per call.
 */
function makeFakeSpawn({ exitCode = 0, errorOnSpawn = null, stderr = '' } = {}) {
return jest.fn(() => {
const proc = new EventEmitter();
proc.stdout = new EventEmitter();
proc.stderr = new EventEmitter();

const stdin = new EventEmitter();
stdin.write = jest.fn();
stdin.end = jest.fn(() => {
// Simulate async exit shortly after stdin closed.
setImmediate(() => proc.emit('exit', exitCode));
});
proc.stdin = stdin;

if (errorOnSpawn) {
setImmediate(() => proc.emit('error', new Error(errorOnSpawn)));
} else if (stderr) {
setImmediate(() => proc.stderr.emit('data', Buffer.from(stderr)));
}

return proc;
});
}

// hasRivetYaml: boolean probe for rivet.yaml at a ref. Any failure mode
// (absent file, auth error, server error) must read as "no rivet.yaml".
describe('hasRivetYaml', () => {
it('returns true on 200 (file exists)', async () => {
const octokit = mockOctokit({ contents: { name: 'rivet.yaml' } });
const exists = await hasRivetYaml(octokit, 'o', 'r', 'main');
expect(exists).toBe(true);
});

it('returns false on 404 (file absent)', async () => {
const octokit = mockOctokit({ contentsStatus: 404 });
const exists = await hasRivetYaml(octokit, 'o', 'r', 'main');
expect(exists).toBe(false);
});

it('fails safe (returns false) on auth/server errors', async () => {
const octokit = mockOctokit({ contentsStatus: 503 });
const exists = await hasRivetYaml(octokit, 'o', 'r', 'main');
expect(exists).toBe(false);
});
});

// extractTarballBuffer: pipes a tarball buffer into a spawned `tar` process.
// Success is tar exiting 0; both non-zero exits and spawn errors must reject.
describe('extractTarballBuffer', () => {
it('resolves on tar exit code 0', async () => {
const spawnFn = makeFakeSpawn({ exitCode: 0 });
const pending = extractTarballBuffer(Buffer.from('x'), '/tmp/dest', { spawnFn });
await expect(pending).resolves.toBeUndefined();
// tar must decompress, strip the repo-root folder, and extract into dest.
const expectedArgs = ['-xz', '--strip-components=1', '-C', '/tmp/dest'];
expect(spawnFn).toHaveBeenCalledWith(
'tar',
expectedArgs,
expect.objectContaining({ stdio: ['pipe', 'pipe', 'pipe'] })
);
});

it('rejects on non-zero exit with stderr in message', async () => {
const spawnFn = makeFakeSpawn({ exitCode: 1, stderr: 'tar: bad gzip' });
const pending = extractTarballBuffer(Buffer.from('x'), '/tmp/dest', { spawnFn });
await expect(pending).rejects.toThrow(/tar exited 1/);
});

it('rejects when tar cannot be spawned', async () => {
const spawnFn = makeFakeSpawn({ errorOnSpawn: 'ENOENT' });
const pending = extractTarballBuffer(Buffer.from('x'), '/tmp/dest', { spawnFn });
await expect(pending).rejects.toThrow(/ENOENT/);
});
});

// fetchAndExtractTarball: downloads the repo tarball for a ref and feeds it
// through tar. Must handle both Buffer and ArrayBuffer response bodies.
describe('fetchAndExtractTarball', () => {
it('requests the tarball and pipes it through tar', async () => {
const octokit = mockOctokit({ tarball: Buffer.from('hello-tar') });
const spawnFn = makeFakeSpawn({ exitCode: 0 });
await fetchAndExtractTarball(octokit, 'o', 'r', 'sha123', '/tmp/dest', { spawnFn });
expect(octokit.request).toHaveBeenCalledWith(
'GET /repos/{owner}/{repo}/tarball/{ref}',
{ owner: 'o', repo: 'r', ref: 'sha123' }
);
expect(spawnFn).toHaveBeenCalledTimes(1);
});

it('handles ArrayBuffer responses (Octokit binary default)', async () => {
const raw = new ArrayBuffer(8);
new Uint8Array(raw).set([1, 2, 3, 4, 5, 6, 7, 8]);
const octokit = mockOctokit({ tarball: raw });
const spawnFn = makeFakeSpawn({ exitCode: 0 });
const pending = fetchAndExtractTarball(octokit, 'o', 'r', 'sha123', '/tmp/dest', { spawnFn });
await expect(pending).resolves.toBeUndefined();
});
});
10 changes: 10 additions & 0 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,16 @@ ai_review:
timeout: 300000
allow_remote_endpoint: false

# Mechanical oracle: when the target repo ships rivet.yaml and the rivet
# binary is installed, run `rivet validate` and `rivet impact --since=<base>`
# against a fresh tarball of the PR head. Findings from the oracle are
# mechanically validated and bypass the model entirely. Errors from the
# oracle promote the verdict to request_changes.
rivet_oracle:
enabled: true
binary_path: "data/rivet/rivet"
timeout_ms: 60000

scheduler:
interval_minutes: 5
max_tasks_per_tick: 5
Expand Down
32 changes: 27 additions & 5 deletions src/ai-review-prompt.js
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,19 @@ export function filterFindings(findings, diffText) {
/**
 * Derive the review verdict from the validated findings list. The model's
 * own `verdict` field is advisory only — this function is the single
 * source of truth.
 *
 * Promotion rules:
 * - any oracle finding with severity 'error' → request_changes
 *   (these are evidence-grade: broken cross-refs, schema violations,
 *   missing required fields — the oracle has already said no)
 * - otherwise any finding → comment
 * - no findings → approve
 *
 * @param {Array<object>|null|undefined} findings - validated findings.
 * @returns {'approve'|'comment'|'request_changes'} deterministic verdict.
 */
export function computeVerdict(findings) {
if (!Array.isArray(findings) || findings.length === 0) return 'approve';

// Only oracle-sourced findings carry evidence-grade errors.
const isOracleError = (finding) =>
Boolean(finding) &&
finding.severity === 'error' &&
typeof finding.source === 'string' &&
finding.source.startsWith('oracle:');

return findings.some(isOracleError) ? 'request_changes' : 'comment';
}

Expand Down Expand Up @@ -198,11 +208,23 @@ export function renderReviewMarkdown(parsed, prNumber, headSha, meta, opts = {})
lines.push(`**Findings (${findings.length}):**`);
lines.push('');
findings.forEach((f, i) => {
lines.push(`${i + 1}. \`${f.file}:${f.line}\``);
lines.push(' ```');
lines.push(` ${f.quoted_line}`);
lines.push(' ```');
lines.push(` ${f.claim}`);
if (typeof f.source === 'string' && f.source.startsWith('oracle:')) {
// Oracle finding: artifact-anchored, mechanically validated.
const sevBadge =
f.severity === 'error' ? '🔴'
: f.severity === 'warning' ? '🟡'
: 'ℹ️';
const oracleName = f.source.replace('oracle:', '');
lines.push(`${i + 1}. ${sevBadge} \`${f.artifact_id || '<unknown>'}\` _(${oracleName})_`);
lines.push(` ${f.claim}`);
} else {
// Model finding: file:line-anchored, post-validated.
lines.push(`${i + 1}. \`${f.file}:${f.line}\``);
lines.push(' ```');
lines.push(` ${f.quoted_line}`);
lines.push(' ```');
lines.push(` ${f.claim}`);
}
lines.push('');
});
}
Expand Down
83 changes: 72 additions & 11 deletions src/ai-review.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import {
computeVerdict,
renderReviewMarkdown
} from './ai-review-prompt.js';
import { runRivetOracle } from './rivet-oracle.js';
import { hasRivetYaml, withTempRepoCheckout } from './rivet-fetch.js';

const _reviewTimestamps = new Map();

Expand Down Expand Up @@ -371,6 +373,43 @@ async function reviewPullRequest(octokit, owner, repo, prNumber) {
{ owner, repo, pull_number: prNumber }
);

// Mechanical oracle: only runs for repos that ship rivet.yaml AND have
// the binary configured. Failures are non-fatal — the model path still
// runs.
let oracleFindings = [];
let oracleSummary = null;
const rivetCfg = config.rivet_oracle;
if (rivetCfg?.enabled && rivetCfg?.binary_path) {
const headSha = prData.data.head?.sha;
const baseSha = prData.data.base?.sha;
try {
if (headSha && (await hasRivetYaml(octokit, owner, repo, headSha))) {
oracleSummary = await withTempRepoCheckout(
octokit,
owner,
repo,
headSha,
(repoPath) => runRivetOracle(rivetCfg.binary_path, repoPath, {
baseRef: baseSha,
timeout: rivetCfg.timeout_ms || 60000
})
);
if (oracleSummary?.findings) {
oracleFindings = oracleSummary.findings;
}
getLogger().info(
{ pr: prNumber, oracleFindings: oracleFindings.length },
'Rivet oracle complete'
);
}
} catch (err) {
getLogger().warn(
{ pr: prNumber, err: err.message },
'Rivet oracle failed — continuing with model-only review'
);
}
}

// Strict-JSON contract by default; legacy freeform if user explicitly
// overrides system_prompt in config.
const systemPrompt = aiConfig.system_prompt || STRICT_SYSTEM_PROMPT;
Expand Down Expand Up @@ -412,27 +451,49 @@ async function reviewPullRequest(octokit, owner, repo, prNumber) {

if (useStrictMode) {
const parsed = tryParseReview(aiResponse);
if (!parsed.ok) {
let modelFindings = [];
let modelSummary = null;

if (parsed.ok) {
modelFindings = filterFindings(parsed.data.findings, diffText);
modelSummary = parsed.data.summary;
const droppedCount = parsed.data.findings.length - modelFindings.length;
if (droppedCount > 0) {
getLogger().info(
{ pr: prNumber, droppedCount, kept: modelFindings.length },
'Filtered out hedging/unquoted findings from model output'
);
}
} else {
getLogger().warn(
{ pr: prNumber, error: parsed.error, sample: aiResponse.slice(0, 200) },
'AI review JSON parse failed — skipping post (silence is better than slop)'
'AI review JSON parse failed'
);
return { success: false, error: `parse: ${parsed.error}`, skipped: true };
}

const filtered = filterFindings(parsed.data.findings, diffText);
const droppedCount = parsed.data.findings.length - filtered.length;
if (droppedCount > 0) {
// Oracle findings come first — they're mechanically validated, not
// subject to the slop filter, and most readable when grouped.
const allFindings = [...oracleFindings, ...modelFindings];

// If model failed AND oracle had nothing → skip post entirely.
if (!parsed.ok && oracleFindings.length === 0) {
getLogger().info(
{ pr: prNumber, droppedCount, kept: filtered.length },
'Filtered out hedging/unquoted findings'
{ pr: prNumber },
'AI review skipped: model parse failed and no oracle findings'
);
return { success: false, error: `parse: ${parsed.error}`, skipped: true };
}

const renderInput = { ...parsed.data, findings: filtered };
// Compose summary: prefer the model's, fall back to oracle-derived.
const summary = modelSummary ||
(oracleFindings.length > 0
? `${oracleFindings.length} mechanical finding(s) from rivet oracle.`
: 'No findings.');

const renderInput = { summary, findings: allFindings };
comment = renderReviewMarkdown(renderInput, prNumber, headSha, meta);
assessment = computeVerdict(filtered);
findingsCount = filtered.length;
assessment = computeVerdict(allFindings);
findingsCount = allFindings.length;

if (comment === null) {
getLogger().info(
Expand Down
Loading
Loading