Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions __tests__/e2e/webhook.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -319,10 +319,14 @@ describe('E2E webhook simulation', () => {

// The webhook signature is valid so Probot accepts it. The handler will
// attempt GitHub API calls (e.g. checking org membership) which fail
// without a real connection, so Probot may return 500 from the handler
// error. 200/202 (accepted) and 500 (handler error) all confirm the
// signature was validated and the event was dispatched to the handler.
expect([200, 202, 500]).toContain(res.status);
// without a real installation. Probot returns one of:
// 200/202: signature validated, handler completed (handler may have
// silently returned without making API calls)
// 401: handler proceeded and tried to acquire an installation
// token but credentials are fake
// 500: unhandled exception inside the handler
// All four confirm the signature was validated and the event dispatched.
expect([200, 202, 401, 500]).toContain(res.status);
}, 15000);

// -----------------------------------------------------------------------
Expand Down
19 changes: 19 additions & 0 deletions __tests__/integration/ai-review.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,25 @@ describe('ai-review', () => {
const callArgs = global.fetch.mock.calls[0];
expect(callArgs[1].signal).toBeInstanceOf(AbortSignal);
});

it('passes responseFormat through to the wire payload as response_format', async () => {
  mockFetchSuccess('{"verdict":"approve","summary":"x","findings":[]}');
  // Minimal json_schema response_format, as a caller would build it.
  const schemaFormat = {
    type: 'json_schema',
    json_schema: { name: 'X', strict: true, schema: { type: 'object' } }
  };

  await callLocalAI(mockEndpoint, mockModel, mockSystemPrompt, mockUserPrompt, {
    responseFormat: schemaFormat
  });

  // The option must land on the wire verbatim under the snake_case key.
  const [, fetchInit] = global.fetch.mock.calls[0];
  expect(JSON.parse(fetchInit.body).response_format).toEqual(schemaFormat);
});

it('omits response_format when not requested (legacy compat)', async () => {
  mockFetchSuccess('plain prose');

  await callLocalAI(mockEndpoint, mockModel, mockSystemPrompt, mockUserPrompt);

  // No responseFormat option → the key must be absent from the wire payload,
  // not null, so legacy endpoints never see an unexpected field.
  const [, fetchInit] = global.fetch.mock.calls[0];
  const wirePayload = JSON.parse(fetchInit.body);
  expect(wirePayload.response_format).toBeUndefined();
});
});

// =========================================================================
Expand Down
52 changes: 44 additions & 8 deletions __tests__/integration/app.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -137,17 +137,41 @@ function setupApp(options = {}) {
}

/**
 * Build a mock Octokit whose `request()` dispatches by REST route into the
 * same jest.fn()s exposed under the legacy `issues.*` namespace.
 *
 * Why: production code now goes through octokit.request() for all issue ops
 * via the issueOps helper (was octokit.issues.X — that namespace turned out
 * to be undefined at runtime for both pull_request.opened and
 * issue_comment.created in Probot v14). Existing assertions still target
 * .issues.X jest.fn()s; routing the request mock into them by route keeps
 * those assertions working unchanged.
 *
 * NOTE: the pasted diff had interleaved the deleted inline jest.fn()
 * literals with their added named replacements, yielding duplicate object
 * keys and a syntax error; this is the coherent post-change function.
 *
 * @returns {object} mock octokit with `issues`, `repos`, and `request`.
 */
function createMockOctokit() {
  const issuesCreate = jest.fn().mockResolvedValue({ data: { number: 42 } });
  const issuesCreateComment = jest.fn().mockResolvedValue({ data: { id: 100 } });
  const issuesUpdateComment = jest.fn().mockResolvedValue({});
  const issuesDeleteComment = jest.fn().mockResolvedValue({});

  // Route → namespaced mock. Unknown routes fall through to a generic 200.
  const routeDispatch = {
    'POST /repos/{owner}/{repo}/issues': issuesCreate,
    'POST /repos/{owner}/{repo}/issues/{issue_number}/comments': issuesCreateComment,
    'PATCH /repos/{owner}/{repo}/issues/comments/{comment_id}': issuesUpdateComment,
    'DELETE /repos/{owner}/{repo}/issues/comments/{comment_id}': issuesDeleteComment
  };

  const request = jest.fn((route, params) => {
    const dispatch = routeDispatch[route];
    if (dispatch) return dispatch(params);
    return Promise.resolve({ status: 200, data: {} });
  });

  return {
    issues: {
      create: issuesCreate,
      createComment: issuesCreateComment,
      updateComment: issuesUpdateComment,
      deleteComment: issuesDeleteComment
    },
    repos: {
      getBranch: jest.fn().mockResolvedValue({ data: { name: 'main' } })
    },
    request
  };
}

Expand Down Expand Up @@ -1105,7 +1129,11 @@ describe('app', () => {

const { handlers } = setupApp();
const context = createIssueCommentContext('/generate-dependabot');
context.octokit.request.mockResolvedValue({ data: { default_branch: 'main' } });
// mockImplementationOnce so the dispatcher in createMockOctokit still
// routes subsequent createComment calls into the namespaced jest.fn.
context.octokit.request.mockImplementationOnce(() =>
Promise.resolve({ data: { default_branch: 'main' } })
);
await handlers['issue_comment.created'](context);

expect(generateDependabotConfig).toHaveBeenCalledWith(context.octokit, 'myorg', 'myrepo', 'main');
Expand All @@ -1123,7 +1151,11 @@ describe('app', () => {

const { handlers } = setupApp();
const context = createIssueCommentContext('/generate-dependabot');
context.octokit.request.mockResolvedValue({ data: { default_branch: 'main' } });
// mockImplementationOnce so the dispatcher in createMockOctokit still
// routes subsequent createComment calls into the namespaced jest.fn.
context.octokit.request.mockImplementationOnce(() =>
Promise.resolve({ data: { default_branch: 'main' } })
);
await handlers['issue_comment.created'](context);

expect(applyDependabotConfig).not.toHaveBeenCalled();
Expand All @@ -1136,7 +1168,11 @@ describe('app', () => {

const { handlers } = setupApp();
const context = createIssueCommentContext('/generate-dependabot');
context.octokit.request.mockResolvedValue({ data: { default_branch: 'main' } });
// mockImplementationOnce so the dispatcher in createMockOctokit still
// routes subsequent createComment calls into the namespaced jest.fn.
context.octokit.request.mockImplementationOnce(() =>
Promise.resolve({ data: { default_branch: 'main' } })
);
await handlers['issue_comment.created'](context);

const body = context.octokit.issues.createComment.mock.calls[0][0].body;
Expand Down
36 changes: 36 additions & 0 deletions __tests__/unit/ai-review-prompt.test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import {
STRICT_SYSTEM_PROMPT,
REVIEW_JSON_SCHEMA,
buildResponseFormat,
isSlop,
tryParseReview,
filterFindings,
Expand All @@ -16,6 +18,40 @@ describe('STRICT_SYSTEM_PROMPT', () => {
});
});

describe('REVIEW_JSON_SCHEMA', () => {
  it('declares the verdict enum exactly matching the parser', () => {
    expect(REVIEW_JSON_SCHEMA.properties.verdict.enum).toEqual([
      'approve', 'comment', 'request_changes'
    ]);
  });

  it('marks verdict, summary, findings as required', () => {
    // Copy before sorting: Array#sort mutates in place, and sorting the
    // exported schema's `required` array directly would silently reorder
    // the shared constant for every other test that reads it afterwards.
    expect([...REVIEW_JSON_SCHEMA.required].sort()).toEqual(
      ['findings', 'summary', 'verdict']
    );
  });

  it('rejects extra properties (additionalProperties: false)', () => {
    expect(REVIEW_JSON_SCHEMA.additionalProperties).toBe(false);
    expect(REVIEW_JSON_SCHEMA.properties.findings.items.additionalProperties).toBe(false);
  });

  it('every finding field used by the parser is declared in the schema', () => {
    // Object.keys returns a fresh array, so in-place sort is safe here.
    const fields = Object.keys(REVIEW_JSON_SCHEMA.properties.findings.items.properties).sort();
    expect(fields).toEqual(['claim', 'file', 'line', 'quoted_line']);
  });
});

describe('buildResponseFormat', () => {
  it('returns OpenAI-compat json_schema payload', () => {
    // Destructure once; the inner wrapper carries name/strict/schema.
    const { type, json_schema: wrapper } = buildResponseFormat();

    expect(type).toBe('json_schema');
    expect(wrapper.name).toBe('PullRequestReview');
    expect(wrapper.strict).toBe(true);
    // Identity check (toBe): the wrapper must reference the exported
    // schema object, not a structural copy of it.
    expect(wrapper.schema).toBe(REVIEW_JSON_SCHEMA);
  });
});

describe('isSlop', () => {
it.each([
'consider edge cases here',
Expand Down
59 changes: 59 additions & 0 deletions src/ai-review-prompt.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,65 @@ Example with a finding:
Example with no findings:
{"verdict":"approve","summary":"Pure docs change in README.","findings":[]}`;

/**
 * JSON Schema describing the strict review payload. Used as a wire-format
 * contract: passed to the model endpoint via the OpenAI-compat
 * `response_format` field (see buildResponseFormat below) so that endpoints
 * supporting grammar-constrained decoding force the output to this shape
 * at generation time.
 *
 * Belt-and-suspenders relative to the prompt rules above: the prompt
 * teaches the *intent* (banned hedging, quote-or-die etc.) while the schema
 * enforces the *shape*, targeting wrong-shape failures such as the model
 * emitting `{"commit_message":"..."}` instead of the review object.
 * NOTE(review): enforcement depends on the serving stack honouring
 * response_format — endpoints that ignore it fall back to the strict
 * prompt + parser path, so the parser must still validate.
 */
export const REVIEW_JSON_SCHEMA = {
  type: 'object',
  // Reject unknown top-level keys (e.g. a stray {"commit_message": ...}).
  additionalProperties: false,
  properties: {
    // Values must stay in sync with the verdicts the parser accepts.
    verdict: {
      type: 'string',
      enum: ['approve', 'comment', 'request_changes']
    },
    // Short free-text summary; capped to keep review comments compact.
    summary: {
      type: 'string',
      maxLength: 400
    },
    // Zero or more concrete findings; empty array means "nothing to flag".
    findings: {
      type: 'array',
      items: {
        type: 'object',
        // Each finding is closed too — no extra per-finding fields.
        additionalProperties: false,
        properties: {
          file: { type: 'string', minLength: 1 },
          // 1-based line number within `file`.
          line: { type: 'integer', minimum: 1 },
          // The exact source line being flagged (quote-or-die rule).
          quoted_line: { type: 'string', minLength: 1 },
          // The assertion about that line; bounded like `summary`.
          claim: { type: 'string', minLength: 5, maxLength: 400 }
        },
        required: ['file', 'line', 'quoted_line', 'claim']
      }
    }
  },
  required: ['verdict', 'summary', 'findings']
};

/**
 * Assemble the OpenAI-compat `response_format` payload requesting
 * grammar-constrained JSON output conforming to REVIEW_JSON_SCHEMA.
 * Endpoints that do not understand the field may ignore it or fall back
 * to plain JSON mode; the downstream parser still catches non-conforming
 * output in that case.
 *
 * @returns {{type: string, json_schema: object}} payload for the
 *   chat-completions `response_format` request field.
 */
export function buildResponseFormat() {
  const schemaWrapper = {
    name: 'PullRequestReview',
    strict: true,
    schema: REVIEW_JSON_SCHEMA
  };
  return { type: 'json_schema', json_schema: schemaWrapper };
}

const HEDGE_OR_SLOP_PATTERNS = [
/\bmight\b/i,
/\bcould\b/i,
Expand Down
43 changes: 30 additions & 13 deletions src/ai-review.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ import {
tryParseReview,
filterFindings,
computeVerdict,
renderReviewMarkdown
renderReviewMarkdown,
buildResponseFormat
} from './ai-review-prompt.js';
import { runRivetOracle } from './rivet-oracle.js';
import { hasRivetYaml, withTempRepoCheckout } from './rivet-fetch.js';
Expand Down Expand Up @@ -223,25 +224,38 @@ function buildReviewPrompt(prData, diff, files, maxDiffSize) {
}

async function callLocalAI(endpoint, model, systemPrompt, userPrompt, options = {}) {
const { maxTokens = 2000, temperature = 0.3, timeout = 300000 } = options;
const {
maxTokens = 2000,
temperature = 0.3,
timeout = 300000,
responseFormat = undefined
} = options;

const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), timeout);

const payload = {
model,
messages: [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userPrompt }
],
max_tokens: maxTokens,
temperature,
stream: false
};
if (responseFormat) {
// OpenAI-compat: forces grammar-constrained output. Ollama honours
// this on /v1/chat/completions in recent builds. Older endpoints that
// ignore it fall through to the strict-prompt + parser path.
payload.response_format = responseFormat;
}

try {
const response = await fetch(endpoint, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model,
messages: [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userPrompt }
],
max_tokens: maxTokens,
temperature,
stream: false
}),
body: JSON.stringify(payload),
signal: controller.signal
});

Expand Down Expand Up @@ -438,7 +452,10 @@ async function reviewPullRequest(octokit, owner, repo, prNumber) {
{
maxTokens: aiConfig.max_tokens || 2000,
temperature: aiConfig.temperature || 0.3,
timeout: aiConfig.timeout || 120000
timeout: aiConfig.timeout || 120000,
// Grammar-constrained output in strict mode only — legacy freeform
// path expects prose, would get rejected by JSON-schema decoding.
responseFormat: useStrictMode ? buildResponseFormat() : undefined
}
);

Expand Down
Loading
Loading