From 39b07b1b8fbfd23b7cfe0cfe58f7ce0365a24b62 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 31 May 2026 18:31:52 +0000 Subject: [PATCH 1/5] fix: persist user input items to state.messages across callModel invocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User input items (role: 'user') were never written to state.messages. The two existing saveStateSafely write sites only persist response.output and toolResults. Within a single callModel invocation this is masked because makeFollowupRequest reconstructs input from the in-memory this.resolvedRequest.input. But when a new callModel resumes from persisted state, the loaded state.messages contains zero user items — prior user turns are silently dropped. This causes two problems: 1. cache_control prompt caching is defeated at every user-message boundary (cachedTokens = 0 on first request of each new message) 2. Conversation fidelity loss — the model never sees prior user turns Fix: In initStream(), after request resolution, persist fresh user input items to state.messages via saveStateSafely before the API call. Both code paths (loaded history and no history) now capture freshItemsForState and write it to state. Co-Authored-By: Ben Heidorn --- .changeset/fix-user-input-persistence.md | 14 + packages/agent/src/lib/model-result.ts | 27 ++ .../tests/unit/user-input-persistence.test.ts | 332 ++++++++++++++++++ 3 files changed, 373 insertions(+) create mode 100644 .changeset/fix-user-input-persistence.md create mode 100644 packages/agent/tests/unit/user-input-persistence.test.ts diff --git a/.changeset/fix-user-input-persistence.md b/.changeset/fix-user-input-persistence.md new file mode 100644 index 0000000..26f3705 --- /dev/null +++ b/.changeset/fix-user-input-persistence.md @@ -0,0 +1,14 @@ +--- +'@openrouter/agent': patch +--- + +fix: persist user input items to state.messages across callModel invocations + +User input items (role: 'user') were never written to state.messages. The two +existing saveStateSafely write sites only persist response.output and +toolResults. When a new callModel resumes from persisted state, the loaded +state.messages contains zero user items — prior user turns are silently dropped. + +This causes two problems: +1. cache_control prompt caching is defeated at every user-message boundary +2. Conversation fidelity loss — the model never sees prior user turns diff --git a/packages/agent/src/lib/model-result.ts b/packages/agent/src/lib/model-result.ts index f1087b5..1cad666 100644 --- a/packages/agent/src/lib/model-result.ts +++ b/packages/agent/src/lib/model-result.ts @@ -78,6 +78,7 @@ import { isServerTool, isToolCallOutputEvent, } from './tool-types.js'; +import { normalizeInputToArray } from './turn-context.js'; /** * Typeguard for plain-object records (non-null, non-array). @@ -1498,6 +1499,13 @@ export class ModelResult< Array.isArray(this.currentState.messages) && this.currentState.messages.length > 0; + // Track fresh user input items that need to be persisted to state. + // Without this, state.messages only contains response outputs and tool + // results — prior user turns are lost when a new callModel resumes from + // persisted state, breaking conversation fidelity and cache_control + // prompt caching at user-message boundaries. + let freshItemsForState: models.BaseInputsUnion[] | undefined; + if (hasLoadedHistory && this.currentState) { // `currentState.messages` is InputsUnion — keep it as that union so // appendToMessages (which expects InputsUnion) accepts it directly. @@ -1527,6 +1535,8 @@ export class ModelResult< ? await this.applyHooksToFreshItems(freshItems, historicalMessages, initialContext) : undefined; + freshItemsForState = hookedFresh; + baseRequest = { ...baseRequest, input: hookedFresh @@ -1548,6 +1558,23 @@ export class ModelResult< ...baseRequest, input: hookedInput, }; + // All input is fresh when there's no loaded history — normalize to + // array form for state persistence. + freshItemsForState = normalizeInputToArray(hookedInput) as models.BaseInputsUnion[]; + } + + // Persist fresh user input items to state so they survive across + // callModel invocations. This ensures the resume path + // (state.messages + new input) reconstructs the full conversation. + if ( + this.stateAccessor && + this.currentState && + freshItemsForState && + freshItemsForState.length > 0 + ) { + await this.saveStateSafely({ + messages: appendToMessages(this.currentState.messages, freshItemsForState), + }); } // Store resolved request with stream mode diff --git a/packages/agent/tests/unit/user-input-persistence.test.ts b/packages/agent/tests/unit/user-input-persistence.test.ts new file mode 100644 index 0000000..995b702 --- /dev/null +++ b/packages/agent/tests/unit/user-input-persistence.test.ts @@ -0,0 +1,332 @@ +import type { OpenRouterCore } from '@openrouter/sdk/core'; +import type * as models from '@openrouter/sdk/models'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { z } from 'zod/v4'; +import type { ConversationState, StateAccessor } from '../../src/index.js'; +import { callModel } from '../../src/inner-loop/call-model.js'; +import { ToolType } from '../../src/lib/tool-types.js'; + +const mockBetaResponsesSend = vi.hoisted(() => vi.fn()); + +vi.mock('@openrouter/sdk/funcs/betaResponsesSend', () => ({ + betaResponsesSend: mockBetaResponsesSend, +})); + +function textResponse(text: string): models.OpenResponsesResult { + return { + id: 'resp_text', + object: 'response', + createdAt: 0, + model: 'test-model', + status: 'completed', + completedAt: 0, + output: [ + { + id: 'msg_1', + type: 'message', + role: 'assistant', + status: 'completed', + content: [ + { + type: 'output_text', + text, + annotations: [], + }, + ], + }, + ], + error: null, + incompleteDetails: null, + temperature: null, + topP: null, + presencePenalty: null, + frequencyPenalty: null, + metadata: null, + instructions: null, + tools: [], + toolChoice: 'auto', + parallelToolCalls: false, + } as models.OpenResponsesResult; +} + +function toolCallResponse(callId: string, name: string, args: string): models.OpenResponsesResult { + return { + id: `resp_tc_${callId}`, + object: 'response', + createdAt: 0, + model: 'test-model', + status: 'completed', + completedAt: 0, + output: [ + { + type: 'function_call', + id: `fc_${callId}`, + callId, + name, + arguments: args, + status: 'completed', + }, + ], + error: null, + incompleteDetails: null, + temperature: null, + topP: null, + presencePenalty: null, + frequencyPenalty: null, + metadata: null, + instructions: null, + tools: [], + toolChoice: 'auto', + parallelToolCalls: false, + } as models.OpenResponsesResult; +} + +const echoTool = { + type: ToolType.Function, + function: { + name: 'echo', + description: 'Echo input', + inputSchema: z.object({ + message: z.string(), + }), + execute: async (params: { message: string }) => ({ + echoed: params.message, + }), + }, +} as const; + +const client = {} as OpenRouterCore; + +describe('User Input Persistence to State', () => { + let storedState: ConversationState | null; + let stateAccessor: StateAccessor; + + beforeEach(() => { + mockBetaResponsesSend.mockReset(); + storedState = null; + stateAccessor = { + load: async () => storedState, + save: async (state) => { + storedState = state; + }, + }; + }); + + it('persists user input items to state.messages after first callModel (no tools)', async () => { + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('Hello back!'), + }); + + const result = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Hello', + }, + ], + state: stateAccessor, + }); + + await result.getText(); + + // state.messages should contain the user input AND the response output + expect(storedState).not.toBeNull(); + const messages = storedState!.messages as unknown[]; + expect(Array.isArray(messages)).toBe(true); + + const userItems = ( + messages as Array<{ + role?: string; + }> + ).filter((m) => m.role === 'user'); + expect(userItems.length).toBe(1); + expect( + ( + userItems[0] as { + content: string; + } + ).content, + ).toBe('Hello'); + }); + + it('persists string input normalized as user message', async () => { + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('Response'), + }); + + const result = callModel(client, { + model: 'test-model', + input: 'Hello string input', + state: stateAccessor, + }); + + await result.getText(); + + expect(storedState).not.toBeNull(); + const messages = storedState!.messages as Array<{ + role?: string; + content?: string; + }>; + expect(Array.isArray(messages)).toBe(true); + + const userItems = messages.filter((m) => m.role === 'user'); + expect(userItems.length).toBe(1); + expect(userItems[0]!.content).toBe('Hello string input'); + }); + + it('persists user input alongside tool results after tool execution', async () => { + // First call: tool call, second call: text response after tool output + mockBetaResponsesSend + .mockResolvedValueOnce({ + ok: true, + value: toolCallResponse('call_1', 'echo', '{"message":"hi"}'), + }) + .mockResolvedValueOnce({ + ok: true, + value: textResponse('Done echoing.'), + }); + + const result = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Echo hi', + }, + ], + tools: [ + echoTool, + ] as const, + state: stateAccessor, + }); + + await result.getText(); + + expect(storedState).not.toBeNull(); + const messages = storedState!.messages as Array<{ + role?: string; + type?: string; + }>; + expect(Array.isArray(messages)).toBe(true); + + // Should contain: user input, function_call, function_call_output, message (response) + const userItems = messages.filter((m) => m.role === 'user'); + const fnCalls = messages.filter((m) => m.type === 'function_call'); + const fnOutputs = messages.filter((m) => m.type === 'function_call_output'); + + expect(userItems.length).toBe(1); + expect(fnCalls.length).toBe(1); + expect(fnOutputs.length).toBe(1); + }); + + it('second callModel sees prior user input in state on resume', async () => { + // --- First callModel --- + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('First response'), + }); + + const result1 = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'First message', + }, + ], + state: stateAccessor, + }); + await result1.getText(); + + // Verify first user message is in state + const messagesAfterFirst = storedState!.messages as Array<{ + role?: string; + }>; + const firstUserItems = messagesAfterFirst.filter((m) => m.role === 'user'); + expect(firstUserItems.length).toBe(1); + + // --- Second callModel (resumes from state) --- + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('Second response'), + }); + + const result2 = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Second message', + }, + ], + state: stateAccessor, + }); + await result2.getText(); + + // The API request for the second call should contain BOTH user messages + const secondCallRequest = mockBetaResponsesSend.mock.calls[1]?.[1]?.responsesRequest; + expect(secondCallRequest).toBeDefined(); + + const input = secondCallRequest.input as Array<{ + role?: string; + content?: string; + }>; + expect(Array.isArray(input)).toBe(true); + + const userMessages = input.filter((i) => i.role === 'user'); + expect(userMessages.length).toBe(2); + expect(userMessages[0]!.content).toBe('First message'); + expect(userMessages[1]!.content).toBe('Second message'); + }); + + it('state.messages contains user input from both calls after two callModel invocations', async () => { + // --- First callModel --- + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('Reply 1'), + }); + + const result1 = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Question 1', + }, + ], + state: stateAccessor, + }); + await result1.getText(); + + // --- Second callModel --- + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('Reply 2'), + }); + + const result2 = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Question 2', + }, + ], + state: stateAccessor, + }); + await result2.getText(); + + // Final state should have both user messages + const messages = storedState!.messages as Array<{ + role?: string; + content?: string; + }>; + const userItems = messages.filter((m) => m.role === 'user'); + expect(userItems.length).toBe(2); + expect(userItems[0]!.content).toBe('Question 1'); + expect(userItems[1]!.content).toBe('Question 2'); + }); +}); From d1e4831705f4300e019cb73a420080f767846018 Mon Sep 17 00:00:00 2001 From: Ben Heidorn <301326+Cybourgeoisie@users.noreply.github.com> Date: Mon, 1 Jun 2026 12:49:13 -0400 Subject: [PATCH 2/5] chore: remove changeset for user input persistence fix Co-Authored-By: Claude Opus 4 (1M context) --- .changeset/fix-user-input-persistence.md | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 .changeset/fix-user-input-persistence.md diff --git a/.changeset/fix-user-input-persistence.md b/.changeset/fix-user-input-persistence.md deleted file mode 100644 index 26f3705..0000000 --- a/.changeset/fix-user-input-persistence.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -'@openrouter/agent': patch ---- - -fix: persist user input items to state.messages across callModel invocations - -User input items (role: 'user') were never written to state.messages. The two -existing saveStateSafely write sites only persist response.output and -toolResults. When a new callModel resumes from persisted state, the loaded -state.messages contains zero user items — prior user turns are silently dropped. - -This causes two problems: -1. cache_control prompt caching is defeated at every user-message boundary -2. Conversation fidelity loss — the model never sees prior user turns From b6ab88745f32424307e440d64809d12500e1d719 Mon Sep 17 00:00:00 2001 From: Ben Heidorn <301326+Cybourgeoisie@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:32:36 -0400 Subject: [PATCH 3/5] fix: defer user input persistence to after successful API response Move the fresh-items save from before the API call (initStream) to saveResponseToState, which runs only after a successful response. This prevents duplicate user turns in state when a caller retries after an API failure. Add retry-after-failure idempotency test to verify the fix. Co-Authored-By: Claude Opus 4 (1M context) --- packages/agent/src/lib/model-result.ts | 40 +++++-------- .../tests/unit/user-input-persistence.test.ts | 56 +++++++++++++++++++ 2 files changed, 70 insertions(+), 26 deletions(-) diff --git a/packages/agent/src/lib/model-result.ts b/packages/agent/src/lib/model-result.ts index 1cad666..de2caf5 100644 --- a/packages/agent/src/lib/model-result.ts +++ b/packages/agent/src/lib/model-result.ts @@ -232,6 +232,7 @@ export class ModelResult< private approvedToolCalls: string[] = []; private rejectedToolCalls: string[] = []; private isResumingFromApproval = false; + private pendingFreshItems: models.BaseInputsUnion[] | undefined; // Unified turn broadcaster for multi-turn streaming private turnBroadcaster: ToolEventBroadcaster< @@ -492,9 +493,19 @@ export class ModelResult< response.output, ]; + // Persist any fresh user input items that were collected during + // initStream, followed by the response output. Deferring the user-input + // persist to here (rather than before the API call) avoids duplicating + // user turns in state when a caller retries after an API failure. + let messages = this.currentState.messages; + if (this.pendingFreshItems && this.pendingFreshItems.length > 0) { + messages = appendToMessages(messages, this.pendingFreshItems); + this.pendingFreshItems = undefined; + } + await this.saveStateSafely({ messages: appendToMessages( - this.currentState.messages, + messages, outputItems as models.BaseInputsUnion[], ), previousResponseId: response.id, @@ -1499,13 +1510,6 @@ export class ModelResult< Array.isArray(this.currentState.messages) && this.currentState.messages.length > 0; - // Track fresh user input items that need to be persisted to state. - // Without this, state.messages only contains response outputs and tool - // results — prior user turns are lost when a new callModel resumes from - // persisted state, breaking conversation fidelity and cache_control - // prompt caching at user-message boundaries. - let freshItemsForState: models.BaseInputsUnion[] | undefined; - if (hasLoadedHistory && this.currentState) { // `currentState.messages` is InputsUnion — keep it as that union so // appendToMessages (which expects InputsUnion) accepts it directly. @@ -1535,7 +1539,7 @@ export class ModelResult< ? await this.applyHooksToFreshItems(freshItems, historicalMessages, initialContext) : undefined; - freshItemsForState = hookedFresh; + this.pendingFreshItems = hookedFresh; baseRequest = { ...baseRequest, @@ -1558,23 +1562,7 @@ export class ModelResult< ...baseRequest, input: hookedInput, }; - // All input is fresh when there's no loaded history — normalize to - // array form for state persistence. - freshItemsForState = normalizeInputToArray(hookedInput) as models.BaseInputsUnion[]; - } - - // Persist fresh user input items to state so they survive across - // callModel invocations. This ensures the resume path - // (state.messages + new input) reconstructs the full conversation. - if ( - this.stateAccessor && - this.currentState && - freshItemsForState && - freshItemsForState.length > 0 - ) { - await this.saveStateSafely({ - messages: appendToMessages(this.currentState.messages, freshItemsForState), - }); + this.pendingFreshItems = normalizeInputToArray(hookedInput) as models.BaseInputsUnion[]; } // Store resolved request with stream mode diff --git a/packages/agent/tests/unit/user-input-persistence.test.ts b/packages/agent/tests/unit/user-input-persistence.test.ts index 995b702..feb4de5 100644 --- a/packages/agent/tests/unit/user-input-persistence.test.ts +++ b/packages/agent/tests/unit/user-input-persistence.test.ts @@ -329,4 +329,60 @@ describe('User Input Persistence to State', () => { expect(userItems[0]!.content).toBe('Question 1'); expect(userItems[1]!.content).toBe('Question 2'); }); + + it('does not duplicate user input in state when retrying after API failure', async () => { + // First attempt: API call fails + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: false, + error: new Error('API failure'), + }); + + const result1 = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Hello', + }, + ], + state: stateAccessor, + }); + + await expect(result1.getText()).rejects.toThrow(); + + // State should NOT contain the user message after a failed API call + const messagesAfterFailure = storedState?.messages as + | Array<{ role?: string }> + | undefined; + const userItemsAfterFailure = messagesAfterFailure?.filter((m) => m.role === 'user') ?? []; + expect(userItemsAfterFailure.length).toBe(0); + + // Retry: same input, API succeeds + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('Hello back!'), + }); + + const result2 = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Hello', + }, + ], + state: stateAccessor, + }); + + await result2.getText(); + + // Should contain exactly one user message, not two + const messages = storedState!.messages as Array<{ + role?: string; + content?: string; + }>; + const userItems = messages.filter((m) => m.role === 'user'); + expect(userItems.length).toBe(1); + expect(userItems[0]!.content).toBe('Hello'); + }); }); From 996cbc976be7fe1f52de4d071f488bc18ca3b256 Mon Sep 17 00:00:00 2001 From: Ben Heidorn <301326+Cybourgeoisie@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:32:53 -0400 Subject: [PATCH 4/5] style: fix biome formatting Co-Authored-By: Claude Opus 4 (1M context) --- packages/agent/src/lib/model-result.ts | 5 +---- packages/agent/tests/unit/user-input-persistence.test.ts | 4 +++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/packages/agent/src/lib/model-result.ts b/packages/agent/src/lib/model-result.ts index de2caf5..94cc89b 100644 --- a/packages/agent/src/lib/model-result.ts +++ b/packages/agent/src/lib/model-result.ts @@ -504,10 +504,7 @@ export class ModelResult< } await this.saveStateSafely({ - messages: appendToMessages( - messages, - outputItems as models.BaseInputsUnion[], - ), + messages: appendToMessages(messages, outputItems as models.BaseInputsUnion[]), previousResponseId: response.id, }); } diff --git a/packages/agent/tests/unit/user-input-persistence.test.ts b/packages/agent/tests/unit/user-input-persistence.test.ts index feb4de5..db14532 100644 --- a/packages/agent/tests/unit/user-input-persistence.test.ts +++ b/packages/agent/tests/unit/user-input-persistence.test.ts @@ -352,7 +352,9 @@ describe('User Input Persistence to State', () => { // State should NOT contain the user message after a failed API call const messagesAfterFailure = storedState?.messages as - | Array<{ role?: string }> + | Array<{ + role?: string; + }> | undefined; const userItemsAfterFailure = messagesAfterFailure?.filter((m) => m.role === 'user') ?? []; expect(userItemsAfterFailure.length).toBe(0); From c930772aef57e3620c85c1ba0828adda989b839f Mon Sep 17 00:00:00 2001 From: Ben Heidorn <301326+Cybourgeoisie@users.noreply.github.com> Date: Mon, 1 Jun 2026 17:04:07 -0400 Subject: [PATCH 5/5] chore: update @openrouter/sdk lockfile to 0.12.79, rename OutputInputImage to OutputImage Co-Authored-By: Claude Opus 4 (1M context) --- packages/agent/src/index.ts | 2 +- pnpm-lock.yaml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index 50abf3d..cedab0b 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -44,8 +44,8 @@ export type { // Output item types (StreamableOutputItem members) OutputFileSearchCallItem, OutputFunctionCallItem, + OutputImage, OutputImageGenerationCallItem, - OutputInputImage, OutputItems, OutputMessage, OutputReasoningItem, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4963f56..014ea29 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -40,7 +40,7 @@ importers: dependencies: '@openrouter/sdk': specifier: ^0.12.12 - version: 0.12.12 + version: 0.12.79 zod: specifier: ^4.0.0 version: 4.3.6 @@ -351,8 +351,8 @@ packages: resolution: {integrity: sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==} engines: {node: '>= 8'} - '@openrouter/sdk@0.12.12': - resolution: {integrity: sha512-/4FUsYEW82sff6/QtuY7NrIe2SLya/jv8HCk4VCYFXpHvr18P2osOn4iTwTqkqu4SJdrFtj9VTRKoBIg61u9dQ==} + '@openrouter/sdk@0.12.79': + resolution: {integrity: sha512-0ZpwtnuHh3/B1piW9kHCUIQy6PAsaK/vjFdZuHxmCdAenCyUNsLA2mFpmfHNWRNb+bOO3yBc4IALa264UyzmBA==} '@rollup/rollup-android-arm-eabi@4.60.1': resolution: {integrity: sha512-d6FinEBLdIiK+1uACUttJKfgZREXrF0Qc2SmLII7W2AD8FfiZ9Wjd+rD/iRuf5s5dWrr1GgwXCvPqOuDquOowA==} @@ -1407,7 +1407,7 @@ snapshots: '@nodelib/fs.scandir': 2.1.5 fastq: 1.20.1 - '@openrouter/sdk@0.12.12': + '@openrouter/sdk@0.12.79': dependencies: zod: 4.3.6