diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index 50abf3d..cedab0b 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -44,8 +44,8 @@ export type { // Output item types (StreamableOutputItem members) OutputFileSearchCallItem, OutputFunctionCallItem, + OutputImage, OutputImageGenerationCallItem, - OutputInputImage, OutputItems, OutputMessage, OutputReasoningItem, diff --git a/packages/agent/src/lib/model-result.ts b/packages/agent/src/lib/model-result.ts index f1087b5..94cc89b 100644 --- a/packages/agent/src/lib/model-result.ts +++ b/packages/agent/src/lib/model-result.ts @@ -78,6 +78,7 @@ import { isServerTool, isToolCallOutputEvent, } from './tool-types.js'; +import { normalizeInputToArray } from './turn-context.js'; /** * Typeguard for plain-object records (non-null, non-array). @@ -231,6 +232,7 @@ export class ModelResult< private approvedToolCalls: string[] = []; private rejectedToolCalls: string[] = []; private isResumingFromApproval = false; + private pendingFreshItems: models.BaseInputsUnion[] | undefined; // Unified turn broadcaster for multi-turn streaming private turnBroadcaster: ToolEventBroadcaster< @@ -491,11 +493,18 @@ export class ModelResult< response.output, ]; + // Persist any fresh user input items that were collected during + // initStream, followed by the response output. Deferring the user-input + // persist to here (rather than before the API call) avoids duplicating + // user turns in state when a caller retries after an API failure. + let messages = this.currentState.messages; + if (this.pendingFreshItems && this.pendingFreshItems.length > 0) { + messages = appendToMessages(messages, this.pendingFreshItems); + this.pendingFreshItems = undefined; + } + await this.saveStateSafely({ - messages: appendToMessages( - this.currentState.messages, - outputItems as models.BaseInputsUnion[], - ), + messages: appendToMessages(messages, outputItems as models.BaseInputsUnion[]), previousResponseId: response.id, }); } @@ -1527,6 +1536,8 @@ export class ModelResult< ? await this.applyHooksToFreshItems(freshItems, historicalMessages, initialContext) : undefined; + this.pendingFreshItems = hookedFresh; + baseRequest = { ...baseRequest, input: hookedFresh @@ -1548,6 +1559,7 @@ export class ModelResult< ...baseRequest, input: hookedInput, }; + this.pendingFreshItems = normalizeInputToArray(hookedInput) as models.BaseInputsUnion[]; } // Store resolved request with stream mode diff --git a/packages/agent/tests/unit/user-input-persistence.test.ts b/packages/agent/tests/unit/user-input-persistence.test.ts new file mode 100644 index 0000000..db14532 --- /dev/null +++ b/packages/agent/tests/unit/user-input-persistence.test.ts @@ -0,0 +1,390 @@ +import type { OpenRouterCore } from '@openrouter/sdk/core'; +import type * as models from '@openrouter/sdk/models'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { z } from 'zod/v4'; +import type { ConversationState, StateAccessor } from '../../src/index.js'; +import { callModel } from '../../src/inner-loop/call-model.js'; +import { ToolType } from '../../src/lib/tool-types.js'; + +const mockBetaResponsesSend = vi.hoisted(() => vi.fn()); + +vi.mock('@openrouter/sdk/funcs/betaResponsesSend', () => ({ + betaResponsesSend: mockBetaResponsesSend, +})); + +function textResponse(text: string): models.OpenResponsesResult { + return { + id: 'resp_text', + object: 'response', + createdAt: 0, + model: 'test-model', + status: 'completed', + completedAt: 0, + output: [ + { + id: 'msg_1', + type: 'message', + role: 'assistant', + status: 'completed', + content: [ + { + type: 'output_text', + text, + annotations: [], + }, + ], + }, + ], + error: null, + incompleteDetails: null, + temperature: null, + topP: null, + presencePenalty: null, + frequencyPenalty: null, + metadata: null, + instructions: null, + tools: [], + toolChoice: 'auto', + parallelToolCalls: false, + } as models.OpenResponsesResult; +} + +function toolCallResponse(callId: string, name: string, args: string): models.OpenResponsesResult { + return { + id: `resp_tc_${callId}`, + object: 'response', + createdAt: 0, + model: 'test-model', + status: 'completed', + completedAt: 0, + output: [ + { + type: 'function_call', + id: `fc_${callId}`, + callId, + name, + arguments: args, + status: 'completed', + }, + ], + error: null, + incompleteDetails: null, + temperature: null, + topP: null, + presencePenalty: null, + frequencyPenalty: null, + metadata: null, + instructions: null, + tools: [], + toolChoice: 'auto', + parallelToolCalls: false, + } as models.OpenResponsesResult; +} + +const echoTool = { + type: ToolType.Function, + function: { + name: 'echo', + description: 'Echo input', + inputSchema: z.object({ + message: z.string(), + }), + execute: async (params: { message: string }) => ({ + echoed: params.message, + }), + }, +} as const; + +const client = {} as OpenRouterCore; + +describe('User Input Persistence to State', () => { + let storedState: ConversationState | null; + let stateAccessor: StateAccessor; + + beforeEach(() => { + mockBetaResponsesSend.mockReset(); + storedState = null; + stateAccessor = { + load: async () => storedState, + save: async (state) => { + storedState = state; + }, + }; + }); + + it('persists user input items to state.messages after first callModel (no tools)', async () => { + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('Hello back!'), + }); + + const result = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Hello', + }, + ], + state: stateAccessor, + }); + + await result.getText(); + + // state.messages should contain the user input AND the response output + expect(storedState).not.toBeNull(); + const messages = storedState!.messages as unknown[]; + expect(Array.isArray(messages)).toBe(true); + + const userItems = ( + messages as Array<{ + role?: string; + }> + ).filter((m) => m.role === 'user'); + expect(userItems.length).toBe(1); + expect( + ( + userItems[0] as { + content: string; + } + ).content, + ).toBe('Hello'); + }); + + it('persists string input normalized as user message', async () => { + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('Response'), + }); + + const result = callModel(client, { + model: 'test-model', + input: 'Hello string input', + state: stateAccessor, + }); + + await result.getText(); + + expect(storedState).not.toBeNull(); + const messages = storedState!.messages as Array<{ + role?: string; + content?: string; + }>; + expect(Array.isArray(messages)).toBe(true); + + const userItems = messages.filter((m) => m.role === 'user'); + expect(userItems.length).toBe(1); + expect(userItems[0]!.content).toBe('Hello string input'); + }); + + it('persists user input alongside tool results after tool execution', async () => { + // First call: tool call, second call: text response after tool output + mockBetaResponsesSend + .mockResolvedValueOnce({ + ok: true, + value: toolCallResponse('call_1', 'echo', '{"message":"hi"}'), + }) + .mockResolvedValueOnce({ + ok: true, + value: textResponse('Done echoing.'), + }); + + const result = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Echo hi', + }, + ], + tools: [ + echoTool, + ] as const, + state: stateAccessor, + }); + + await result.getText(); + + expect(storedState).not.toBeNull(); + const messages = storedState!.messages as Array<{ + role?: string; + type?: string; + }>; + expect(Array.isArray(messages)).toBe(true); + + // Should contain: user input, function_call, function_call_output, message (response) + const userItems = messages.filter((m) => m.role === 'user'); + const fnCalls = messages.filter((m) => m.type === 'function_call'); + const fnOutputs = messages.filter((m) => m.type === 'function_call_output'); + + expect(userItems.length).toBe(1); + expect(fnCalls.length).toBe(1); + expect(fnOutputs.length).toBe(1); + }); + + it('second callModel sees prior user input in state on resume', async () => { + // --- First callModel --- + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('First response'), + }); + + const result1 = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'First message', + }, + ], + state: stateAccessor, + }); + await result1.getText(); + + // Verify first user message is in state + const messagesAfterFirst = storedState!.messages as Array<{ + role?: string; + }>; + const firstUserItems = messagesAfterFirst.filter((m) => m.role === 'user'); + expect(firstUserItems.length).toBe(1); + + // --- Second callModel (resumes from state) --- + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('Second response'), + }); + + const result2 = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Second message', + }, + ], + state: stateAccessor, + }); + await result2.getText(); + + // The API request for the second call should contain BOTH user messages + const secondCallRequest = mockBetaResponsesSend.mock.calls[1]?.[1]?.responsesRequest; + expect(secondCallRequest).toBeDefined(); + + const input = secondCallRequest.input as Array<{ + role?: string; + content?: string; + }>; + expect(Array.isArray(input)).toBe(true); + + const userMessages = input.filter((i) => i.role === 'user'); + expect(userMessages.length).toBe(2); + expect(userMessages[0]!.content).toBe('First message'); + expect(userMessages[1]!.content).toBe('Second message'); + }); + + it('state.messages contains user input from both calls after two callModel invocations', async () => { + // --- First callModel --- + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('Reply 1'), + }); + + const result1 = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Question 1', + }, + ], + state: stateAccessor, + }); + await result1.getText(); + + // --- Second callModel --- + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('Reply 2'), + }); + + const result2 = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Question 2', + }, + ], + state: stateAccessor, + }); + await result2.getText(); + + // Final state should have both user messages + const messages = storedState!.messages as Array<{ + role?: string; + content?: string; + }>; + const userItems = messages.filter((m) => m.role === 'user'); + expect(userItems.length).toBe(2); + expect(userItems[0]!.content).toBe('Question 1'); + expect(userItems[1]!.content).toBe('Question 2'); + }); + + it('does not duplicate user input in state when retrying after API failure', async () => { + // First attempt: API call fails + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: false, + error: new Error('API failure'), + }); + + const result1 = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Hello', + }, + ], + state: stateAccessor, + }); + + await expect(result1.getText()).rejects.toThrow(); + + // State should NOT contain the user message after a failed API call + const messagesAfterFailure = storedState?.messages as + | Array<{ + role?: string; + }> + | undefined; + const userItemsAfterFailure = messagesAfterFailure?.filter((m) => m.role === 'user') ?? []; + expect(userItemsAfterFailure.length).toBe(0); + + // Retry: same input, API succeeds + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: textResponse('Hello back!'), + }); + + const result2 = callModel(client, { + model: 'test-model', + input: [ + { + role: 'user', + content: 'Hello', + }, + ], + state: stateAccessor, + }); + + await result2.getText(); + + // Should contain exactly one user message, not two + const messages = storedState!.messages as Array<{ + role?: string; + content?: string; + }>; + const userItems = messages.filter((m) => m.role === 'user'); + expect(userItems.length).toBe(1); + expect(userItems[0]!.content).toBe('Hello'); + }); +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4963f56..014ea29 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -40,7 +40,7 @@ importers: dependencies: '@openrouter/sdk': specifier: ^0.12.12 - version: 0.12.12 + version: 0.12.79 zod: specifier: ^4.0.0 version: 4.3.6 @@ -351,8 +351,8 @@ packages: resolution: {integrity: sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==} engines: {node: '>= 8'} - '@openrouter/sdk@0.12.12': - resolution: {integrity: sha512-/4FUsYEW82sff6/QtuY7NrIe2SLya/jv8HCk4VCYFXpHvr18P2osOn4iTwTqkqu4SJdrFtj9VTRKoBIg61u9dQ==} + '@openrouter/sdk@0.12.79': + resolution: {integrity: sha512-0ZpwtnuHh3/B1piW9kHCUIQy6PAsaK/vjFdZuHxmCdAenCyUNsLA2mFpmfHNWRNb+bOO3yBc4IALa264UyzmBA==} '@rollup/rollup-android-arm-eabi@4.60.1': resolution: {integrity: sha512-d6FinEBLdIiK+1uACUttJKfgZREXrF0Qc2SmLII7W2AD8FfiZ9Wjd+rD/iRuf5s5dWrr1GgwXCvPqOuDquOowA==} @@ -1407,7 +1407,7 @@ snapshots: '@nodelib/fs.scandir': 2.1.5 fastq: 1.20.1 - '@openrouter/sdk@0.12.12': + '@openrouter/sdk@0.12.79': dependencies: zod: 4.3.6