From 47a194844c2186dcaed33c6e2950a404f96ea6b0 Mon Sep 17 00:00:00 2001 From: Pranav Sharan Date: Sun, 8 Feb 2026 00:46:21 -0800 Subject: [PATCH 1/6] fix anthropic tool call bug --- .../__tests__/tool-validation-error.test.ts | 128 ++++++++++++++++++ .../agent-runtime/src/tool-stream-parser.ts | 30 ++-- .../agent-runtime/src/tools/stream-parser.ts | 34 +++-- .../agent-runtime/src/tools/tool-executor.ts | 12 +- ...vert-to-openai-compatible-chat-messages.ts | 20 +++ sdk/src/impl/llm.ts | 25 +++- 6 files changed, 223 insertions(+), 26 deletions(-) diff --git a/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts b/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts index d9ea5d89f3..df9c1997d8 100644 --- a/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts +++ b/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts @@ -1,6 +1,7 @@ import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime' import { getInitialSessionState } from '@codebuff/common/types/session-state' import { promptSuccess } from '@codebuff/common/util/error' +import { jsonToolResult } from '@codebuff/common/util/messages' import { beforeEach, describe, expect, it } from 'bun:test' import { mockFileContext } from './test-utils' @@ -12,6 +13,10 @@ import type { AgentRuntimeScopedDeps, } from '@codebuff/common/types/contracts/agent-runtime' import type { StreamChunk } from '@codebuff/common/types/contracts/llm' +import type { + AssistantMessage, + ToolMessage, +} from '@codebuff/common/types/messages/codebuff-message' import type { PrintModeEvent } from '@codebuff/common/types/print-mode' describe('tool validation error handling', () => { @@ -225,4 +230,127 @@ describe('tool validation error handling', () => { ) expect(errorEvents.length).toBe(0) }) + + it('should preserve tool_call/tool_result ordering when custom tool setup is async', async () => { + const toolName = 'delayed_custom_tool' + const agentWithCustomTool: AgentTemplate = { + ...testAgentTemplate, + toolNames: [toolName, 'end_turn'], + } + + const delayedToolCallChunk: StreamChunk = { + type: 'tool-call', + toolName, + toolCallId: 'delayed-custom-tool-call-id', + input: { + query: 'test', + }, + } + + async function* mockStream() { + yield delayedToolCallChunk + return promptSuccess('mock-message-id') + } + + const fileContextWithCustomTool = { + ...mockFileContext, + customToolDefinitions: { + [toolName]: { + inputSchema: { + type: 'object', + properties: { + query: { type: 'string' }, + }, + required: ['query'], + additionalProperties: false, + }, + endsAgentStep: false, + description: 'A delayed custom tool for ordering tests', + }, + }, + } + + const sessionState = getInitialSessionState(fileContextWithCustomTool) + const agentState = sessionState.mainAgentState + + agentRuntimeImpl.requestMcpToolData = async () => { + // Force an async gap so tool_call emission happens after stream completion. + await new Promise((resolve) => setTimeout(resolve, 20)) + return [] + } + agentRuntimeImpl.requestToolCall = async () => ({ + output: jsonToolResult({ ok: true }), + }) + + await processStream({ + ...agentRuntimeImpl, + agentContext: {}, + agentState, + agentStepId: 'test-step-id', + agentTemplate: agentWithCustomTool, + ancestorRunIds: [], + clientSessionId: 'test-session', + fileContext: fileContextWithCustomTool, + fingerprintId: 'test-fingerprint', + fullResponse: '', + localAgentTemplates: { 'test-agent': agentWithCustomTool }, + messages: [], + prompt: 'test prompt', + repoId: undefined, + repoUrl: undefined, + runId: 'test-run-id', + signal: new AbortController().signal, + stream: mockStream(), + system: 'test system', + tools: {}, + userId: 'test-user', + userInputId: 'test-input-id', + onCostCalculated: async () => {}, + onResponseChunk: () => {}, + }) + + const assistantToolCallMessages = agentState.messageHistory.filter( + (m): m is AssistantMessage => + m.role === 'assistant' && + m.content.some((c) => c.type === 'tool-call' && c.toolName === toolName), + ) + const toolMessages = agentState.messageHistory.filter( + (m): m is ToolMessage => m.role === 'tool' && m.toolName === toolName, + ) + + expect(assistantToolCallMessages.length).toBe(1) + expect(toolMessages.length).toBe(1) + + const assistantToolCallPart = assistantToolCallMessages[0].content.find( + ( + c, + ): c is Extract => + c.type === 'tool-call' && c.toolName === toolName, + ) + expect(assistantToolCallPart).toBeDefined() + expect(toolMessages[0].toolCallId).toBe(assistantToolCallPart!.toolCallId) + + const assistantIndex = agentState.messageHistory.indexOf( + assistantToolCallMessages[0], + ) + const toolResultIndex = agentState.messageHistory.indexOf(toolMessages[0]) + expect(assistantIndex).toBeGreaterThanOrEqual(0) + expect(toolResultIndex).toBeGreaterThan(assistantIndex) + + const assistantToolCallIds = new Set( + agentState.messageHistory.flatMap((message) => { + if (message.role !== 'assistant') { + return [] + } + return message.content.flatMap((part) => + part.type === 'tool-call' ? [part.toolCallId] : [], + ) + }), + ) + const orphanToolResults = agentState.messageHistory.filter( + (message): message is ToolMessage => + message.role === 'tool' && !assistantToolCallIds.has(message.toolCallId), + ) + expect(orphanToolResults.length).toBe(0) + }) }) diff --git a/packages/agent-runtime/src/tool-stream-parser.ts b/packages/agent-runtime/src/tool-stream-parser.ts index 7beea54852..44f2168f08 100644 --- a/packages/agent-runtime/src/tool-stream-parser.ts +++ b/packages/agent-runtime/src/tool-stream-parser.ts @@ -21,13 +21,25 @@ export async function* processStreamWithTools(params: { processors: Record< string, { - onTagStart: (tagName: string, attributes: Record) => void - onTagEnd: (tagName: string, params: Record) => void + onTagStart: ( + tagName: string, + attributes: Record, + ) => void | Promise + onTagEnd: ( + tagName: string, + params: Record, + ) => void | Promise } > defaultProcessor: (toolName: string) => { - onTagStart: (tagName: string, attributes: Record) => void - onTagEnd: (tagName: string, params: Record) => void + onTagStart: ( + tagName: string, + attributes: Record, + ) => void | Promise + onTagEnd: ( + tagName: string, + params: Record, + ) => void | Promise } onError: (tagName: string, errorMessage: string) => void onResponseChunk: (chunk: PrintModeText | PrintModeError) => void @@ -62,11 +74,11 @@ export async function* processStreamWithTools(params: { // State for parsing XML tool calls from text stream const xmlParserState: StreamParserState = createStreamParserState() - function processToolCallObject(params: { + async function processToolCallObject(params: { toolName: string input: any contents?: string - }): void { + }): Promise { const { toolName, input, contents } = params const processor = processors[toolName] ?? defaultProcessor(toolName) @@ -85,8 +97,8 @@ export async function* processStreamWithTools(params: { logger, }) - processor.onTagStart(toolName, {}) - processor.onTagEnd(toolName, input) + await processor.onTagStart(toolName, {}) + await processor.onTagEnd(toolName, input) } function flush() { @@ -146,7 +158,7 @@ export async function* processStreamWithTools(params: { } if (chunk.type === 'tool-call') { - processToolCallObject(chunk) + await processToolCallObject(chunk) } yield chunk diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts index ac23372a6d..e54377944f 100644 --- a/packages/agent-runtime/src/tools/stream-parser.ts +++ b/packages/agent-runtime/src/tools/stream-parser.ts @@ -84,6 +84,10 @@ export async function processStream( userId, } = params const fullResponseChunks: string[] = [fullResponse] + const messageHistoryBeforeStream = expireMessages( + agentState.messageHistory, + 'agentStep', + ) // === MUTABLE STATE === const toolResults: ToolMessage[] = [] @@ -111,9 +115,11 @@ export async function processStream( return (chunk: string | PrintModeEvent) => { if (typeof chunk !== 'string') { if (chunk.type === 'tool_call') { - assistantMessages.push( - assistantMessage({ ...chunk, type: 'tool-call' }), - ) + if (chunk.includeToolCall !== false) { + assistantMessages.push( + assistantMessage({ ...chunk, type: 'tool-call' }), + ) + } } else if (isXmlMode && chunk.type === 'tool_result') { const toolResultMessage: ToolMessage = { role: 'tool', @@ -182,7 +188,7 @@ export async function processStream( : (toolName as ToolName), input: transformed ? transformed.input : input, fromHandleSteps: false, - skipDirectResultPush: isXmlMode, + skipDirectResultPush: true, fileProcessingState, fullResponse: fullResponseChunks.join(''), previousToolCallFinished: previousPromise, @@ -199,7 +205,7 @@ export async function processStream( ...params, toolName, input, - skipDirectResultPush: isXmlMode, + skipDirectResultPush: true, fileProcessingState, fullResponse: fullResponseChunks.join(''), previousToolCallFinished: previousPromise, @@ -327,20 +333,20 @@ export async function processStream( } } - // === FINALIZATION === - agentState.messageHistory = buildArray([ - ...expireMessages(agentState.messageHistory, 'agentStep'), - ...assistantMessages, - ...toolResultsToAddAfterStream, - ]) - if (!signal.aborted) { resolveStreamDonePromise() await previousToolCallFinished } - // Error messages must come AFTER tool results for proper API ordering - agentState.messageHistory.push(...errorMessages) + // === FINALIZATION === + // Build message history from the pre-stream snapshot so tool_calls and + // tool_results are always appended in deterministic order. + agentState.messageHistory = buildArray([ + ...messageHistoryBeforeStream, + ...assistantMessages, + ...toolResultsToAddAfterStream, + ...errorMessages, + ]) return { fullResponse: fullResponseChunks.join(''), diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts index 02841f5b96..31e1bdb226 100644 --- a/packages/agent-runtime/src/tools/tool-executor.ts +++ b/packages/agent-runtime/src/tools/tool-executor.ts @@ -146,7 +146,7 @@ export async function executeToolCall( previousToolCallFinished, toolCalls, toolResults, - toolResultsToAddAfterStream: _toolResultsToAddAfterStream, + toolResultsToAddAfterStream, userInputId, onCostCalculated, @@ -350,6 +350,10 @@ export async function executeToolCall( toolResults.push(toolResult) + if (!excludeToolFromMessageHistory) { + toolResultsToAddAfterStream.push(toolResult) + } + if (!excludeToolFromMessageHistory && !params.skipDirectResultPush) { agentState.messageHistory.push(toolResult) } @@ -450,7 +454,7 @@ export async function executeCustomToolCall( toolCallId, toolCalls, toolResults, - toolResultsToAddAfterStream: _toolResultsToAddAfterStream, + toolResultsToAddAfterStream, userInputId, } = params const toolCall: CustomToolCall | ToolCallError = parseRawCustomToolCall({ @@ -560,6 +564,10 @@ export async function executeCustomToolCall( toolResults.push(toolResult) + if (!excludeToolFromMessageHistory) { + toolResultsToAddAfterStream.push(toolResult) + } + if (!excludeToolFromMessageHistory && !params.skipDirectResultPush) { agentState.messageHistory.push(toolResult) } diff --git a/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts b/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts index 30a27cf6c4..b649ab3218 100644 --- a/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts +++ b/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts @@ -138,5 +138,25 @@ export function convertToOpenAICompatibleChatMessages( } } + // Debug: dump OpenAI-format message summary to catch tool_use_id mismatches + console.error('[SDK DEBUG] OpenAI-format messages (' + messages.length + '):') + for (let i = 0; i < messages.length; i++) { + const m = messages[i] as Record + const role = m.role as string + if (role === 'tool') { + console.error(` [${i}] tool tool_call_id=${(m as { tool_call_id?: string }).tool_call_id}`) + } else if (role === 'assistant') { + const toolCalls = (m as { tool_calls?: Array<{ id: string; function?: { name: string } }> }).tool_calls + if (toolCalls?.length) { + const ids = toolCalls.map(tc => `${tc.function?.name}:${tc.id}`) + console.error(` [${i}] assistant tool_calls=[${ids.join(', ')}]`) + } else { + console.error(` [${i}] assistant (text)`) + } + } else { + console.error(` [${i}] ${role}`) + } + } + return messages } diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index d11ed2c7d0..3b0dc44a1d 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -225,11 +225,34 @@ export async function* promptAiSdkStream( } } + const convertedMessages = convertCbToModelMessages(params) + + // Debug: dump message summary to catch tool_use_id mismatches before they hit the API + console.error('[SDK DEBUG] promptAiSdkStream messages (' + convertedMessages.length + '):') + for (let i = 0; i < convertedMessages.length; i++) { + const m = convertedMessages[i] as Record + const role = m.role as string + const content = m.content + if (role === 'tool' && Array.isArray(content)) { + const toolIds = (content as Array<{ toolCallId?: string; type?: string }>) + .filter(c => c.type === 'tool-result') + .map(c => c.toolCallId) + console.error(` [${i}] ${role} toolCallIds=${JSON.stringify(toolIds)}`) + } else if (role === 'assistant' && Array.isArray(content)) { + const parts = (content as Array<{ type?: string; toolCallId?: string; toolName?: string }>) + .map(c => c.type === 'tool-call' ? `tool-call(${c.toolName}:${c.toolCallId})` : c.type) + console.error(` [${i}] ${role} parts=[${parts.join(', ')}]`) + } else { + const tags = (m as { tags?: string[] }).tags + console.error(` [${i}] ${role}${tags ? ' tags=' + JSON.stringify(tags) : ''}`) + } + } + const response = streamText({ ...params, prompt: undefined, model: aiSDKModel, - messages: convertCbToModelMessages(params), + messages: convertedMessages, // When using Claude OAuth, disable retries so we can immediately fall back to Codebuff // backend on rate limit errors instead of retrying 4 times first ...(isClaudeOAuth && { maxRetries: 0 }), From daf3d570ad5867bb89308cf6267460b2dccdb1d4 Mon Sep 17 00:00:00 2001 From: Pranav Sharan Date: Sun, 8 Feb 2026 15:16:12 -0800 Subject: [PATCH 2/6] pr comments --- .../src/run-programmatic-step.ts | 6 ++--- .../agent-runtime/src/tools/stream-parser.ts | 25 ++++++++----------- .../agent-runtime/src/tools/tool-executor.ts | 20 +++++---------- ...vert-to-openai-compatible-chat-messages.ts | 20 --------------- sdk/src/impl/llm.ts | 25 +------------------ 5 files changed, 21 insertions(+), 75 deletions(-) diff --git a/packages/agent-runtime/src/run-programmatic-step.ts b/packages/agent-runtime/src/run-programmatic-step.ts index 07cc27a068..82a6f4f55f 100644 --- a/packages/agent-runtime/src/run-programmatic-step.ts +++ b/packages/agent-runtime/src/run-programmatic-step.ts @@ -82,7 +82,7 @@ export async function runProgrammaticStep( | 'toolCallId' | 'toolCalls' | 'toolResults' - | 'toolResultsToAddAfterStream' + | 'toolResultsToAddToMessageHistory' > & ParamsExcluding< AddAgentStepFn, @@ -428,7 +428,7 @@ type ExecuteToolCallsArrayParams = Omit< | 'autoInsertEndStepParam' | 'excludeToolFromMessageHistory' | 'toolCallId' - | 'toolResultsToAddAfterStream' + | 'toolResultsToAddToMessageHistory' > & { agentState: AgentState onResponseChunk: (chunk: string | PrintModeEvent) => void @@ -494,7 +494,7 @@ async function executeSingleToolCall( excludeToolFromMessageHistory, fromHandleSteps: true, toolCallId, - toolResultsToAddAfterStream: [], + toolResultsToAddToMessageHistory: [], onResponseChunk: (chunk: string | PrintModeEvent) => { if (typeof chunk === 'string') { diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts index e54377944f..9d85d5491d 100644 --- a/packages/agent-runtime/src/tools/stream-parser.ts +++ b/packages/agent-runtime/src/tools/stream-parser.ts @@ -14,7 +14,7 @@ import { executeToolCall, tryTransformAgentToolCall, } from './tool-executor' -import { expireMessages, withSystemTags } from '../util/messages' +import { withSystemTags } from '../util/messages' import type { CustomToolCall, ExecuteToolCallParams } from './tool-executor' import type { AgentTemplate } from '../templates/types' @@ -60,7 +60,7 @@ export async function processStream( | 'toolCalls' | 'toolName' | 'toolResults' - | 'toolResultsToAddAfterStream' + | 'toolResultsToAddToMessageHistory' > & ParamsExcluding< typeof processStreamWithTools, @@ -84,14 +84,11 @@ export async function processStream( userId, } = params const fullResponseChunks: string[] = [fullResponse] - const messageHistoryBeforeStream = expireMessages( - agentState.messageHistory, - 'agentStep', - ) + const messageHistoryBeforeStream = [...agentState.messageHistory] // === MUTABLE STATE === const toolResults: ToolMessage[] = [] - const toolResultsToAddAfterStream: ToolMessage[] = [] + const toolResultsToAddToMessageHistory: ToolMessage[] = [] const toolCalls: (CodebuffToolCall | CustomToolCall)[] = [] const assistantMessages: Message[] = [] let hadToolCallError = false @@ -149,7 +146,7 @@ export async function processStream( // isXmlMode=false: defer execution, results added at end (for native tool calls) function createToolExecutionCallback(toolName: string, isXmlMode: boolean) { const responseHandler = createResponseHandler(isXmlMode) - const resultsArray = isXmlMode ? [] : toolResultsToAddAfterStream + const resultsArray = isXmlMode ? [] : toolResultsToAddToMessageHistory return { onTagStart: () => {}, @@ -188,14 +185,14 @@ export async function processStream( : (toolName as ToolName), input: transformed ? transformed.input : input, fromHandleSteps: false, - skipDirectResultPush: true, + fileProcessingState, fullResponse: fullResponseChunks.join(''), previousToolCallFinished: previousPromise, toolCallId, toolCalls, toolResults, - toolResultsToAddAfterStream: resultsArray, + toolResultsToAddToMessageHistory: resultsArray, onCostCalculated, onResponseChunk: responseHandler, }) @@ -205,14 +202,14 @@ export async function processStream( ...params, toolName, input, - skipDirectResultPush: true, + fileProcessingState, fullResponse: fullResponseChunks.join(''), previousToolCallFinished: previousPromise, toolCallId, toolCalls, toolResults, - toolResultsToAddAfterStream: resultsArray, + toolResultsToAddToMessageHistory: resultsArray, onResponseChunk: responseHandler, }) } @@ -250,7 +247,7 @@ export async function processStream( content: jsonToolResult({ errorMessage: error }), } toolResults.push(cloneDeep(toolResult)) - toolResultsToAddAfterStream.push(cloneDeep(toolResult)) + toolResultsToAddToMessageHistory.push(cloneDeep(toolResult)) }, loggerOptions: { userId, @@ -344,7 +341,7 @@ export async function processStream( agentState.messageHistory = buildArray([ ...messageHistoryBeforeStream, ...assistantMessages, - ...toolResultsToAddAfterStream, + ...toolResultsToAddToMessageHistory, ...errorMessages, ]) diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts index 31e1bdb226..5ffbac34f3 100644 --- a/packages/agent-runtime/src/tools/tool-executor.ts +++ b/packages/agent-runtime/src/tools/tool-executor.ts @@ -120,8 +120,7 @@ export type ExecuteToolCallParams = { toolCallId: string | undefined toolCalls: (CodebuffToolCall | CustomToolCall)[] toolResults: ToolMessage[] - toolResultsToAddAfterStream: ToolMessage[] - skipDirectResultPush?: boolean + toolResultsToAddToMessageHistory: ToolMessage[] userId: string | undefined userInputId: string @@ -146,7 +145,7 @@ export async function executeToolCall( previousToolCallFinished, toolCalls, toolResults, - toolResultsToAddAfterStream, + toolResultsToAddToMessageHistory, userInputId, onCostCalculated, @@ -351,11 +350,7 @@ export async function executeToolCall( toolResults.push(toolResult) if (!excludeToolFromMessageHistory) { - toolResultsToAddAfterStream.push(toolResult) - } - - if (!excludeToolFromMessageHistory && !params.skipDirectResultPush) { - agentState.messageHistory.push(toolResult) + toolResultsToAddToMessageHistory.push(toolResult) } // After tool completes, resolve any pending creditsUsed promise @@ -454,7 +449,7 @@ export async function executeCustomToolCall( toolCallId, toolCalls, toolResults, - toolResultsToAddAfterStream, + toolResultsToAddToMessageHistory, userInputId, } = params const toolCall: CustomToolCall | ToolCallError = parseRawCustomToolCall({ @@ -565,13 +560,10 @@ export async function executeCustomToolCall( toolResults.push(toolResult) if (!excludeToolFromMessageHistory) { - toolResultsToAddAfterStream.push(toolResult) + toolResultsToAddToMessageHistory.push(toolResult) } - if (!excludeToolFromMessageHistory && !params.skipDirectResultPush) { - agentState.messageHistory.push(toolResult) - } - return + return }) } diff --git a/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts b/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts index b649ab3218..30a27cf6c4 100644 --- a/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts +++ b/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts @@ -138,25 +138,5 @@ export function convertToOpenAICompatibleChatMessages( } } - // Debug: dump OpenAI-format message summary to catch tool_use_id mismatches - console.error('[SDK DEBUG] OpenAI-format messages (' + messages.length + '):') - for (let i = 0; i < messages.length; i++) { - const m = messages[i] as Record - const role = m.role as string - if (role === 'tool') { - console.error(` [${i}] tool tool_call_id=${(m as { tool_call_id?: string }).tool_call_id}`) - } else if (role === 'assistant') { - const toolCalls = (m as { tool_calls?: Array<{ id: string; function?: { name: string } }> }).tool_calls - if (toolCalls?.length) { - const ids = toolCalls.map(tc => `${tc.function?.name}:${tc.id}`) - console.error(` [${i}] assistant tool_calls=[${ids.join(', ')}]`) - } else { - console.error(` [${i}] assistant (text)`) - } - } else { - console.error(` [${i}] ${role}`) - } - } - return messages } diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index 3b0dc44a1d..d11ed2c7d0 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -225,34 +225,11 @@ export async function* promptAiSdkStream( } } - const convertedMessages = convertCbToModelMessages(params) - - // Debug: dump message summary to catch tool_use_id mismatches before they hit the API - console.error('[SDK DEBUG] promptAiSdkStream messages (' + convertedMessages.length + '):') - for (let i = 0; i < convertedMessages.length; i++) { - const m = convertedMessages[i] as Record - const role = m.role as string - const content = m.content - if (role === 'tool' && Array.isArray(content)) { - const toolIds = (content as Array<{ toolCallId?: string; type?: string }>) - .filter(c => c.type === 'tool-result') - .map(c => c.toolCallId) - console.error(` [${i}] ${role} toolCallIds=${JSON.stringify(toolIds)}`) - } else if (role === 'assistant' && Array.isArray(content)) { - const parts = (content as Array<{ type?: string; toolCallId?: string; toolName?: string }>) - .map(c => c.type === 'tool-call' ? `tool-call(${c.toolName}:${c.toolCallId})` : c.type) - console.error(` [${i}] ${role} parts=[${parts.join(', ')}]`) - } else { - const tags = (m as { tags?: string[] }).tags - console.error(` [${i}] ${role}${tags ? ' tags=' + JSON.stringify(tags) : ''}`) - } - } - const response = streamText({ ...params, prompt: undefined, model: aiSDKModel, - messages: convertedMessages, + messages: convertCbToModelMessages(params), // When using Claude OAuth, disable retries so we can immediately fall back to Codebuff // backend on rate limit errors instead of retrying 4 times first ...(isClaudeOAuth && { maxRetries: 0 }), From 971c8e02250bcb422e3a0f63597dc13ff0e99a74 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 8 Feb 2026 17:13:31 -0800 Subject: [PATCH 3/6] Fix bug: overwrite excludeToolFromMessageHistory to false in processStream --- .../agent-runtime/src/tool-stream-parser.ts | 2 -- .../agent-runtime/src/tools/stream-parser.ts | 25 ++++++------------- .../agent-runtime/src/tools/tool-executor.ts | 8 ++---- 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/packages/agent-runtime/src/tool-stream-parser.ts b/packages/agent-runtime/src/tool-stream-parser.ts index 44f2168f08..543a07f62b 100644 --- a/packages/agent-runtime/src/tool-stream-parser.ts +++ b/packages/agent-runtime/src/tool-stream-parser.ts @@ -41,7 +41,6 @@ export async function* processStreamWithTools(params: { params: Record, ) => void | Promise } - onError: (tagName: string, errorMessage: string) => void onResponseChunk: (chunk: PrintModeText | PrintModeError) => void logger: Logger loggerOptions?: { @@ -60,7 +59,6 @@ export async function* processStreamWithTools(params: { stream, processors, defaultProcessor, - onError: _onError, onResponseChunk, logger, loggerOptions, diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts index 9d85d5491d..309ddffecd 100644 --- a/packages/agent-runtime/src/tools/stream-parser.ts +++ b/packages/agent-runtime/src/tools/stream-parser.ts @@ -1,12 +1,10 @@ import { toolNames } from '@codebuff/common/tools/constants' import { buildArray } from '@codebuff/common/util/array' import { - jsonToolResult, assistantMessage, userMessage, } from '@codebuff/common/util/messages' import { generateCompactId } from '@codebuff/common/util/string' -import { cloneDeep } from 'lodash' import { processStreamWithTools } from '../tool-stream-parser' import { @@ -66,7 +64,6 @@ export async function processStream( typeof processStreamWithTools, | 'processors' | 'defaultProcessor' - | 'onError' | 'loggerOptions' | 'executeXmlToolCall' >, @@ -149,7 +146,7 @@ export async function processStream( const resultsArray = isXmlMode ? [] : toolResultsToAddToMessageHistory return { - onTagStart: () => {}, + onTagStart: () => { }, onTagEnd: async (_: string, input: Record) => { if (signal.aborted) { return @@ -160,10 +157,10 @@ export async function processStream( // Check if this is an agent tool call that should be transformed to spawn_agents const transformed = !isNativeTool ? tryTransformAgentToolCall({ - toolName, - input, - spawnableAgents: agentTemplate.spawnableAgents, - }) + toolName, + input, + spawnableAgents: agentTemplate.spawnableAgents, + }) : null // Read previousToolCallFinished at execution time to ensure proper sequential chaining. @@ -193,6 +190,7 @@ export async function processStream( toolCalls, toolResults, toolResultsToAddToMessageHistory: resultsArray, + excludeToolFromMessageHistory: false, onCostCalculated, onResponseChunk: responseHandler, }) @@ -210,6 +208,7 @@ export async function processStream( toolCalls, toolResults, toolResultsToAddToMessageHistory: resultsArray, + excludeToolFromMessageHistory: false, onResponseChunk: responseHandler, }) } @@ -239,16 +238,6 @@ export async function processStream( ]), defaultProcessor: (name: string) => createToolExecutionCallback(name, false), - onError: (toolName, error) => { - const toolResult: ToolMessage = { - role: 'tool', - toolName, - toolCallId: generateCompactId(), - content: jsonToolResult({ errorMessage: error }), - } - toolResults.push(cloneDeep(toolResult)) - toolResultsToAddToMessageHistory.push(cloneDeep(toolResult)) - }, loggerOptions: { userId, model: agentTemplate.model, diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts index 5ffbac34f3..456215d956 100644 --- a/packages/agent-runtime/src/tools/tool-executor.ts +++ b/packages/agent-runtime/src/tools/tool-executor.ts @@ -533,7 +533,7 @@ export async function executeCustomToolCall( return clientToolResult.output satisfies ToolResultOutput[] }) .then((result) => { - if (result === null) { + if (!result) { return } const toolResult = { @@ -546,10 +546,6 @@ export async function executeCustomToolCall( { input, toolResult }, `${toolName} custom tool call & result (${toolResult.toolCallId})`, ) - if (result === undefined) { - return - } - onResponseChunk({ type: 'tool_result', toolName: toolResult.toolName, @@ -563,7 +559,7 @@ export async function executeCustomToolCall( toolResultsToAddToMessageHistory.push(toolResult) } - return + return }) } From e77eb408affe77545f60cf50c1cf4ca6520dd988 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 8 Feb 2026 17:57:48 -0800 Subject: [PATCH 4/6] Fix: add tool calls appropriately --- .../src/run-programmatic-step.ts | 34 +++++++++++-------- .../agent-runtime/src/tools/stream-parser.ts | 12 +++---- .../agent-runtime/src/tools/tool-executor.ts | 16 +++++++-- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/packages/agent-runtime/src/run-programmatic-step.ts b/packages/agent-runtime/src/run-programmatic-step.ts index 82a6f4f55f..378a4e9bf4 100644 --- a/packages/agent-runtime/src/run-programmatic-step.ts +++ b/packages/agent-runtime/src/run-programmatic-step.ts @@ -137,16 +137,16 @@ export async function runProgrammaticStep( if (!generator) { const createLogMethod = (level: 'debug' | 'info' | 'warn' | 'error') => - (data: any, msg?: string) => { - logger[level](data, msg) // Log to backend - handleStepsLogChunk({ - userInputId, - runId: agentState.runId ?? 'undefined', - level, - data, - message: msg, - }) - } + (data: any, msg?: string) => { + logger[level](data, msg) // Log to backend + handleStepsLogChunk({ + userInputId, + runId: agentState.runId ?? 'undefined', + level, + data, + message: msg, + }) + } const streamingLogger = { debug: createLogMethod('debug'), @@ -243,7 +243,7 @@ export async function runProgrammaticStep( if (!parseResult.success) { throw new Error( `Invalid yield value from handleSteps in agent ${template.id}: ${parseResult.error.message}. ` + - `Received: ${JSON.stringify(result.value)}`, + `Received: ${JSON.stringify(result.value)}`, ) } @@ -334,9 +334,8 @@ export async function runProgrammaticStep( } catch (error) { endTurn = true - const errorMessage = `Error executing handleSteps for agent ${template.id}: ${ - error instanceof Error ? error.message : 'Unknown error' - }` + const errorMessage = `Error executing handleSteps for agent ${template.id}: ${error instanceof Error ? error.message : 'Unknown error' + }` logger.error( { error: getErrorObject(error), template: template.id }, errorMessage, @@ -485,6 +484,7 @@ async function executeSingleToolCall( // }) } + const toolResultsToAddToMessageHistory: ToolMessage[] = [] // Execute the tool call await executeToolCall({ ...params, @@ -494,7 +494,9 @@ async function executeSingleToolCall( excludeToolFromMessageHistory, fromHandleSteps: true, toolCallId, - toolResultsToAddToMessageHistory: [], + toolCalls: [], + toolCallsToAddToMessageHistory: [], + toolResultsToAddToMessageHistory, onResponseChunk: (chunk: string | PrintModeEvent) => { if (typeof chunk === 'string') { @@ -539,6 +541,8 @@ async function executeSingleToolCall( }, }) + agentState.messageHistory.push(...toolResultsToAddToMessageHistory) + // Get the latest tool result return toolResults[toolResults.length - 1]?.content } diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts index 309ddffecd..504029f98b 100644 --- a/packages/agent-runtime/src/tools/stream-parser.ts +++ b/packages/agent-runtime/src/tools/stream-parser.ts @@ -87,6 +87,7 @@ export async function processStream( const toolResults: ToolMessage[] = [] const toolResultsToAddToMessageHistory: ToolMessage[] = [] const toolCalls: (CodebuffToolCall | CustomToolCall)[] = [] + const toolCallsToAddToMessageHistory: (CodebuffToolCall | CustomToolCall)[] = [] const assistantMessages: Message[] = [] let hadToolCallError = false const errorMessages: Message[] = [] @@ -108,13 +109,7 @@ export async function processStream( function createResponseHandler(isXmlMode: boolean) { return (chunk: string | PrintModeEvent) => { if (typeof chunk !== 'string') { - if (chunk.type === 'tool_call') { - if (chunk.includeToolCall !== false) { - assistantMessages.push( - assistantMessage({ ...chunk, type: 'tool-call' }), - ) - } - } else if (isXmlMode && chunk.type === 'tool_result') { + if (isXmlMode && chunk.type === 'tool_result') { const toolResultMessage: ToolMessage = { role: 'tool', toolName: chunk.toolName, @@ -188,6 +183,7 @@ export async function processStream( previousToolCallFinished: previousPromise, toolCallId, toolCalls, + toolCallsToAddToMessageHistory, toolResults, toolResultsToAddToMessageHistory: resultsArray, excludeToolFromMessageHistory: false, @@ -206,6 +202,7 @@ export async function processStream( previousToolCallFinished: previousPromise, toolCallId, toolCalls, + toolCallsToAddToMessageHistory, toolResults, toolResultsToAddToMessageHistory: resultsArray, excludeToolFromMessageHistory: false, @@ -330,6 +327,7 @@ export async function processStream( agentState.messageHistory = buildArray([ ...messageHistoryBeforeStream, ...assistantMessages, + ...toolCallsToAddToMessageHistory.map((toolCall) => assistantMessage({ ...toolCall, type: 'tool-call' })), ...toolResultsToAddToMessageHistory, ...errorMessages, ]) diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts index 456215d956..23d2e7880d 100644 --- a/packages/agent-runtime/src/tools/tool-executor.ts +++ b/packages/agent-runtime/src/tools/tool-executor.ts @@ -33,7 +33,7 @@ import type { Logger } from '@codebuff/common/types/contracts/logger' import type { ToolMessage } from '@codebuff/common/types/messages/codebuff-message' import type { ToolResultOutput } from '@codebuff/common/types/messages/content-part' import type { PrintModeEvent } from '@codebuff/common/types/print-mode' -import type { AgentTemplateType , AgentState, Subgoal } from '@codebuff/common/types/session-state' +import type { AgentTemplateType, AgentState, Subgoal } from '@codebuff/common/types/session-state' import type { CustomToolDefinitions, ProjectFileContext, @@ -119,6 +119,7 @@ export type ExecuteToolCallParams = { tools: ToolSet toolCallId: string | undefined toolCalls: (CodebuffToolCall | CustomToolCall)[] + toolCallsToAddToMessageHistory: (CodebuffToolCall | CustomToolCall)[] toolResults: ToolMessage[] toolResultsToAddToMessageHistory: ToolMessage[] userId: string | undefined @@ -144,6 +145,7 @@ export async function executeToolCall( logger, previousToolCallFinished, toolCalls, + toolCallsToAddToMessageHistory, toolResults, toolResultsToAddToMessageHistory, userInputId, @@ -298,8 +300,6 @@ export async function executeToolCall( includeToolCall: !excludeToolFromMessageHistory, }) - toolCalls.push(toolCall) - // Cast to any to avoid type errors const handler = codebuffToolHandlers[ toolName @@ -311,6 +311,12 @@ export async function executeToolCall( ? { ...toolCall, input: effectiveInput } : toolCall + toolCalls.push(finalToolCall) + if (!excludeToolFromMessageHistory) { + toolCallsToAddToMessageHistory.push(finalToolCall) + } + + const toolResultPromise = handler({ ...params, toolCall: finalToolCall, @@ -448,6 +454,7 @@ export async function executeCustomToolCall( requestToolCall, toolCallId, toolCalls, + toolCallsToAddToMessageHistory, toolResults, toolResultsToAddToMessageHistory, userInputId, @@ -512,6 +519,9 @@ export async function executeCustomToolCall( }) toolCalls.push(toolCall) + if (!excludeToolFromMessageHistory) { + toolCallsToAddToMessageHistory.push(toolCall) + } return previousToolCallFinished .then(async () => { From dcd590747fdd745d2354f045b1be9819b2670677 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 8 Feb 2026 18:05:01 -0800 Subject: [PATCH 5/6] Simplify xml tool call/result handling --- .../agent-runtime/src/tools/stream-parser.ts | 22 +++++-------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts index 504029f98b..fa8b02d004 100644 --- a/packages/agent-runtime/src/tools/stream-parser.ts +++ b/packages/agent-runtime/src/tools/stream-parser.ts @@ -106,18 +106,10 @@ export async function processStream( // === RESPONSE HANDLER === // Creates a response handler that captures tool events into assistantMessages. // When isXmlMode=true, also captures tool_result events for interleaved ordering. - function createResponseHandler(isXmlMode: boolean) { + function createResponseHandler() { return (chunk: string | PrintModeEvent) => { if (typeof chunk !== 'string') { - if (isXmlMode && chunk.type === 'tool_result') { - const toolResultMessage: ToolMessage = { - role: 'tool', - toolName: chunk.toolName, - toolCallId: chunk.toolCallId, - content: chunk.output, - } - assistantMessages.push(toolResultMessage) - } else if (chunk.type === 'error') { + if (chunk.type === 'error') { hadToolCallError = true errorMessages.push( userMessage( @@ -134,12 +126,8 @@ export async function processStream( // === TOOL EXECUTION === // Unified callback factory for both native and custom tools. - // isXmlMode=true: execute immediately, capture results inline (for XML tool calls) - // isXmlMode=false: defer execution, results added at end (for native tool calls) function createToolExecutionCallback(toolName: string, isXmlMode: boolean) { - const responseHandler = createResponseHandler(isXmlMode) - const resultsArray = isXmlMode ? [] : toolResultsToAddToMessageHistory - + const responseHandler = createResponseHandler() return { onTagStart: () => { }, onTagEnd: async (_: string, input: Record) => { @@ -185,7 +173,7 @@ export async function processStream( toolCalls, toolCallsToAddToMessageHistory, toolResults, - toolResultsToAddToMessageHistory: resultsArray, + toolResultsToAddToMessageHistory, excludeToolFromMessageHistory: false, onCostCalculated, onResponseChunk: responseHandler, @@ -204,7 +192,7 @@ export async function processStream( toolCalls, toolCallsToAddToMessageHistory, toolResults, - toolResultsToAddToMessageHistory: resultsArray, + toolResultsToAddToMessageHistory, excludeToolFromMessageHistory: false, onResponseChunk: responseHandler, }) From 8bc716dcfe6918908d028679cf1528b6dd245c03 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 8 Feb 2026 18:17:38 -0800 Subject: [PATCH 6/6] Fix types/tests --- .../src/__tests__/tool-stream-parser.test.ts | 40 ------------------- .../xml-tool-result-ordering.test.ts | 3 -- .../src/run-programmatic-step.ts | 2 + .../agent-runtime/src/tools/stream-parser.ts | 1 + 4 files changed, 3 insertions(+), 43 deletions(-) diff --git a/packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts b/packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts index 391610c27f..c258ff96ed 100644 --- a/packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts +++ b/packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts @@ -47,10 +47,6 @@ describe('processStreamWithTags', () => { }, } - function onError(name: string, error: string) { - events.push({ name, error }) - } - const result: string[] = [] const responseChunks: any[] = [] @@ -70,7 +66,6 @@ describe('processStreamWithTags', () => { stream, processors, defaultProcessor, - onError, onResponseChunk, executeXmlToolCall: async () => {}, })) { @@ -114,10 +109,6 @@ describe('processStreamWithTags', () => { }, } - function onError(name: string, error: string) { - events.push({ name, error }) - } - const result: string[] = [] const responseChunks: any[] = [] @@ -137,7 +128,6 @@ describe('processStreamWithTags', () => { stream, processors, defaultProcessor, - onError, onResponseChunk, executeXmlToolCall: async () => {}, })) { @@ -191,10 +181,6 @@ describe('processStreamWithTags', () => { }, } - function onError(name: string, error: string) { - events.push({ name, error }) - } - const result: string[] = [] const responseChunks: any[] = [] @@ -214,7 +200,6 @@ describe('processStreamWithTags', () => { stream, processors, defaultProcessor, - onError, onResponseChunk, executeXmlToolCall: async () => {}, })) { @@ -267,10 +252,6 @@ describe('processStreamWithTags', () => { }, } - function onError(name: string, error: string) { - events.push({ name, error, type: 'error' }) - } - const responseChunks: any[] = [] function onResponseChunk(chunk: any) { @@ -295,7 +276,6 @@ describe('processStreamWithTags', () => { stream, processors, defaultProcessor, - onError, onResponseChunk, executeXmlToolCall: async () => {}, })) { @@ -341,10 +321,6 @@ describe('processStreamWithTags', () => { }, } - function onError(name: string, error: string) { - events.push({ name, error, type: 'error' }) - } - const result: string[] = [] const responseChunks: any[] = [] @@ -364,7 +340,6 @@ describe('processStreamWithTags', () => { stream, processors, defaultProcessor, - onError, onResponseChunk, executeXmlToolCall: async () => {}, })) { @@ -414,10 +389,6 @@ describe('processStreamWithTags', () => { }, } - function onError(name: string, error: string) { - events.push({ name, error, type: 'error' }) - } - const result: string[] = [] const responseChunks: any[] = [] @@ -437,7 +408,6 @@ describe('processStreamWithTags', () => { stream, processors, defaultProcessor, - onError, onResponseChunk, executeXmlToolCall: async () => {}, })) { @@ -468,10 +438,6 @@ describe('processStreamWithTags', () => { const processors = {} - function onError(name: string, error: string) { - events.push({ name, error, type: 'error' }) - } - const result: string[] = [] const responseChunks: any[] = [] @@ -491,7 +457,6 @@ describe('processStreamWithTags', () => { stream, processors, defaultProcessor, - onError, onResponseChunk, executeXmlToolCall: async () => {}, })) { @@ -515,10 +480,6 @@ describe('processStreamWithTags', () => { const processors = {} - function onError(name: string, error: string) { - events.push({ name, error, type: 'error' }) - } - const result: string[] = [] const responseChunks: any[] = [] @@ -538,7 +499,6 @@ describe('processStreamWithTags', () => { stream, processors, defaultProcessor, - onError, onResponseChunk, executeXmlToolCall: async () => {}, })) { diff --git a/packages/agent-runtime/src/__tests__/xml-tool-result-ordering.test.ts b/packages/agent-runtime/src/__tests__/xml-tool-result-ordering.test.ts index e187abfe2d..65c6742d8e 100644 --- a/packages/agent-runtime/src/__tests__/xml-tool-result-ordering.test.ts +++ b/packages/agent-runtime/src/__tests__/xml-tool-result-ordering.test.ts @@ -59,7 +59,6 @@ describe('XML tool result ordering', () => { stream, processors: {}, defaultProcessor, - onError: () => {}, onResponseChunk, executeXmlToolCall: async ({ toolName, input }) => { executionOrder.push(`executeXmlToolCall:${toolName}`) @@ -136,7 +135,6 @@ describe('XML tool result ordering', () => { stream, processors: {}, defaultProcessor, - onError: () => {}, onResponseChunk, executeXmlToolCall: async ({ toolName }) => { // Simulate tool_call event @@ -206,7 +204,6 @@ describe('XML tool result ordering', () => { stream, processors: {}, defaultProcessor: () => ({ onTagStart: () => {}, onTagEnd: () => {} }), - onError: () => {}, onResponseChunk: () => {}, executeXmlToolCall: async () => { // Simulate tool execution with async work diff --git a/packages/agent-runtime/src/run-programmatic-step.ts b/packages/agent-runtime/src/run-programmatic-step.ts index 378a4e9bf4..ca1512100f 100644 --- a/packages/agent-runtime/src/run-programmatic-step.ts +++ b/packages/agent-runtime/src/run-programmatic-step.ts @@ -81,6 +81,7 @@ export async function runProgrammaticStep( | 'fileProcessingState' | 'toolCallId' | 'toolCalls' + | 'toolCallsToAddToMessageHistory' | 'toolResults' | 'toolResultsToAddToMessageHistory' > & @@ -427,6 +428,7 @@ type ExecuteToolCallsArrayParams = Omit< | 'autoInsertEndStepParam' | 'excludeToolFromMessageHistory' | 'toolCallId' + | 'toolCallsToAddToMessageHistory' | 'toolResultsToAddToMessageHistory' > & { agentState: AgentState diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts index fa8b02d004..2cd9debf5e 100644 --- a/packages/agent-runtime/src/tools/stream-parser.ts +++ b/packages/agent-runtime/src/tools/stream-parser.ts @@ -56,6 +56,7 @@ export async function processStream( | 'state' | 'toolCallId' | 'toolCalls' + | 'toolCallsToAddToMessageHistory' | 'toolName' | 'toolResults' | 'toolResultsToAddToMessageHistory'