From 47a194844c2186dcaed33c6e2950a404f96ea6b0 Mon Sep 17 00:00:00 2001
From: Pranav Sharan <pranav100000@gmail.com>
Date: Sun, 8 Feb 2026 00:46:21 -0800
Subject: [PATCH 1/6] Fix Anthropic tool call bug (tool_call/tool_result ordering)

---
 .../__tests__/tool-validation-error.test.ts   | 128 ++++++++++++++++++
 .../agent-runtime/src/tool-stream-parser.ts   |  30 ++--
 .../agent-runtime/src/tools/stream-parser.ts  |  34 +++--
 .../agent-runtime/src/tools/tool-executor.ts  |  12 +-
 ...vert-to-openai-compatible-chat-messages.ts |  20 +++
 sdk/src/impl/llm.ts                           |  25 +++-
 6 files changed, 223 insertions(+), 26 deletions(-)

diff --git a/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts b/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts
index d9ea5d89f3..df9c1997d8 100644
--- a/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts
+++ b/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts
@@ -1,6 +1,7 @@
 import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime'
 import { getInitialSessionState } from '@codebuff/common/types/session-state'
 import { promptSuccess } from '@codebuff/common/util/error'
+import { jsonToolResult } from '@codebuff/common/util/messages'
 import { beforeEach, describe, expect, it } from 'bun:test'
 
 import { mockFileContext } from './test-utils'
@@ -12,6 +13,10 @@ import type {
   AgentRuntimeScopedDeps,
 } from '@codebuff/common/types/contracts/agent-runtime'
 import type { StreamChunk } from '@codebuff/common/types/contracts/llm'
+import type {
+  AssistantMessage,
+  ToolMessage,
+} from '@codebuff/common/types/messages/codebuff-message'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 
 describe('tool validation error handling', () => {
@@ -225,4 +230,127 @@ describe('tool validation error handling', () => {
     )
     expect(errorEvents.length).toBe(0)
   })
+
+  it('should preserve tool_call/tool_result ordering when custom tool setup is async', async () => {
+    const toolName = 'delayed_custom_tool'
+    const agentWithCustomTool: AgentTemplate = {
+      ...testAgentTemplate,
+      toolNames: [toolName, 'end_turn'],
+    }
+
+    const delayedToolCallChunk: StreamChunk = {
+      type: 'tool-call',
+      toolName,
+      toolCallId: 'delayed-custom-tool-call-id',
+      input: {
+        query: 'test',
+      },
+    }
+
+    async function* mockStream() {
+      yield delayedToolCallChunk
+      return promptSuccess('mock-message-id')
+    }
+
+    const fileContextWithCustomTool = {
+      ...mockFileContext,
+      customToolDefinitions: {
+        [toolName]: {
+          inputSchema: {
+            type: 'object',
+            properties: {
+              query: { type: 'string' },
+            },
+            required: ['query'],
+            additionalProperties: false,
+          },
+          endsAgentStep: false,
+          description: 'A delayed custom tool for ordering tests',
+        },
+      },
+    }
+
+    const sessionState = getInitialSessionState(fileContextWithCustomTool)
+    const agentState = sessionState.mainAgentState
+
+    agentRuntimeImpl.requestMcpToolData = async () => {
+      // Force an async gap so tool_call emission happens after stream completion.
+      await new Promise((resolve) => setTimeout(resolve, 20))
+      return []
+    }
+    agentRuntimeImpl.requestToolCall = async () => ({
+      output: jsonToolResult({ ok: true }),
+    })
+
+    await processStream({
+      ...agentRuntimeImpl,
+      agentContext: {},
+      agentState,
+      agentStepId: 'test-step-id',
+      agentTemplate: agentWithCustomTool,
+      ancestorRunIds: [],
+      clientSessionId: 'test-session',
+      fileContext: fileContextWithCustomTool,
+      fingerprintId: 'test-fingerprint',
+      fullResponse: '',
+      localAgentTemplates: { 'test-agent': agentWithCustomTool },
+      messages: [],
+      prompt: 'test prompt',
+      repoId: undefined,
+      repoUrl: undefined,
+      runId: 'test-run-id',
+      signal: new AbortController().signal,
+      stream: mockStream(),
+      system: 'test system',
+      tools: {},
+      userId: 'test-user',
+      userInputId: 'test-input-id',
+      onCostCalculated: async () => {},
+      onResponseChunk: () => {},
+    })
+
+    const assistantToolCallMessages = agentState.messageHistory.filter(
+      (m): m is AssistantMessage =>
+        m.role === 'assistant' &&
+        m.content.some((c) => c.type === 'tool-call' && c.toolName === toolName),
+    )
+    const toolMessages = agentState.messageHistory.filter(
+      (m): m is ToolMessage => m.role === 'tool' && m.toolName === toolName,
+    )
+
+    expect(assistantToolCallMessages.length).toBe(1)
+    expect(toolMessages.length).toBe(1)
+
+    const assistantToolCallPart = assistantToolCallMessages[0].content.find(
+      (
+        c,
+      ): c is Extract<AssistantMessage['content'][number], { type: 'tool-call' }> =>
+        c.type === 'tool-call' && c.toolName === toolName,
+    )
+    expect(assistantToolCallPart).toBeDefined()
+    expect(toolMessages[0].toolCallId).toBe(assistantToolCallPart!.toolCallId)
+
+    const assistantIndex = agentState.messageHistory.indexOf(
+      assistantToolCallMessages[0],
+    )
+    const toolResultIndex = agentState.messageHistory.indexOf(toolMessages[0])
+    expect(assistantIndex).toBeGreaterThanOrEqual(0)
+    expect(toolResultIndex).toBeGreaterThan(assistantIndex)
+
+    const assistantToolCallIds = new Set(
+      agentState.messageHistory.flatMap((message) => {
+        if (message.role !== 'assistant') {
+          return []
+        }
+        return message.content.flatMap((part) =>
+          part.type === 'tool-call' ? [part.toolCallId] : [],
+        )
+      }),
+    )
+    const orphanToolResults = agentState.messageHistory.filter(
+      (message): message is ToolMessage =>
+        message.role === 'tool' && !assistantToolCallIds.has(message.toolCallId),
+    )
+    expect(orphanToolResults.length).toBe(0)
+  })
 })
diff --git a/packages/agent-runtime/src/tool-stream-parser.ts b/packages/agent-runtime/src/tool-stream-parser.ts
index 7beea54852..44f2168f08 100644
--- a/packages/agent-runtime/src/tool-stream-parser.ts
+++ b/packages/agent-runtime/src/tool-stream-parser.ts
@@ -21,13 +21,25 @@ export async function* processStreamWithTools(params: {
   processors: Record<
     string,
     {
-      onTagStart: (tagName: string, attributes: Record<string, string>) => void
-      onTagEnd: (tagName: string, params: Record<string, any>) => void
+      onTagStart: (
+        tagName: string,
+        attributes: Record<string, string>,
+      ) => void | Promise<void>
+      onTagEnd: (
+        tagName: string,
+        params: Record<string, any>,
+      ) => void | Promise<void>
     }
   >
   defaultProcessor: (toolName: string) => {
-    onTagStart: (tagName: string, attributes: Record<string, string>) => void
-    onTagEnd: (tagName: string, params: Record<string, any>) => void
+    onTagStart: (
+      tagName: string,
+      attributes: Record<string, string>,
+    ) => void | Promise<void>
+    onTagEnd: (
+      tagName: string,
+      params: Record<string, any>,
+    ) => void | Promise<void>
   }
   onError: (tagName: string, errorMessage: string) => void
   onResponseChunk: (chunk: PrintModeText | PrintModeError) => void
@@ -62,11 +74,11 @@ export async function* processStreamWithTools(params: {
   // State for parsing XML tool calls from text stream
   const xmlParserState: StreamParserState = createStreamParserState()
 
-  function processToolCallObject(params: {
+  async function processToolCallObject(params: {
     toolName: string
     input: any
     contents?: string
-  }): void {
+  }): Promise<void> {
     const { toolName, input, contents } = params
 
     const processor = processors[toolName] ?? defaultProcessor(toolName)
@@ -85,8 +97,8 @@ export async function* processStreamWithTools(params: {
       logger,
     })
 
-    processor.onTagStart(toolName, {})
-    processor.onTagEnd(toolName, input)
+    await processor.onTagStart(toolName, {})
+    await processor.onTagEnd(toolName, input)
   }
 
   function flush() {
@@ -146,7 +158,7 @@ export async function* processStreamWithTools(params: {
     }
 
     if (chunk.type === 'tool-call') {
-      processToolCallObject(chunk)
+      await processToolCallObject(chunk)
     }
 
     yield chunk
diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts
index ac23372a6d..e54377944f 100644
--- a/packages/agent-runtime/src/tools/stream-parser.ts
+++ b/packages/agent-runtime/src/tools/stream-parser.ts
@@ -84,6 +84,10 @@ export async function processStream(
     userId,
   } = params
   const fullResponseChunks: string[] = [fullResponse]
+  const messageHistoryBeforeStream = expireMessages(
+    agentState.messageHistory,
+    'agentStep',
+  )
 
   // === MUTABLE STATE ===
   const toolResults: ToolMessage[] = []
@@ -111,9 +115,11 @@ export async function processStream(
     return (chunk: string | PrintModeEvent) => {
       if (typeof chunk !== 'string') {
         if (chunk.type === 'tool_call') {
-          assistantMessages.push(
-            assistantMessage({ ...chunk, type: 'tool-call' }),
-          )
+          if (chunk.includeToolCall !== false) {
+            assistantMessages.push(
+              assistantMessage({ ...chunk, type: 'tool-call' }),
+            )
+          }
         } else if (isXmlMode && chunk.type === 'tool_result') {
           const toolResultMessage: ToolMessage = {
             role: 'tool',
@@ -182,7 +188,7 @@ export async function processStream(
               : (toolName as ToolName),
             input: transformed ? transformed.input : input,
             fromHandleSteps: false,
-            skipDirectResultPush: isXmlMode,
+            skipDirectResultPush: true,
             fileProcessingState,
             fullResponse: fullResponseChunks.join(''),
             previousToolCallFinished: previousPromise,
@@ -199,7 +205,7 @@ export async function processStream(
             ...params,
             toolName,
             input,
-            skipDirectResultPush: isXmlMode,
+            skipDirectResultPush: true,
             fileProcessingState,
             fullResponse: fullResponseChunks.join(''),
             previousToolCallFinished: previousPromise,
@@ -327,20 +333,20 @@ export async function processStream(
     }
   }
 
-  // === FINALIZATION ===
-  agentState.messageHistory = buildArray<Message>([
-    ...expireMessages(agentState.messageHistory, 'agentStep'),
-    ...assistantMessages,
-    ...toolResultsToAddAfterStream,
-  ])
-
   if (!signal.aborted) {
     resolveStreamDonePromise()
     await previousToolCallFinished
   }
 
-  // Error messages must come AFTER tool results for proper API ordering
-  agentState.messageHistory.push(...errorMessages)
+  // === FINALIZATION ===
+  // Build message history from the pre-stream snapshot so tool_calls and
+  // tool_results are always appended in deterministic order.
+  agentState.messageHistory = buildArray<Message>([
+    ...messageHistoryBeforeStream,
+    ...assistantMessages,
+    ...toolResultsToAddAfterStream,
+    ...errorMessages,
+  ])
 
   return {
     fullResponse: fullResponseChunks.join(''),
diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts
index 02841f5b96..31e1bdb226 100644
--- a/packages/agent-runtime/src/tools/tool-executor.ts
+++ b/packages/agent-runtime/src/tools/tool-executor.ts
@@ -146,7 +146,7 @@ export async function executeToolCall<T extends ToolName>(
     previousToolCallFinished,
     toolCalls,
     toolResults,
-    toolResultsToAddAfterStream: _toolResultsToAddAfterStream,
+    toolResultsToAddAfterStream,
     userInputId,
 
     onCostCalculated,
@@ -350,6 +350,10 @@ export async function executeToolCall<T extends ToolName>(
 
     toolResults.push(toolResult)
 
+    if (!excludeToolFromMessageHistory) {
+      toolResultsToAddAfterStream.push(toolResult)
+    }
+
     if (!excludeToolFromMessageHistory && !params.skipDirectResultPush) {
       agentState.messageHistory.push(toolResult)
     }
@@ -450,7 +454,7 @@ export async function executeCustomToolCall(
     toolCallId,
     toolCalls,
     toolResults,
-    toolResultsToAddAfterStream: _toolResultsToAddAfterStream,
+    toolResultsToAddAfterStream,
     userInputId,
   } = params
   const toolCall: CustomToolCall | ToolCallError = parseRawCustomToolCall({
@@ -560,6 +564,10 @@ export async function executeCustomToolCall(
 
       toolResults.push(toolResult)
 
+      if (!excludeToolFromMessageHistory) {
+        toolResultsToAddAfterStream.push(toolResult)
+      }
+
       if (!excludeToolFromMessageHistory && !params.skipDirectResultPush) {
         agentState.messageHistory.push(toolResult)
       }
diff --git a/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts b/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts
index 30a27cf6c4..b649ab3218 100644
--- a/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts
+++ b/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts
@@ -138,5 +138,25 @@ export function convertToOpenAICompatibleChatMessages(
     }
   }
 
+  // Debug: dump OpenAI-format message summary to catch tool_use_id mismatches
+  console.error('[SDK DEBUG] OpenAI-format messages (' + messages.length + '):')
+  for (let i = 0; i < messages.length; i++) {
+    const m = messages[i] as Record<string, unknown>
+    const role = m.role as string
+    if (role === 'tool') {
+      console.error(`  [${i}] tool tool_call_id=${(m as { tool_call_id?: string }).tool_call_id}`)
+    } else if (role === 'assistant') {
+      const toolCalls = (m as { tool_calls?: Array<{ id: string; function?: { name: string } }> }).tool_calls
+      if (toolCalls?.length) {
+        const ids = toolCalls.map(tc => `${tc.function?.name}:${tc.id}`)
+        console.error(`  [${i}] assistant tool_calls=[${ids.join(', ')}]`)
+      } else {
+        console.error(`  [${i}] assistant (text)`)
+      }
+    } else {
+      console.error(`  [${i}] ${role}`)
+    }
+  }
+
   return messages
 }
diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts
index d11ed2c7d0..3b0dc44a1d 100644
--- a/sdk/src/impl/llm.ts
+++ b/sdk/src/impl/llm.ts
@@ -225,11 +225,34 @@ export async function* promptAiSdkStream(
     }
   }
 
+  const convertedMessages = convertCbToModelMessages(params)
+
+  // Debug: dump message summary to catch tool_use_id mismatches before they hit the API
+  console.error('[SDK DEBUG] promptAiSdkStream messages (' + convertedMessages.length + '):')
+  for (let i = 0; i < convertedMessages.length; i++) {
+    const m = convertedMessages[i] as Record<string, unknown>
+    const role = m.role as string
+    const content = m.content
+    if (role === 'tool' && Array.isArray(content)) {
+      const toolIds = (content as Array<{ toolCallId?: string; type?: string }>)
+        .filter(c => c.type === 'tool-result')
+        .map(c => c.toolCallId)
+      console.error(`  [${i}] ${role} toolCallIds=${JSON.stringify(toolIds)}`)
+    } else if (role === 'assistant' && Array.isArray(content)) {
+      const parts = (content as Array<{ type?: string; toolCallId?: string; toolName?: string }>)
+        .map(c => c.type === 'tool-call' ? `tool-call(${c.toolName}:${c.toolCallId})` : c.type)
+      console.error(`  [${i}] ${role} parts=[${parts.join(', ')}]`)
+    } else {
+      const tags = (m as { tags?: string[] }).tags
+      console.error(`  [${i}] ${role}${tags ? ' tags=' + JSON.stringify(tags) : ''}`)
+    }
+  }
+
   const response = streamText({
     ...params,
     prompt: undefined,
     model: aiSDKModel,
-    messages: convertCbToModelMessages(params),
+    messages: convertedMessages,
     // When using Claude OAuth, disable retries so we can immediately fall back to Codebuff
     // backend on rate limit errors instead of retrying 4 times first
     ...(isClaudeOAuth && { maxRetries: 0 }),

From daf3d570ad5867bb89308cf6267460b2dccdb1d4 Mon Sep 17 00:00:00 2001
From: Pranav Sharan <pranav100000@gmail.com>
Date: Sun, 8 Feb 2026 15:16:12 -0800
Subject: [PATCH 2/6] Address PR comments: rename results array, drop debug logs

---
 .../src/run-programmatic-step.ts              |  6 ++---
 .../agent-runtime/src/tools/stream-parser.ts  | 25 ++++++++-----------
 .../agent-runtime/src/tools/tool-executor.ts  | 20 +++++----------
 ...vert-to-openai-compatible-chat-messages.ts | 20 ---------------
 sdk/src/impl/llm.ts                           | 25 +------------------
 5 files changed, 21 insertions(+), 75 deletions(-)

diff --git a/packages/agent-runtime/src/run-programmatic-step.ts b/packages/agent-runtime/src/run-programmatic-step.ts
index 07cc27a068..82a6f4f55f 100644
--- a/packages/agent-runtime/src/run-programmatic-step.ts
+++ b/packages/agent-runtime/src/run-programmatic-step.ts
@@ -82,7 +82,7 @@ export async function runProgrammaticStep(
     | 'toolCallId'
     | 'toolCalls'
     | 'toolResults'
-    | 'toolResultsToAddAfterStream'
+    | 'toolResultsToAddToMessageHistory'
   > &
     ParamsExcluding<
       AddAgentStepFn,
@@ -428,7 +428,7 @@ type ExecuteToolCallsArrayParams = Omit<
   | 'autoInsertEndStepParam'
   | 'excludeToolFromMessageHistory'
   | 'toolCallId'
-  | 'toolResultsToAddAfterStream'
+  | 'toolResultsToAddToMessageHistory'
 > & {
   agentState: AgentState
   onResponseChunk: (chunk: string | PrintModeEvent) => void
@@ -494,7 +494,7 @@ async function executeSingleToolCall(
     excludeToolFromMessageHistory,
     fromHandleSteps: true,
     toolCallId,
-    toolResultsToAddAfterStream: [],
+    toolResultsToAddToMessageHistory: [],
 
     onResponseChunk: (chunk: string | PrintModeEvent) => {
       if (typeof chunk === 'string') {
diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts
index e54377944f..9d85d5491d 100644
--- a/packages/agent-runtime/src/tools/stream-parser.ts
+++ b/packages/agent-runtime/src/tools/stream-parser.ts
@@ -14,7 +14,7 @@ import {
   executeToolCall,
   tryTransformAgentToolCall,
 } from './tool-executor'
-import { expireMessages, withSystemTags } from '../util/messages'
+import { withSystemTags } from '../util/messages'
 
 import type { CustomToolCall, ExecuteToolCallParams } from './tool-executor'
 import type { AgentTemplate } from '../templates/types'
@@ -60,7 +60,7 @@ export async function processStream(
     | 'toolCalls'
     | 'toolName'
     | 'toolResults'
-    | 'toolResultsToAddAfterStream'
+    | 'toolResultsToAddToMessageHistory'
   > &
     ParamsExcluding<
       typeof processStreamWithTools,
@@ -84,14 +84,11 @@ export async function processStream(
     userId,
   } = params
   const fullResponseChunks: string[] = [fullResponse]
-  const messageHistoryBeforeStream = expireMessages(
-    agentState.messageHistory,
-    'agentStep',
-  )
+  const messageHistoryBeforeStream = [...agentState.messageHistory]
 
   // === MUTABLE STATE ===
   const toolResults: ToolMessage[] = []
-  const toolResultsToAddAfterStream: ToolMessage[] = []
+  const toolResultsToAddToMessageHistory: ToolMessage[] = []
   const toolCalls: (CodebuffToolCall | CustomToolCall)[] = []
   const assistantMessages: Message[] = []
   let hadToolCallError = false
@@ -149,7 +146,7 @@ export async function processStream(
   // isXmlMode=false: defer execution, results added at end (for native tool calls)
   function createToolExecutionCallback(toolName: string, isXmlMode: boolean) {
     const responseHandler = createResponseHandler(isXmlMode)
-    const resultsArray = isXmlMode ? [] : toolResultsToAddAfterStream
+    const resultsArray = isXmlMode ? [] : toolResultsToAddToMessageHistory
 
     return {
       onTagStart: () => {},
@@ -188,14 +185,14 @@ export async function processStream(
               : (toolName as ToolName),
             input: transformed ? transformed.input : input,
             fromHandleSteps: false,
-            skipDirectResultPush: true,
+
             fileProcessingState,
             fullResponse: fullResponseChunks.join(''),
             previousToolCallFinished: previousPromise,
             toolCallId,
             toolCalls,
             toolResults,
-            toolResultsToAddAfterStream: resultsArray,
+            toolResultsToAddToMessageHistory: resultsArray,
             onCostCalculated,
             onResponseChunk: responseHandler,
           })
@@ -205,14 +202,14 @@ export async function processStream(
             ...params,
             toolName,
             input,
-            skipDirectResultPush: true,
+
             fileProcessingState,
             fullResponse: fullResponseChunks.join(''),
             previousToolCallFinished: previousPromise,
             toolCallId,
             toolCalls,
             toolResults,
-            toolResultsToAddAfterStream: resultsArray,
+            toolResultsToAddToMessageHistory: resultsArray,
             onResponseChunk: responseHandler,
           })
         }
@@ -250,7 +247,7 @@ export async function processStream(
         content: jsonToolResult({ errorMessage: error }),
       }
       toolResults.push(cloneDeep(toolResult))
-      toolResultsToAddAfterStream.push(cloneDeep(toolResult))
+      toolResultsToAddToMessageHistory.push(cloneDeep(toolResult))
     },
     loggerOptions: {
       userId,
@@ -344,7 +341,7 @@ export async function processStream(
   agentState.messageHistory = buildArray<Message>([
     ...messageHistoryBeforeStream,
     ...assistantMessages,
-    ...toolResultsToAddAfterStream,
+    ...toolResultsToAddToMessageHistory,
     ...errorMessages,
   ])
 
diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts
index 31e1bdb226..5ffbac34f3 100644
--- a/packages/agent-runtime/src/tools/tool-executor.ts
+++ b/packages/agent-runtime/src/tools/tool-executor.ts
@@ -120,8 +120,7 @@ export type ExecuteToolCallParams<T extends string = ToolName> = {
   toolCallId: string | undefined
   toolCalls: (CodebuffToolCall | CustomToolCall)[]
   toolResults: ToolMessage[]
-  toolResultsToAddAfterStream: ToolMessage[]
-  skipDirectResultPush?: boolean
+  toolResultsToAddToMessageHistory: ToolMessage[]
   userId: string | undefined
   userInputId: string
 
@@ -146,7 +145,7 @@ export async function executeToolCall<T extends ToolName>(
     previousToolCallFinished,
     toolCalls,
     toolResults,
-    toolResultsToAddAfterStream,
+    toolResultsToAddToMessageHistory,
     userInputId,
 
     onCostCalculated,
@@ -351,11 +350,7 @@ export async function executeToolCall<T extends ToolName>(
     toolResults.push(toolResult)
 
     if (!excludeToolFromMessageHistory) {
-      toolResultsToAddAfterStream.push(toolResult)
-    }
-
-    if (!excludeToolFromMessageHistory && !params.skipDirectResultPush) {
-      agentState.messageHistory.push(toolResult)
+      toolResultsToAddToMessageHistory.push(toolResult)
     }
 
     // After tool completes, resolve any pending creditsUsed promise
@@ -454,7 +449,7 @@ export async function executeCustomToolCall(
     toolCallId,
     toolCalls,
     toolResults,
-    toolResultsToAddAfterStream,
+    toolResultsToAddToMessageHistory,
     userInputId,
   } = params
   const toolCall: CustomToolCall | ToolCallError = parseRawCustomToolCall({
@@ -565,13 +560,10 @@ export async function executeCustomToolCall(
       toolResults.push(toolResult)
 
       if (!excludeToolFromMessageHistory) {
-        toolResultsToAddAfterStream.push(toolResult)
+        toolResultsToAddToMessageHistory.push(toolResult)
       }
 
-      if (!excludeToolFromMessageHistory && !params.skipDirectResultPush) {
-        agentState.messageHistory.push(toolResult)
-      }
-      return
+        return
     })
 }
 
diff --git a/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts b/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts
index b649ab3218..30a27cf6c4 100644
--- a/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts
+++ b/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts
@@ -138,25 +138,5 @@ export function convertToOpenAICompatibleChatMessages(
     }
   }
 
-  // Debug: dump OpenAI-format message summary to catch tool_use_id mismatches
-  console.error('[SDK DEBUG] OpenAI-format messages (' + messages.length + '):')
-  for (let i = 0; i < messages.length; i++) {
-    const m = messages[i] as Record<string, unknown>
-    const role = m.role as string
-    if (role === 'tool') {
-      console.error(`  [${i}] tool tool_call_id=${(m as { tool_call_id?: string }).tool_call_id}`)
-    } else if (role === 'assistant') {
-      const toolCalls = (m as { tool_calls?: Array<{ id: string; function?: { name: string } }> }).tool_calls
-      if (toolCalls?.length) {
-        const ids = toolCalls.map(tc => `${tc.function?.name}:${tc.id}`)
-        console.error(`  [${i}] assistant tool_calls=[${ids.join(', ')}]`)
-      } else {
-        console.error(`  [${i}] assistant (text)`)
-      }
-    } else {
-      console.error(`  [${i}] ${role}`)
-    }
-  }
-
   return messages
 }
diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts
index 3b0dc44a1d..d11ed2c7d0 100644
--- a/sdk/src/impl/llm.ts
+++ b/sdk/src/impl/llm.ts
@@ -225,34 +225,11 @@ export async function* promptAiSdkStream(
     }
   }
 
-  const convertedMessages = convertCbToModelMessages(params)
-
-  // Debug: dump message summary to catch tool_use_id mismatches before they hit the API
-  console.error('[SDK DEBUG] promptAiSdkStream messages (' + convertedMessages.length + '):')
-  for (let i = 0; i < convertedMessages.length; i++) {
-    const m = convertedMessages[i] as Record<string, unknown>
-    const role = m.role as string
-    const content = m.content
-    if (role === 'tool' && Array.isArray(content)) {
-      const toolIds = (content as Array<{ toolCallId?: string; type?: string }>)
-        .filter(c => c.type === 'tool-result')
-        .map(c => c.toolCallId)
-      console.error(`  [${i}] ${role} toolCallIds=${JSON.stringify(toolIds)}`)
-    } else if (role === 'assistant' && Array.isArray(content)) {
-      const parts = (content as Array<{ type?: string; toolCallId?: string; toolName?: string }>)
-        .map(c => c.type === 'tool-call' ? `tool-call(${c.toolName}:${c.toolCallId})` : c.type)
-      console.error(`  [${i}] ${role} parts=[${parts.join(', ')}]`)
-    } else {
-      const tags = (m as { tags?: string[] }).tags
-      console.error(`  [${i}] ${role}${tags ? ' tags=' + JSON.stringify(tags) : ''}`)
-    }
-  }
-
   const response = streamText({
     ...params,
     prompt: undefined,
     model: aiSDKModel,
-    messages: convertedMessages,
+    messages: convertCbToModelMessages(params),
     // When using Claude OAuth, disable retries so we can immediately fall back to Codebuff
     // backend on rate limit errors instead of retrying 4 times first
     ...(isClaudeOAuth && { maxRetries: 0 }),

From 971c8e02250bcb422e3a0f63597dc13ff0e99a74 Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Sun, 8 Feb 2026 17:13:31 -0800
Subject: [PATCH 3/6] Fix bug: overwrite excludeToolFromMessageHistory to false
 in processStream

---
 .../agent-runtime/src/tool-stream-parser.ts   |  2 --
 .../agent-runtime/src/tools/stream-parser.ts  | 25 ++++++-------------
 .../agent-runtime/src/tools/tool-executor.ts  |  8 ++----
 3 files changed, 9 insertions(+), 26 deletions(-)

diff --git a/packages/agent-runtime/src/tool-stream-parser.ts b/packages/agent-runtime/src/tool-stream-parser.ts
index 44f2168f08..543a07f62b 100644
--- a/packages/agent-runtime/src/tool-stream-parser.ts
+++ b/packages/agent-runtime/src/tool-stream-parser.ts
@@ -41,7 +41,6 @@ export async function* processStreamWithTools(params: {
       params: Record<string, any>,
     ) => void | Promise<void>
   }
-  onError: (tagName: string, errorMessage: string) => void
   onResponseChunk: (chunk: PrintModeText | PrintModeError) => void
   logger: Logger
   loggerOptions?: {
@@ -60,7 +59,6 @@ export async function* processStreamWithTools(params: {
     stream,
     processors,
     defaultProcessor,
-    onError: _onError,
     onResponseChunk,
     logger,
     loggerOptions,
diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts
index 9d85d5491d..309ddffecd 100644
--- a/packages/agent-runtime/src/tools/stream-parser.ts
+++ b/packages/agent-runtime/src/tools/stream-parser.ts
@@ -1,12 +1,10 @@
 import { toolNames } from '@codebuff/common/tools/constants'
 import { buildArray } from '@codebuff/common/util/array'
 import {
-  jsonToolResult,
   assistantMessage,
   userMessage,
 } from '@codebuff/common/util/messages'
 import { generateCompactId } from '@codebuff/common/util/string'
-import { cloneDeep } from 'lodash'
 
 import { processStreamWithTools } from '../tool-stream-parser'
 import {
@@ -66,7 +64,6 @@ export async function processStream(
       typeof processStreamWithTools,
       | 'processors'
       | 'defaultProcessor'
-      | 'onError'
       | 'loggerOptions'
       | 'executeXmlToolCall'
     >,
@@ -149,7 +146,7 @@ export async function processStream(
     const resultsArray = isXmlMode ? [] : toolResultsToAddToMessageHistory
 
     return {
-      onTagStart: () => {},
+      onTagStart: () => { },
       onTagEnd: async (_: string, input: Record<string, string>) => {
         if (signal.aborted) {
           return
@@ -160,10 +157,10 @@ export async function processStream(
         // Check if this is an agent tool call that should be transformed to spawn_agents
         const transformed = !isNativeTool
           ? tryTransformAgentToolCall({
-              toolName,
-              input,
-              spawnableAgents: agentTemplate.spawnableAgents,
-            })
+            toolName,
+            input,
+            spawnableAgents: agentTemplate.spawnableAgents,
+          })
           : null
 
         // Read previousToolCallFinished at execution time to ensure proper sequential chaining.
@@ -193,6 +190,7 @@ export async function processStream(
             toolCalls,
             toolResults,
             toolResultsToAddToMessageHistory: resultsArray,
+            excludeToolFromMessageHistory: false,
             onCostCalculated,
             onResponseChunk: responseHandler,
           })
@@ -210,6 +208,7 @@ export async function processStream(
             toolCalls,
             toolResults,
             toolResultsToAddToMessageHistory: resultsArray,
+            excludeToolFromMessageHistory: false,
             onResponseChunk: responseHandler,
           })
         }
@@ -239,16 +238,6 @@ export async function processStream(
     ]),
     defaultProcessor: (name: string) =>
       createToolExecutionCallback(name, false),
-    onError: (toolName, error) => {
-      const toolResult: ToolMessage = {
-        role: 'tool',
-        toolName,
-        toolCallId: generateCompactId(),
-        content: jsonToolResult({ errorMessage: error }),
-      }
-      toolResults.push(cloneDeep(toolResult))
-      toolResultsToAddToMessageHistory.push(cloneDeep(toolResult))
-    },
     loggerOptions: {
       userId,
       model: agentTemplate.model,
diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts
index 5ffbac34f3..456215d956 100644
--- a/packages/agent-runtime/src/tools/tool-executor.ts
+++ b/packages/agent-runtime/src/tools/tool-executor.ts
@@ -533,7 +533,7 @@ export async function executeCustomToolCall(
       return clientToolResult.output satisfies ToolResultOutput[]
     })
     .then((result) => {
-      if (result === null) {
+      if (!result) {
         return
       }
       const toolResult = {
@@ -546,10 +546,6 @@ export async function executeCustomToolCall(
         { input, toolResult },
         `${toolName} custom tool call & result (${toolResult.toolCallId})`,
       )
-      if (result === undefined) {
-        return
-      }
-
       onResponseChunk({
         type: 'tool_result',
         toolName: toolResult.toolName,
@@ -563,7 +559,7 @@ export async function executeCustomToolCall(
         toolResultsToAddToMessageHistory.push(toolResult)
       }
 
-        return
+      return
     })
 }
 

From e77eb408affe77545f60cf50c1cf4ca6520dd988 Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Sun, 8 Feb 2026 17:57:48 -0800
Subject: [PATCH 4/6] Fix: add tool calls appropriately

---
 .../src/run-programmatic-step.ts              | 34 +++++++++++--------
 .../agent-runtime/src/tools/stream-parser.ts  | 12 +++----
 .../agent-runtime/src/tools/tool-executor.ts  | 16 +++++++--
 3 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/packages/agent-runtime/src/run-programmatic-step.ts b/packages/agent-runtime/src/run-programmatic-step.ts
index 82a6f4f55f..378a4e9bf4 100644
--- a/packages/agent-runtime/src/run-programmatic-step.ts
+++ b/packages/agent-runtime/src/run-programmatic-step.ts
@@ -137,16 +137,16 @@ export async function runProgrammaticStep(
   if (!generator) {
     const createLogMethod =
       (level: 'debug' | 'info' | 'warn' | 'error') =>
-      (data: any, msg?: string) => {
-        logger[level](data, msg) // Log to backend
-        handleStepsLogChunk({
-          userInputId,
-          runId: agentState.runId ?? 'undefined',
-          level,
-          data,
-          message: msg,
-        })
-      }
+        (data: any, msg?: string) => {
+          logger[level](data, msg) // Log to backend
+          handleStepsLogChunk({
+            userInputId,
+            runId: agentState.runId ?? 'undefined',
+            level,
+            data,
+            message: msg,
+          })
+        }
 
     const streamingLogger = {
       debug: createLogMethod('debug'),
@@ -243,7 +243,7 @@ export async function runProgrammaticStep(
       if (!parseResult.success) {
         throw new Error(
           `Invalid yield value from handleSteps in agent ${template.id}: ${parseResult.error.message}. ` +
-            `Received: ${JSON.stringify(result.value)}`,
+          `Received: ${JSON.stringify(result.value)}`,
         )
       }
 
@@ -334,9 +334,8 @@ export async function runProgrammaticStep(
   } catch (error) {
     endTurn = true
 
-    const errorMessage = `Error executing handleSteps for agent ${template.id}: ${
-      error instanceof Error ? error.message : 'Unknown error'
-    }`
+    const errorMessage = `Error executing handleSteps for agent ${template.id}: ${error instanceof Error ? error.message : 'Unknown error'
+      }`
     logger.error(
       { error: getErrorObject(error), template: template.id },
       errorMessage,
@@ -485,6 +484,7 @@ async function executeSingleToolCall(
     // })
   }
 
+  const toolResultsToAddToMessageHistory: ToolMessage[] = []
   // Execute the tool call
   await executeToolCall({
     ...params,
@@ -494,7 +494,9 @@ async function executeSingleToolCall(
     excludeToolFromMessageHistory,
     fromHandleSteps: true,
     toolCallId,
-    toolResultsToAddToMessageHistory: [],
+    toolCalls: [],
+    toolCallsToAddToMessageHistory: [],
+    toolResultsToAddToMessageHistory,
 
     onResponseChunk: (chunk: string | PrintModeEvent) => {
       if (typeof chunk === 'string') {
@@ -539,6 +541,8 @@ async function executeSingleToolCall(
     },
   })
 
+  agentState.messageHistory.push(...toolResultsToAddToMessageHistory)
+
   // Get the latest tool result
   return toolResults[toolResults.length - 1]?.content
 }
diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts
index 309ddffecd..504029f98b 100644
--- a/packages/agent-runtime/src/tools/stream-parser.ts
+++ b/packages/agent-runtime/src/tools/stream-parser.ts
@@ -87,6 +87,7 @@ export async function processStream(
   const toolResults: ToolMessage[] = []
   const toolResultsToAddToMessageHistory: ToolMessage[] = []
   const toolCalls: (CodebuffToolCall | CustomToolCall)[] = []
+  const toolCallsToAddToMessageHistory: (CodebuffToolCall | CustomToolCall)[] = []
   const assistantMessages: Message[] = []
   let hadToolCallError = false
   const errorMessages: Message[] = []
@@ -108,13 +109,7 @@ export async function processStream(
   function createResponseHandler(isXmlMode: boolean) {
     return (chunk: string | PrintModeEvent) => {
       if (typeof chunk !== 'string') {
-        if (chunk.type === 'tool_call') {
-          if (chunk.includeToolCall !== false) {
-            assistantMessages.push(
-              assistantMessage({ ...chunk, type: 'tool-call' }),
-            )
-          }
-        } else if (isXmlMode && chunk.type === 'tool_result') {
+        if (isXmlMode && chunk.type === 'tool_result') {
           const toolResultMessage: ToolMessage = {
             role: 'tool',
             toolName: chunk.toolName,
@@ -188,6 +183,7 @@ export async function processStream(
             previousToolCallFinished: previousPromise,
             toolCallId,
             toolCalls,
+            toolCallsToAddToMessageHistory,
             toolResults,
             toolResultsToAddToMessageHistory: resultsArray,
             excludeToolFromMessageHistory: false,
@@ -206,6 +202,7 @@ export async function processStream(
             previousToolCallFinished: previousPromise,
             toolCallId,
             toolCalls,
+            toolCallsToAddToMessageHistory,
             toolResults,
             toolResultsToAddToMessageHistory: resultsArray,
             excludeToolFromMessageHistory: false,
@@ -330,6 +327,7 @@ export async function processStream(
   agentState.messageHistory = buildArray<Message>([
     ...messageHistoryBeforeStream,
     ...assistantMessages,
+    ...toolCallsToAddToMessageHistory.map((toolCall) => assistantMessage({ ...toolCall, type: 'tool-call' })),
     ...toolResultsToAddToMessageHistory,
     ...errorMessages,
   ])
diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts
index 456215d956..23d2e7880d 100644
--- a/packages/agent-runtime/src/tools/tool-executor.ts
+++ b/packages/agent-runtime/src/tools/tool-executor.ts
@@ -33,7 +33,7 @@ import type { Logger } from '@codebuff/common/types/contracts/logger'
 import type { ToolMessage } from '@codebuff/common/types/messages/codebuff-message'
 import type { ToolResultOutput } from '@codebuff/common/types/messages/content-part'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
-import type { AgentTemplateType , AgentState, Subgoal } from '@codebuff/common/types/session-state'
+import type { AgentTemplateType, AgentState, Subgoal } from '@codebuff/common/types/session-state'
 import type {
   CustomToolDefinitions,
   ProjectFileContext,
@@ -119,6 +119,7 @@ export type ExecuteToolCallParams<T extends string = ToolName> = {
   tools: ToolSet
   toolCallId: string | undefined
   toolCalls: (CodebuffToolCall | CustomToolCall)[]
+  toolCallsToAddToMessageHistory: (CodebuffToolCall | CustomToolCall)[]
   toolResults: ToolMessage[]
   toolResultsToAddToMessageHistory: ToolMessage[]
   userId: string | undefined
@@ -144,6 +145,7 @@ export async function executeToolCall<T extends ToolName>(
     logger,
     previousToolCallFinished,
     toolCalls,
+    toolCallsToAddToMessageHistory,
     toolResults,
     toolResultsToAddToMessageHistory,
     userInputId,
@@ -298,8 +300,6 @@ export async function executeToolCall<T extends ToolName>(
     includeToolCall: !excludeToolFromMessageHistory,
   })
 
-  toolCalls.push(toolCall)
-
   // Cast to any to avoid type errors
   const handler = codebuffToolHandlers[
     toolName
@@ -311,6 +311,12 @@ export async function executeToolCall<T extends ToolName>(
       ? { ...toolCall, input: effectiveInput }
       : toolCall
 
+  toolCalls.push(finalToolCall)
+  if (!excludeToolFromMessageHistory) {
+    toolCallsToAddToMessageHistory.push(finalToolCall)
+  }
+
+
   const toolResultPromise = handler({
     ...params,
     toolCall: finalToolCall,
@@ -448,6 +454,7 @@ export async function executeCustomToolCall(
     requestToolCall,
     toolCallId,
     toolCalls,
+    toolCallsToAddToMessageHistory,
     toolResults,
     toolResultsToAddToMessageHistory,
     userInputId,
@@ -512,6 +519,9 @@ export async function executeCustomToolCall(
   })
 
   toolCalls.push(toolCall)
+  if (!excludeToolFromMessageHistory) {
+    toolCallsToAddToMessageHistory.push(toolCall)
+  }
 
   return previousToolCallFinished
     .then(async () => {

From dcd590747fdd745d2354f045b1be9819b2670677 Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Sun, 8 Feb 2026 18:05:01 -0800
Subject: [PATCH 5/6] Simplify xml tool call/result handling

---
 .../agent-runtime/src/tools/stream-parser.ts  | 22 +++++--------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts
index 504029f98b..fa8b02d004 100644
--- a/packages/agent-runtime/src/tools/stream-parser.ts
+++ b/packages/agent-runtime/src/tools/stream-parser.ts
@@ -106,18 +106,10 @@ export async function processStream(
   // === RESPONSE HANDLER ===
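-  // Creates a response handler that captures tool events into assistantMessages.
-  // When isXmlMode=true, also captures tool_result events for interleaved ordering.
+  // Creates a response handler that watches for tool error events: it sets
+  // hadToolCallError and records each error as a user message in errorMessages.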
-  function createResponseHandler(isXmlMode: boolean) {
+  function createResponseHandler() {
     return (chunk: string | PrintModeEvent) => {
       if (typeof chunk !== 'string') {
-        if (isXmlMode && chunk.type === 'tool_result') {
-          const toolResultMessage: ToolMessage = {
-            role: 'tool',
-            toolName: chunk.toolName,
-            toolCallId: chunk.toolCallId,
-            content: chunk.output,
-          }
-          assistantMessages.push(toolResultMessage)
-        } else if (chunk.type === 'error') {
+        if (chunk.type === 'error') {
           hadToolCallError = true
           errorMessages.push(
             userMessage(
@@ -134,12 +126,8 @@ export async function processStream(
 
   // === TOOL EXECUTION ===
   // Unified callback factory for both native and custom tools.
-  // isXmlMode=true: execute immediately, capture results inline (for XML tool calls)
-  // isXmlMode=false: defer execution, results added at end (for native tool calls)
   function createToolExecutionCallback(toolName: string, isXmlMode: boolean) {
-    const responseHandler = createResponseHandler(isXmlMode)
-    const resultsArray = isXmlMode ? [] : toolResultsToAddToMessageHistory
-
+    const responseHandler = createResponseHandler()
     return {
       onTagStart: () => { },
       onTagEnd: async (_: string, input: Record<string, string>) => {
@@ -185,7 +173,7 @@ export async function processStream(
             toolCalls,
             toolCallsToAddToMessageHistory,
             toolResults,
-            toolResultsToAddToMessageHistory: resultsArray,
+            toolResultsToAddToMessageHistory,
             excludeToolFromMessageHistory: false,
             onCostCalculated,
             onResponseChunk: responseHandler,
@@ -204,7 +192,7 @@ export async function processStream(
             toolCalls,
             toolCallsToAddToMessageHistory,
             toolResults,
-            toolResultsToAddToMessageHistory: resultsArray,
+            toolResultsToAddToMessageHistory,
             excludeToolFromMessageHistory: false,
             onResponseChunk: responseHandler,
           })

From 8bc716dcfe6918908d028679cf1528b6dd245c03 Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Sun, 8 Feb 2026 18:17:38 -0800
Subject: [PATCH 6/6] Fix types/tests

---
 .../src/__tests__/tool-stream-parser.test.ts  | 40 -------------------
 .../xml-tool-result-ordering.test.ts          |  3 --
 .../src/run-programmatic-step.ts              |  2 +
 .../agent-runtime/src/tools/stream-parser.ts  |  1 +
 4 files changed, 3 insertions(+), 43 deletions(-)

diff --git a/packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts b/packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts
index 391610c27f..c258ff96ed 100644
--- a/packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts
+++ b/packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts
@@ -47,10 +47,6 @@ describe('processStreamWithTags', () => {
       },
     }
 
-    function onError(name: string, error: string) {
-      events.push({ name, error })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -70,7 +66,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -114,10 +109,6 @@ describe('processStreamWithTags', () => {
       },
     }
 
-    function onError(name: string, error: string) {
-      events.push({ name, error })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -137,7 +128,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -191,10 +181,6 @@ describe('processStreamWithTags', () => {
       },
     }
 
-    function onError(name: string, error: string) {
-      events.push({ name, error })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -214,7 +200,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -267,10 +252,6 @@ describe('processStreamWithTags', () => {
       },
     }
 
-    function onError(name: string, error: string) {
-      events.push({ name, error, type: 'error' })
-    }
-
     const responseChunks: any[] = []
 
     function onResponseChunk(chunk: any) {
@@ -295,7 +276,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -341,10 +321,6 @@ describe('processStreamWithTags', () => {
       },
     }
 
-    function onError(name: string, error: string) {
-      events.push({ name, error, type: 'error' })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -364,7 +340,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -414,10 +389,6 @@ describe('processStreamWithTags', () => {
       },
     }
 
-    function onError(name: string, error: string) {
-      events.push({ name, error, type: 'error' })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -437,7 +408,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -468,10 +438,6 @@ describe('processStreamWithTags', () => {
 
     const processors = {}
 
-    function onError(name: string, error: string) {
-      events.push({ name, error, type: 'error' })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -491,7 +457,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -515,10 +480,6 @@ describe('processStreamWithTags', () => {
 
     const processors = {}
 
-    function onError(name: string, error: string) {
-      events.push({ name, error, type: 'error' })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -538,7 +499,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
diff --git a/packages/agent-runtime/src/__tests__/xml-tool-result-ordering.test.ts b/packages/agent-runtime/src/__tests__/xml-tool-result-ordering.test.ts
index e187abfe2d..65c6742d8e 100644
--- a/packages/agent-runtime/src/__tests__/xml-tool-result-ordering.test.ts
+++ b/packages/agent-runtime/src/__tests__/xml-tool-result-ordering.test.ts
@@ -59,7 +59,6 @@ describe('XML tool result ordering', () => {
       stream,
       processors: {},
       defaultProcessor,
-      onError: () => {},
       onResponseChunk,
       executeXmlToolCall: async ({ toolName, input }) => {
         executionOrder.push(`executeXmlToolCall:${toolName}`)
@@ -136,7 +135,6 @@ describe('XML tool result ordering', () => {
       stream,
       processors: {},
       defaultProcessor,
-      onError: () => {},
       onResponseChunk,
       executeXmlToolCall: async ({ toolName }) => {
         // Simulate tool_call event
@@ -206,7 +204,6 @@ describe('XML tool result ordering', () => {
         stream,
         processors: {},
         defaultProcessor: () => ({ onTagStart: () => {}, onTagEnd: () => {} }),
-        onError: () => {},
         onResponseChunk: () => {},
         executeXmlToolCall: async () => {
           // Simulate tool execution with async work
diff --git a/packages/agent-runtime/src/run-programmatic-step.ts b/packages/agent-runtime/src/run-programmatic-step.ts
index 378a4e9bf4..ca1512100f 100644
--- a/packages/agent-runtime/src/run-programmatic-step.ts
+++ b/packages/agent-runtime/src/run-programmatic-step.ts
@@ -81,6 +81,7 @@ export async function runProgrammaticStep(
     | 'fileProcessingState'
     | 'toolCallId'
     | 'toolCalls'
+    | 'toolCallsToAddToMessageHistory'
     | 'toolResults'
     | 'toolResultsToAddToMessageHistory'
   > &
@@ -427,6 +428,7 @@ type ExecuteToolCallsArrayParams = Omit<
   | 'autoInsertEndStepParam'
   | 'excludeToolFromMessageHistory'
   | 'toolCallId'
+  | 'toolCallsToAddToMessageHistory'
   | 'toolResultsToAddToMessageHistory'
 > & {
   agentState: AgentState
diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts
index fa8b02d004..2cd9debf5e 100644
--- a/packages/agent-runtime/src/tools/stream-parser.ts
+++ b/packages/agent-runtime/src/tools/stream-parser.ts
@@ -56,6 +56,7 @@ export async function processStream(
     | 'state'
     | 'toolCallId'
     | 'toolCalls'
+    | 'toolCallsToAddToMessageHistory'
     | 'toolName'
     | 'toolResults'
     | 'toolResultsToAddToMessageHistory'