From 1aed3011186ea5a7706f4a833ecb83552f511216 Mon Sep 17 00:00:00 2001 From: Jeff Haynie Date: Wed, 15 Apr 2026 20:09:28 -0500 Subject: [PATCH 1/5] fix: handle multimodal content arrays and preserve non-standard message fields - Change Message.Content from string to any to support multimodal content (arrays of text/image parts) instead of just strings - Add Message.Custom map to preserve non-standard fields like tool_calls, function_call, tool_call_id, refusal, name, etc. - Add custom UnmarshalJSON/MarshalJSON for Message to capture and re-emit unknown fields during serialization (needed for prompt caching) - Fix streaming extractors in bedrock and googleai to handle Content as any - Enhance Responses API extractor with: - Annotations support (url_citation for web search results) - Reasoning output type with summary field - Function call support (name, arguments) - Array response format (streaming output) - Store full output in Custom for access to web_search_call, etc. - Add comprehensive tests for: - Content as string, array, null, number, boolean - Non-standard message properties - Round-trip marshaling preserving custom fields - Web search with annotations - Reasoning outputs with reasoning_tokens - Multiple text segments - Error handling Fixes error: "json: cannot unmarshal array into Go struct field Message.Alias.messages.content of type string" --- metadata.go | 65 +- providers/anthropic/parser_test.go | 20 +- providers/bedrock/parser_test.go | 17 +- providers/bedrock/streaming_extractor.go | 6 +- providers/googleai/parser_test.go | 30 +- providers/googleai/streaming_extractor.go | 6 +- providers/openai_compatible/parser_test.go | 298 +++++- .../openai_compatible/responses_extractor.go | 43 +- providers/openai_compatible/responses_test.go | 938 ++++++++++++++++++ 9 files changed, 1399 insertions(+), 24 deletions(-) diff --git a/metadata.go b/metadata.go index 15bef47..0f0cae8 100644 --- a/metadata.go +++ b/metadata.go @@ -22,12 +22,73 @@ // resp, meta, _ := 
proxy.Forward(ctx, req) package llmproxy +import "encoding/json" + // Message represents a single message in a chat completion request. type Message struct { // Role is the role of the message author (e.g., "user", "assistant", "system"). Role string `json:"role"` - // Content is the text content of the message. - Content string `json:"content"` + // Content is the content of the message (can be string or array for multimodal). + Content any `json:"content"` + // Custom holds provider-specific message fields that don't map to standard fields. + Custom map[string]any `json:"-"` +} + +// UnmarshalJSON implements custom JSON unmarshaling to capture unknown fields. +func (m *Message) UnmarshalJSON(data []byte) error { + type Alias Message + aux := &struct { + *Alias + }{ + Alias: (*Alias)(m), + } + if err := json.Unmarshal(data, &aux); err != nil { + return err + } + + var raw map[string]any + if err := json.Unmarshal(data, &raw); err != nil { + return err + } + + m.Custom = make(map[string]any) + for k, v := range raw { + if k != "role" && k != "content" { + m.Custom[k] = v + } + } + + return nil +} + +// MarshalJSON implements custom JSON marshaling to include Custom fields. +func (m Message) MarshalJSON() ([]byte, error) { + type Alias Message + aux := &struct { + Alias + }{ + Alias: (Alias)(m), + } + + data, err := json.Marshal(aux) + if err != nil { + return nil, err + } + + if len(m.Custom) == 0 { + return data, nil + } + + var result map[string]any + if err := json.Unmarshal(data, &result); err != nil { + return nil, err + } + + for k, v := range m.Custom { + result[k] = v + } + + return json.Marshal(result) } // BodyMetadata contains extracted metadata from a parsed request body. 
diff --git a/providers/anthropic/parser_test.go b/providers/anthropic/parser_test.go index aed02cd..b5b57fd 100644 --- a/providers/anthropic/parser_test.go +++ b/providers/anthropic/parser_test.go @@ -57,7 +57,7 @@ func TestParser(t *testing.T) { t.Errorf("expected 1 message, got %d", len(meta.Messages)) } if meta.Messages[0].Content != "hello" { - t.Errorf("expected content 'hello', got %s", meta.Messages[0].Content) + t.Errorf("expected content 'hello', got %v", meta.Messages[0].Content) } }) @@ -148,7 +148,7 @@ func TestExtractor(t *testing.T) { t.Errorf("expected 1 choice, got %d", len(meta.Choices)) } if meta.Choices[0].Message.Content != "Hello!" { - t.Errorf("expected content 'Hello!', got %s", meta.Choices[0].Message.Content) + t.Errorf("expected content 'Hello!', got %v", meta.Choices[0].Message.Content) } if string(raw) != respBody { t.Error("raw body mismatch") @@ -201,3 +201,19 @@ func TestExtractor(t *testing.T) { } }) } + +func TestParser_ContentArrayWithMultipleTypes(t *testing.T) { + body := `{"model":"claude-3-opus-20240229","max_tokens":1024,"messages":[{"role":"user","content":[{"type":"text","text":"hello"},{"type":"image","source":{"type":"base64","media_type":"image/png","data":"abc123"}}]}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(meta.Messages) != 1 { + t.Errorf("expected 1 message, got %d", len(meta.Messages)) + } + if meta.Messages[0].Content != "hello" { + t.Errorf("expected content 'hello', got %v", meta.Messages[0].Content) + } +} diff --git a/providers/bedrock/parser_test.go b/providers/bedrock/parser_test.go index b7678d0..fe7ecaa 100644 --- a/providers/bedrock/parser_test.go +++ b/providers/bedrock/parser_test.go @@ -32,7 +32,7 @@ func TestParser(t *testing.T) { t.Errorf("expected role user, got %s", meta.Messages[0].Role) } if meta.Messages[0].Content != "hello" { - t.Errorf("expected content 'hello', 
got %s", meta.Messages[0].Content) + t.Errorf("expected content 'hello', got %v", meta.Messages[0].Content) } if string(raw) != body { t.Error("raw body mismatch") @@ -181,7 +181,7 @@ func TestExtractor(t *testing.T) { t.Errorf("expected 1 choice, got %d", len(meta.Choices)) } if meta.Choices[0].Message.Content != "Hello!" { - t.Errorf("expected content 'Hello!', got %s", meta.Choices[0].Message.Content) + t.Errorf("expected content 'Hello!', got %v", meta.Choices[0].Message.Content) } if meta.Choices[0].FinishReason != "end_turn" { t.Errorf("expected finish_reason end_turn, got %s", meta.Choices[0].FinishReason) @@ -191,3 +191,16 @@ func TestExtractor(t *testing.T) { } }) } + +func TestParser_MessageWithMultipleContentBlocks(t *testing.T) { + body := `{"modelId":"anthropic.claude-3-sonnet-20240229-v1:0","messages":[{"role":"user","content":[{"text":"hello"},{"text":"world"}]}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if meta.Messages[0].Content != "helloworld" { + t.Errorf("expected combined content 'helloworld', got %v", meta.Messages[0].Content) + } +} diff --git a/providers/bedrock/streaming_extractor.go b/providers/bedrock/streaming_extractor.go index 30d9fb8..7525d15 100644 --- a/providers/bedrock/streaming_extractor.go +++ b/providers/bedrock/streaming_extractor.go @@ -252,7 +252,11 @@ func processBedrockStreamEvent(event *eventStreamEvent, meta *llmproxy.ResponseM }) } if meta.Choices[0].Message != nil { - meta.Choices[0].Message.Content += delta.Delta.Text + if str, ok := meta.Choices[0].Message.Content.(string); ok { + meta.Choices[0].Message.Content = str + delta.Delta.Text + } else { + meta.Choices[0].Message.Content = delta.Delta.Text + } } } diff --git a/providers/googleai/parser_test.go b/providers/googleai/parser_test.go index 7f50466..21b8d35 100644 --- a/providers/googleai/parser_test.go +++ 
b/providers/googleai/parser_test.go @@ -25,7 +25,7 @@ func TestParser(t *testing.T) { t.Errorf("expected role user, got %s", meta.Messages[0].Role) } if meta.Messages[0].Content != "hello" { - t.Errorf("expected content 'hello', got %s", meta.Messages[0].Content) + t.Errorf("expected content 'hello', got %v", meta.Messages[0].Content) } if string(raw) != body { t.Error("raw body mismatch") @@ -155,7 +155,7 @@ func TestExtractor(t *testing.T) { t.Errorf("expected 1 choice, got %d", len(meta.Choices)) } if meta.Choices[0].Message.Content != "Hello!" { - t.Errorf("expected content 'Hello!', got %s", meta.Choices[0].Message.Content) + t.Errorf("expected content 'Hello!', got %v", meta.Choices[0].Message.Content) } if meta.Choices[0].FinishReason != "stop" { t.Errorf("expected finish_reason 'stop', got %s", meta.Choices[0].FinishReason) @@ -185,3 +185,29 @@ func TestExtractor(t *testing.T) { } }) } + +func TestParser_MessageWithMultipleParts(t *testing.T) { + body := `{"contents":[{"role":"user","parts":[{"text":"hello"},{"text":"world"}]}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if meta.Messages[0].Content != "helloworld" { + t.Errorf("expected combined content 'helloworld', got %v", meta.Messages[0].Content) + } +} + +func TestParser_MessageWithInlineData(t *testing.T) { + body := `{"contents":[{"role":"user","parts":[{"text":"describe this"},{"inlineData":{"mimeType":"image/png","data":"abc123"}}]}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if meta.Messages[0].Content != "describe this" { + t.Errorf("expected text content, got %v", meta.Messages[0].Content) + } +} diff --git a/providers/googleai/streaming_extractor.go b/providers/googleai/streaming_extractor.go index 82c20b9..751c0d2 100644 --- 
a/providers/googleai/streaming_extractor.go +++ b/providers/googleai/streaming_extractor.go @@ -103,7 +103,11 @@ func (e *StreamingExtractor) extractStreamingWithController(resp *http.Response, } if candidate.Content != nil { text := extractTextFromParts(candidate.Content.Parts) - meta.Choices[i].Message.Content += text + if str, ok := meta.Choices[i].Message.Content.(string); ok { + meta.Choices[i].Message.Content = str + text + } else { + meta.Choices[i].Message.Content = text + } } if candidate.FinishReason != "" { meta.Choices[i].FinishReason = mapFinishReason(candidate.FinishReason) diff --git a/providers/openai_compatible/parser_test.go b/providers/openai_compatible/parser_test.go index 7be75dc..442a1fb 100644 --- a/providers/openai_compatible/parser_test.go +++ b/providers/openai_compatible/parser_test.go @@ -2,6 +2,7 @@ package openai_compatible import ( "bytes" + "encoding/json" "io" "net/http" "net/http/httptest" @@ -29,7 +30,7 @@ func TestParser_BasicRequest(t *testing.T) { t.Errorf("message role = %q, want %q", meta.Messages[0].Role, "user") } if meta.Messages[0].Content != "hello" { - t.Errorf("message content = %q, want %q", meta.Messages[0].Content, "hello") + t.Errorf("message content = %v, want %q", meta.Messages[0].Content, "hello") } if string(raw) != body { t.Errorf("raw body mismatch") @@ -132,7 +133,7 @@ func TestParser_MultilineContent(t *testing.T) { } if meta.Messages[0].Content != "line1\nline2\nline3" { - t.Errorf("multiline content not preserved: %q", meta.Messages[0].Content) + t.Errorf("multiline content not preserved: %v", meta.Messages[0].Content) } } @@ -146,7 +147,7 @@ func TestParser_UnicodeContent(t *testing.T) { } if meta.Messages[0].Content != "Hello 世界 🌍" { - t.Errorf("unicode content not preserved: %q", meta.Messages[0].Content) + t.Errorf("unicode content not preserved: %v", meta.Messages[0].Content) } } @@ -232,7 +233,7 @@ func TestExtractor_BasicResponse(t *testing.T) { t.Errorf("Choice message role = %q, want 
assistant", meta.Choices[0].Message.Role) } if meta.Choices[0].Message.Content != "Hello!" { - t.Errorf("Choice message content = %q, want Hello!", meta.Choices[0].Message.Content) + t.Errorf("Choice message content = %v, want Hello!", meta.Choices[0].Message.Content) } if meta.Choices[0].FinishReason != "stop" { t.Errorf("FinishReason = %q, want stop", meta.Choices[0].FinishReason) @@ -261,10 +262,10 @@ func TestExtractor_MultipleChoices(t *testing.T) { t.Fatalf("Choices count = %d, want 2", len(meta.Choices)) } if meta.Choices[0].Message.Content != "Option A" { - t.Errorf("Choice 0 content = %q, want Option A", meta.Choices[0].Message.Content) + t.Errorf("Choice 0 content = %v, want Option A", meta.Choices[0].Message.Content) } if meta.Choices[1].Message.Content != "Option B" { - t.Errorf("Choice 1 content = %q, want Option B", meta.Choices[1].Message.Content) + t.Errorf("Choice 1 content = %v, want Option B", meta.Choices[1].Message.Content) } } @@ -290,7 +291,7 @@ func TestExtractor_DeltaForStreaming(t *testing.T) { t.Errorf("Delta role = %q, want assistant", meta.Choices[0].Delta.Role) } if meta.Choices[0].Delta.Content != "Hello" { - t.Errorf("Delta content = %q, want Hello", meta.Choices[0].Delta.Content) + t.Errorf("Delta content = %v, want Hello", meta.Choices[0].Delta.Content) } } @@ -557,3 +558,286 @@ func TestExtractor_ZeroCachedTokens(t *testing.T) { t.Error("expected no cache_usage when cached_tokens is 0") } } + +func TestParser_ContentAsString(t *testing.T) { + body := `{"model":"gpt-4","messages":[{"role":"user","content":"hello world"}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if meta.Messages[0].Content != "hello world" { + t.Errorf("content = %v, want 'hello world'", meta.Messages[0].Content) + } +} + +func TestParser_ContentAsArray(t *testing.T) { + body := 
`{"model":"gpt-4o","messages":[{"role":"user","content":[{"type":"text","text":"What's in this image?"},{"type":"image_url","image_url":{"url":"https://example.com/image.png"}}]}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + content, ok := meta.Messages[0].Content.([]interface{}) + if !ok { + t.Fatalf("expected content to be array, got %T", meta.Messages[0].Content) + } + if len(content) != 2 { + t.Errorf("expected 2 content parts, got %d", len(content)) + } + + textPart := content[0].(map[string]interface{}) + if textPart["type"] != "text" { + t.Errorf("expected first part type 'text', got %v", textPart["type"]) + } + if textPart["text"] != "What's in this image?" { + t.Errorf("expected text content, got %v", textPart["text"]) + } + + imagePart := content[1].(map[string]interface{}) + if imagePart["type"] != "image_url" { + t.Errorf("expected second part type 'image_url', got %v", imagePart["type"]) + } +} + +func TestParser_ContentAsArrayOfText(t *testing.T) { + body := `{"model":"gpt-4","messages":[{"role":"user","content":[{"type":"text","text":"hello"},{"type":"text","text":"world"}]}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + content, ok := meta.Messages[0].Content.([]interface{}) + if !ok { + t.Fatalf("expected content to be array, got %T", meta.Messages[0].Content) + } + if len(content) != 2 { + t.Errorf("expected 2 content parts, got %d", len(content)) + } +} + +func TestParser_MessageWithNonStandardProperties(t *testing.T) { + body := `{"model":"gpt-4","messages":[{"role":"user","content":"hello","name":"john","custom_field":"value","nested":{"foo":"bar"}}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", 
err) + } + + if meta.Messages[0].Role != "user" { + t.Errorf("role = %q, want user", meta.Messages[0].Role) + } + if meta.Messages[0].Content != "hello" { + t.Errorf("content = %v, want hello", meta.Messages[0].Content) + } + if meta.Messages[0].Custom["name"] != "john" { + t.Errorf("custom name = %v, want john", meta.Messages[0].Custom["name"]) + } + if meta.Messages[0].Custom["custom_field"] != "value" { + t.Errorf("custom_field = %v, want value", meta.Messages[0].Custom["custom_field"]) + } + if meta.Messages[0].Custom["nested"] == nil { + t.Error("expected nested field in Custom") + } + nested := meta.Messages[0].Custom["nested"].(map[string]interface{}) + if nested["foo"] != "bar" { + t.Errorf("nested.foo = %v, want bar", nested["foo"]) + } +} + +func TestParser_MessageWithToolCalls(t *testing.T) { + body := `{"model":"gpt-4","messages":[{"role":"assistant","content":null,"tool_calls":[{"id":"call_123","type":"function","function":{"name":"get_weather","arguments":"{\"location\":\"SF\"}"}}]}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if meta.Messages[0].Role != "assistant" { + t.Errorf("role = %q, want assistant", meta.Messages[0].Role) + } + if meta.Messages[0].Content != nil { + t.Errorf("content = %v, want nil", meta.Messages[0].Content) + } + if meta.Messages[0].Custom["tool_calls"] == nil { + t.Error("expected tool_calls in Custom") + } + toolCalls := meta.Messages[0].Custom["tool_calls"].([]interface{}) + if len(toolCalls) != 1 { + t.Errorf("expected 1 tool_call, got %d", len(toolCalls)) + } +} + +func TestParser_MessageWithToolCallId(t *testing.T) { + body := `{"model":"gpt-4","messages":[{"role":"tool","content":"sunny","tool_call_id":"call_123"}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if 
meta.Messages[0].Role != "tool" { + t.Errorf("role = %q, want tool", meta.Messages[0].Role) + } + if meta.Messages[0].Content != "sunny" { + t.Errorf("content = %v, want sunny", meta.Messages[0].Content) + } + if meta.Messages[0].Custom["tool_call_id"] != "call_123" { + t.Errorf("tool_call_id = %v, want call_123", meta.Messages[0].Custom["tool_call_id"]) + } +} + +func TestParser_MessageWithFunctionCall(t *testing.T) { + body := `{"model":"gpt-4","messages":[{"role":"assistant","content":null,"function_call":{"name":"get_weather","arguments":"{\"location\":\"SF\"}"}}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if meta.Messages[0].Custom["function_call"] == nil { + t.Error("expected function_call in Custom") + } + fnCall := meta.Messages[0].Custom["function_call"].(map[string]interface{}) + if fnCall["name"] != "get_weather" { + t.Errorf("function_call.name = %v, want get_weather", fnCall["name"]) + } +} + +func TestParser_MultipleMessagesWithMixedContent(t *testing.T) { + body := `{"model":"gpt-4o","messages":[ + {"role":"system","content":"You are helpful"}, + {"role":"user","content":"hello"}, + {"role":"assistant","content":"hi there"}, + {"role":"user","content":[{"type":"text","text":"describe this"},{"type":"image_url","image_url":{"url":"data:image/png;base64,abc123"}}]} + ]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(meta.Messages) != 4 { + t.Fatalf("expected 4 messages, got %d", len(meta.Messages)) + } + + if meta.Messages[0].Content != "You are helpful" { + t.Errorf("message 0 content = %v, want 'You are helpful'", meta.Messages[0].Content) + } + if meta.Messages[1].Content != "hello" { + t.Errorf("message 1 content = %v, want 'hello'", meta.Messages[1].Content) + } + if meta.Messages[2].Content != "hi 
there" { + t.Errorf("message 2 content = %v, want 'hi there'", meta.Messages[2].Content) + } + + content3, ok := meta.Messages[3].Content.([]interface{}) + if !ok { + t.Fatalf("message 3 content should be array, got %T", meta.Messages[3].Content) + } + if len(content3) != 2 { + t.Errorf("message 3 should have 2 parts, got %d", len(content3)) + } +} + +func TestParser_MessageRoundTrip(t *testing.T) { + original := `{"model":"gpt-4o","messages":[{"role":"user","content":[{"type":"text","text":"hello"},{"type":"image_url","image_url":{"url":"https://example.com/img.png","detail":"high"}}],"custom_prop":"value"}]}` + + parser := &Parser{} + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(original)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + remarshaled, err := json.Marshal(meta.Messages[0]) + if err != nil { + t.Fatalf("failed to remarshal message: %v", err) + } + + var remarshaledMsg map[string]interface{} + if err := json.Unmarshal(remarshaled, &remarshaledMsg); err != nil { + t.Fatalf("failed to unmarshal remarshaled message: %v", err) + } + + if remarshaledMsg["role"] != "user" { + t.Errorf("role = %v, want user", remarshaledMsg["role"]) + } + if remarshaledMsg["custom_prop"] != "value" { + t.Errorf("custom_prop = %v, want value", remarshaledMsg["custom_prop"]) + } + + content, ok := remarshaledMsg["content"].([]interface{}) + if !ok { + t.Fatalf("content should be array, got %T", remarshaledMsg["content"]) + } + if len(content) != 2 { + t.Errorf("expected 2 content parts, got %d", len(content)) + } +} + +func TestParser_ContentAsNull(t *testing.T) { + body := `{"model":"gpt-4","messages":[{"role":"assistant","content":null}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if meta.Messages[0].Content != nil { + t.Errorf("content = %v, want nil", meta.Messages[0].Content) + } +} + +func 
TestParser_ContentAsNumber(t *testing.T) { + body := `{"model":"test","messages":[{"role":"user","content":123}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if meta.Messages[0].Content != 123.0 { + t.Errorf("content = %v, want 123", meta.Messages[0].Content) + } +} + +func TestParser_ContentAsBoolean(t *testing.T) { + body := `{"model":"test","messages":[{"role":"user","content":true}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if meta.Messages[0].Content != true { + t.Errorf("content = %v, want true", meta.Messages[0].Content) + } +} + +func TestParser_RefusalInMessage(t *testing.T) { + body := `{"model":"gpt-4","messages":[{"role":"assistant","content":"I cannot help","refusal":"I cannot assist with that request"}]}` + parser := &Parser{} + + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if meta.Messages[0].Custom["refusal"] != "I cannot assist with that request" { + t.Errorf("refusal = %v, want refusal message", meta.Messages[0].Custom["refusal"]) + } +} diff --git a/providers/openai_compatible/responses_extractor.go b/providers/openai_compatible/responses_extractor.go index a3b17c3..5d9a928 100644 --- a/providers/openai_compatible/responses_extractor.go +++ b/providers/openai_compatible/responses_extractor.go @@ -18,7 +18,16 @@ func (e *ResponsesExtractor) Extract(resp *http.Response) (llmproxy.ResponseMeta var responsesResp ResponsesResponse if err := json.Unmarshal(body, &responsesResp); err != nil { - return llmproxy.ResponseMetadata{}, nil, err + if isArray := len(body) > 0 && body[0] == '['; isArray { + var outputItems []ResponsesOutputItem + if err := json.Unmarshal(body, &outputItems); err != nil { + return 
llmproxy.ResponseMetadata{}, nil, err + } + responsesResp.Output = outputItems + responsesResp.Status = "completed" + } else { + return llmproxy.ResponseMetadata{}, nil, err + } } meta := llmproxy.ResponseMetadata{ @@ -55,6 +64,9 @@ func (e *ResponsesExtractor) Extract(resp *http.Response) (llmproxy.ResponseMeta if responsesResp.Error != nil { meta.Custom["error"] = responsesResp.Error } + if len(responsesResp.Output) > 0 { + meta.Custom["output"] = responsesResp.Output + } return meta, body, nil } @@ -96,18 +108,35 @@ type ResponsesResponse struct { } type ResponsesOutputItem struct { - ID string `json:"id"` - Type string `json:"type"` - Status string `json:"status"` - Role string `json:"role,omitempty"` - Content []ResponsesOutputContent `json:"content,omitempty"` + ID string `json:"id"` + Type string `json:"type"` + Status string `json:"status"` + Role string `json:"role,omitempty"` + Content []ResponsesOutputContent `json:"content,omitempty"` + Name string `json:"name,omitempty"` + Arguments string `json:"arguments,omitempty"` + Summary []ResponsesOutputSummary `json:"summary,omitempty"` } -type ResponsesOutputContent struct { +type ResponsesOutputSummary struct { Type string `json:"type"` Text string `json:"text,omitempty"` } +type ResponsesOutputContent struct { + Type string `json:"type"` + Text string `json:"text,omitempty"` + Annotations []ResponsesOutputAnnotation `json:"annotations,omitempty"` + Logprobs interface{} `json:"logprobs,omitempty"` +} + +type ResponsesOutputAnnotation struct { + Type string `json:"type"` + Title string `json:"title,omitempty"` + URL string `json:"url,omitempty"` + Index *int `json:"index,omitempty"` +} + type ResponsesUsage struct { InputTokens int `json:"input_tokens"` OutputTokens int `json:"output_tokens"` diff --git a/providers/openai_compatible/responses_test.go b/providers/openai_compatible/responses_test.go index e6bb62d..8aa7f72 100644 --- a/providers/openai_compatible/responses_test.go +++ 
b/providers/openai_compatible/responses_test.go @@ -287,3 +287,941 @@ func TestNewMultiAPI(t *testing.T) { t.Error("URLResolver() is nil") } } + +func TestResponsesExtractor_WebSearchWithAnnotations(t *testing.T) { + respBody := `[ + { + "id": "ws_67bd64fe91f081919bec069ad65797f1", + "status": "completed", + "type": "web_search_call" + }, + { + "id": "msg_67bd6502568c8191a2cbb154fa3fbf4c", + "content": [ + { + "annotations": [ + { + "index": null, + "title": "Huawei improves AI chip production", + "type": "url_citation", + "url": "https://www.ft.com/content/example" + } + ], + "text": "As of February 25, 2025, several significant developments have emerged in the field of artificial intelligence (AI).", + "type": "output_text", + "logprobs": null + } + ], + "role": "assistant", + "type": "message" + } +]` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, raw, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + if len(meta.Choices) != 1 { + t.Errorf("Choices length = %d, want 1", len(meta.Choices)) + } + + if meta.Choices[0].Message.Content != "As of February 25, 2025, several significant developments have emerged in the field of artificial intelligence (AI)." 
{ + t.Errorf("Content = %v, want AI developments text", meta.Choices[0].Message.Content) + } + + output, ok := meta.Custom["output"].([]ResponsesOutputItem) + if !ok { + t.Fatalf("output in Custom should be []ResponsesOutputItem, got %T", meta.Custom["output"]) + } + + if len(output) != 2 { + t.Errorf("Output items = %d, want 2", len(output)) + } + + if output[0].Type != "web_search_call" { + t.Errorf("First output type = %q, want web_search_call", output[0].Type) + } + if output[1].Type != "message" { + t.Errorf("Second output type = %q, want message", output[1].Type) + } + + if len(output[1].Content) == 0 { + t.Fatal("Message content should not be empty") + } + + if len(output[1].Content[0].Annotations) != 1 { + t.Errorf("Annotations count = %d, want 1", len(output[1].Content[0].Annotations)) + } + + annotation := output[1].Content[0].Annotations[0] + if annotation.Type != "url_citation" { + t.Errorf("Annotation type = %q, want url_citation", annotation.Type) + } + if annotation.Title != "Huawei improves AI chip production" { + t.Errorf("Annotation title = %q, want Huawei title", annotation.Title) + } + if annotation.URL != "https://www.ft.com/content/example" { + t.Errorf("Annotation URL = %q, want ft.com URL", annotation.URL) + } + + if string(raw) != respBody { + t.Error("Raw body not preserved") + } +} + +func TestResponsesExtractor_FullResponseWithWebSearch(t *testing.T) { + respBody := `{ + "id": "resp_67bd65392a088191a3b802a61f4fba14", + "created_at": 1740465465.0, + "error": null, + "metadata": {}, + "model": "gpt-4o-2024-08-06", + "object": "response", + "output": [ + { + "id": "msg_67bd653ab9cc81918db973f0c1af9fbb", + "content": [ + { + "annotations": [], + "text": "Based on the image of a cat, some relevant keywords could be:\n\n- Cat\n- Feline\n- Pet", + "type": "output_text", + "logprobs": null + } + ], + "role": "assistant", + "type": "message" + }, + { + "id": "ws_67bd653c7a548191af86757fbbca96e1", + "status": "completed", + "type": 
"web_search_call" + }, + { + "id": "msg_67bd653f34fc8191989241b2659fd1b5", + "content": [ + { + "annotations": [ + { + "index": null, + "title": "Cat miraculously survives 3 weeks trapped in sofa", + "type": "url_citation", + "url": "https://nypost.com/2025/02/24/us-news/cat-survives/" + }, + { + "index": null, + "title": "Another cat story", + "type": "url_citation", + "url": "https://example.com/cat-story" + } + ], + "text": "Here are some recent news stories related to cats:\n\n**1. Cat Survives Three Weeks**", + "type": "output_text", + "logprobs": null + } + ], + "role": "assistant", + "type": "message" + } + ], + "temperature": 1.0, + "tool_choice": "auto", + "tools": [ + { + "type": "web_search", + "location": null, + "sites": null + } + ], + "top_p": 1.0, + "max_completion_tokens": null, + "previous_response_id": null, + "reasoning_effort": null, + "text": { + "format": { + "type": "text" + }, + "stop": null + }, + "top_logprobs": null, + "truncation": "disabled", + "usage": { + "completion_tokens": null, + "prompt_tokens": null, + "total_tokens": 1370, + "completion_tokens_details": null, + "prompt_tokens_details": null + } +}` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + if meta.ID != "resp_67bd65392a088191a3b802a61f4fba14" { + t.Errorf("ID = %q, want resp_67bd65392a088191a3b802a61f4fba14", meta.ID) + } + + if meta.Model != "gpt-4o-2024-08-06" { + t.Errorf("Model = %q, want gpt-4o-2024-08-06", meta.Model) + } + + if meta.Usage.TotalTokens != 1370 { + t.Errorf("TotalTokens = %d, want 1370", meta.Usage.TotalTokens) + } + + output := meta.Custom["output"].([]ResponsesOutputItem) + if len(output) != 3 { + t.Fatalf("Output items = %d, want 3", len(output)) + } + + if output[0].Type != "message" || output[1].Type != 
"web_search_call" || output[2].Type != "message" { + t.Errorf("Output types: %q, %q, %q - want message, web_search_call, message", output[0].Type, output[1].Type, output[2].Type) + } + + annotations := output[2].Content[0].Annotations + if len(annotations) != 2 { + t.Errorf("Annotations count = %d, want 2", len(annotations)) + } + + if annotations[0].Title != "Cat miraculously survives 3 weeks trapped in sofa" { + t.Errorf("First annotation title = %q", annotations[0].Title) + } +} + +func TestResponsesExtractor_MultipleTextSegments(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "completed", + "output": [ + { + "id": "msg_1", + "type": "message", + "role": "assistant", + "content": [ + {"type": "output_text", "text": "First segment"} + ] + }, + { + "id": "ws_1", + "type": "web_search_call", + "status": "completed" + }, + { + "id": "msg_2", + "type": "message", + "role": "assistant", + "content": [ + {"type": "output_text", "text": "Second segment"} + ] + } + ], + "usage": {"total_tokens": 100} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + expected := "First segment\nSecond segment" + if meta.Choices[0].Message.Content != expected { + t.Errorf("Content = %v, want %q", meta.Choices[0].Message.Content, expected) + } +} + +func TestResponsesExtractor_EmptyAnnotations(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "completed", + "output": [ + { + "id": "msg_1", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "Hello", + "annotations": [] + } + ] + } + ], + "usage": {"total_tokens": 50} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + 
StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + output := meta.Custom["output"].([]ResponsesOutputItem) + if len(output[0].Content[0].Annotations) != 0 { + t.Errorf("Expected empty annotations, got %d", len(output[0].Content[0].Annotations)) + } +} + +func TestResponsesExtractor_LogprobsField(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "completed", + "output": [ + { + "id": "msg_1", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "Hello", + "logprobs": null + } + ] + } + ], + "usage": {"total_tokens": 50} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + output := meta.Custom["output"].([]ResponsesOutputItem) + if output[0].Content[0].Logprobs != nil { + t.Errorf("Logprobs should be nil, got %v", output[0].Content[0].Logprobs) + } +} + +func TestResponsesParser_InputWithContentArray(t *testing.T) { + body := `{ + "model": "gpt-4o", + "input": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}} + ] + } + ] + }` + + parser := &ResponsesParser{} + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + if len(meta.Messages) != 0 { + t.Errorf("Expected 0 messages for array content (not string), got %d", len(meta.Messages)) + } +} + +func TestResponsesParser_InputWithMixedMessages(t *testing.T) { + body := `{ + "model": "gpt-4o", + "input": [ + {"role": 
"system", "content": "You are helpful"}, + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + { + "role": "user", + "content": [ + {"type": "text", "text": "Describe this"}, + {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}} + ] + } + ] + }` + + parser := &ResponsesParser{} + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + if len(meta.Messages) != 3 { + t.Errorf("Expected 3 messages with string content, got %d", len(meta.Messages)) + } +} + +func TestResponsesExtractor_WithReasoningContent(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "o1", + "status": "completed", + "output": [ + { + "id": "msg_1", + "type": "reasoning", + "role": "assistant", + "content": [] + }, + { + "id": "msg_2", + "type": "message", + "role": "assistant", + "content": [ + {"type": "output_text", "text": "The answer is 42"} + ] + } + ], + "usage": {"total_tokens": 100} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + if meta.Choices[0].Message.Content != "The answer is 42" { + t.Errorf("Content = %v, want 'The answer is 42'", meta.Choices[0].Message.Content) + } +} + +func TestResponsesExtractor_ToolUse(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "completed", + "output": [ + { + "id": "fc_123", + "type": "function_call", + "status": "completed", + "name": "get_weather", + "arguments": "{\"location\":\"SF\"}" + } + ], + "usage": {"total_tokens": 50} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: 
io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + output := meta.Custom["output"].([]ResponsesOutputItem) + if len(output) != 1 { + t.Fatalf("Expected 1 output item, got %d", len(output)) + } + if output[0].Type != "function_call" { + t.Errorf("Output type = %q, want function_call", output[0].Type) + } +} + +func TestResponsesExtractor_Error(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "failed", + "error": { + "type": "invalid_request_error", + "code": "context_length_exceeded", + "message": "The context length exceeds the maximum" + }, + "output": [], + "usage": {"total_tokens": 0} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 400, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + if meta.Custom["status"] != "failed" { + t.Errorf("Status = %v, want failed", meta.Custom["status"]) + } + + errObj, ok := meta.Custom["error"].(*ResponsesError) + if !ok { + t.Fatalf("Error should be *ResponsesError, got %T", meta.Custom["error"]) + } + if errObj.Code != "context_length_exceeded" { + t.Errorf("Error code = %q, want context_length_exceeded", errObj.Code) + } +} + +func TestResponsesParser_Tools(t *testing.T) { + body := `{ + "model": "gpt-4o", + "input": "Search for news", + "tools": [ + { + "type": "web_search", + "location": null, + "sites": null + }, + { + "type": "function", + "name": "get_weather", + "description": "Get weather info" + } + ] + }` + + parser := &ResponsesParser{} + meta, _, err := parser.Parse(io.NopCloser(bytes.NewReader([]byte(body)))) + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + tools, ok := meta.Custom["tools"].([]interface{}) + if !ok { + t.Fatalf("tools should be 
[]interface{}, got %T", meta.Custom["tools"]) + } + if len(tools) != 2 { + t.Errorf("Tools count = %d, want 2", len(tools)) + } +} + +func TestResponsesExtractor_ReasoningWithSummary(t *testing.T) { + respBody := `{ + "id": "resp_6820f382ee1c8191bc096bee70894d040ac5ba57aafcbac7", + "created_at": 1746989954.0, + "error": null, + "incomplete_details": null, + "instructions": null, + "metadata": {}, + "model": "o4-mini-2025-04-16", + "object": "response", + "output": [ + { + "id": "rs_6820f383d7c08191846711c5df8233bc0ac5ba57aafcbac7", + "summary": [], + "type": "reasoning", + "status": null + }, + { + "id": "msg_6820f3854688819187769ff582b170a60ac5ba57aafcbac7", + "content": [ + { + "annotations": [], + "text": "Why don't scientists trust atoms? Because they make up everything!", + "type": "output_text" + } + ], + "role": "assistant", + "status": "completed", + "type": "message" + } + ], + "parallel_tool_calls": true, + "temperature": 1.0, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "max_output_tokens": null, + "previous_response_id": null, + "reasoning": { + "effort": "medium", + "generate_summary": null, + "summary": null + }, + "status": "completed", + "text": { + "format": { + "type": "text" + } + }, + "truncation": "disabled", + "usage": { + "input_tokens": 10, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 148, + "output_tokens_details": { + "reasoning_tokens": 128 + }, + "total_tokens": 158 + }, + "user": null, + "service_tier": "default", + "store": true +}` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + if meta.ID != "resp_6820f382ee1c8191bc096bee70894d040ac5ba57aafcbac7" { + t.Errorf("ID = %q", meta.ID) + } + + if meta.Model != "o4-mini-2025-04-16" { + t.Errorf("Model = %q, want 
o4-mini-2025-04-16", meta.Model) + } + + if meta.Usage.TotalTokens != 158 { + t.Errorf("TotalTokens = %d, want 158", meta.Usage.TotalTokens) + } + + if meta.Usage.PromptTokens != 10 { + t.Errorf("PromptTokens = %d, want 10", meta.Usage.PromptTokens) + } + + if meta.Usage.CompletionTokens != 148 { + t.Errorf("CompletionTokens = %d, want 148", meta.Usage.CompletionTokens) + } + + output := meta.Custom["output"].([]ResponsesOutputItem) + if len(output) != 2 { + t.Fatalf("Output items = %d, want 2", len(output)) + } + + if output[0].Type != "reasoning" { + t.Errorf("First output type = %q, want reasoning", output[0].Type) + } + + if output[1].Type != "message" { + t.Errorf("Second output type = %q, want message", output[1].Type) + } + + expectedContent := "Why don't scientists trust atoms? Because they make up everything!" + if meta.Choices[0].Message.Content != expectedContent { + t.Errorf("Content = %v, want %q", meta.Choices[0].Message.Content, expectedContent) + } +} + +func TestResponsesExtractor_ReasoningTokensInUsage(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "o1", + "status": "completed", + "output": [ + { + "id": "rs_123", + "type": "reasoning", + "summary": [] + }, + { + "id": "msg_456", + "type": "message", + "role": "assistant", + "content": [ + {"type": "output_text", "text": "The answer is 42"} + ] + } + ], + "usage": { + "input_tokens": 50, + "output_tokens": 200, + "output_tokens_details": { + "reasoning_tokens": 150 + }, + "total_tokens": 250 + } + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + if meta.Usage.CompletionTokens != 200 { + t.Errorf("CompletionTokens = %d, want 200", meta.Usage.CompletionTokens) + } + + output := meta.Custom["output"].([]ResponsesOutputItem) + 
if output[0].Type != "reasoning" { + t.Errorf("First output should be reasoning, got %q", output[0].Type) + } +} + +func TestResponsesExtractor_CachedTokensInUsage(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "completed", + "output": [ + { + "id": "msg_123", + "type": "message", + "role": "assistant", + "content": [ + {"type": "output_text", "text": "Hello"} + ] + } + ], + "usage": { + "input_tokens": 100, + "input_tokens_details": { + "cached_tokens": 80 + }, + "output_tokens": 50, + "total_tokens": 150 + } + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + cacheUsage, ok := meta.Custom["cache_usage"].(llmproxy.CacheUsage) + if !ok { + t.Fatal("Expected cache_usage in Custom") + } + if cacheUsage.CachedTokens != 80 { + t.Errorf("CachedTokens = %d, want 80", cacheUsage.CachedTokens) + } +} + +func TestResponsesExtractor_WithServiceTier(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "completed", + "service_tier": "default", + "store": true, + "output": [ + { + "id": "msg_123", + "type": "message", + "role": "assistant", + "content": [ + {"type": "output_text", "text": "Hello"} + ] + } + ], + "usage": {"total_tokens": 50} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + if meta.Custom["status"] != "completed" { + t.Errorf("Status = %v, want completed", meta.Custom["status"]) + } +} + +func TestResponsesExtractor_WithReasoningEffort(t *testing.T) { + respBody := `{ + 
"id": "resp_test", + "object": "response", + "model": "o4-mini", + "status": "completed", + "reasoning": { + "effort": "medium", + "generate_summary": null, + "summary": null + }, + "output": [ + { + "id": "rs_123", + "type": "reasoning", + "summary": [] + }, + { + "id": "msg_456", + "type": "message", + "role": "assistant", + "content": [ + {"type": "output_text", "text": "Result"} + ] + } + ], + "usage": {"total_tokens": 100} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + output := meta.Custom["output"].([]ResponsesOutputItem) + if len(output) != 2 { + t.Errorf("Expected 2 output items, got %d", len(output)) + } +} + +func TestResponsesExtractor_StatusInOutputMessage(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "completed", + "output": [ + { + "id": "msg_123", + "type": "message", + "role": "assistant", + "status": "completed", + "content": [ + {"type": "output_text", "text": "Hello"} + ] + } + ], + "usage": {"total_tokens": 50} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + if meta.Choices[0].FinishReason != "completed" { + t.Errorf("FinishReason = %q, want completed", meta.Choices[0].FinishReason) + } +} + +func TestResponsesExtractor_OutputSummary(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "o4-mini", + "status": "completed", + "output": [ + { + "id": "rs_123", + "type": "reasoning", + "summary": [ + {"type": "summary_text", "text": "Analyzed the problem"} + ] + }, + { + "id": 
"msg_456", + "type": "message", + "role": "assistant", + "content": [ + {"type": "output_text", "text": "The answer is 42"} + ] + } + ], + "usage": {"total_tokens": 100} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + output := meta.Custom["output"].([]ResponsesOutputItem) + if output[0].Type != "reasoning" { + t.Errorf("First output type = %q, want reasoning", output[0].Type) + } +} From 6aa3dede67c3a860856f7b47ee7a7e2cc7511b48 Mon Sep 17 00:00:00 2001 From: Jeff Haynie Date: Wed, 15 Apr 2026 20:25:49 -0500 Subject: [PATCH 2/5] fix: address PR review feedback - Handle whitespace before JSON array in responses extractor - Skip reserved keys (role, content) when merging Custom in MarshalJSON - Add assertions for text content in TestParser_ContentAsArrayOfText - Add tests for whitespace array and Custom field override protection --- metadata.go | 3 ++ providers/openai_compatible/parser_test.go | 46 +++++++++++++++++++ .../openai_compatible/responses_extractor.go | 9 +++- providers/openai_compatible/responses_test.go | 35 ++++++++++++++ 4 files changed, 92 insertions(+), 1 deletion(-) diff --git a/metadata.go b/metadata.go index 0f0cae8..6ceb9a8 100644 --- a/metadata.go +++ b/metadata.go @@ -85,6 +85,9 @@ func (m Message) MarshalJSON() ([]byte, error) { } for k, v := range m.Custom { + if k == "role" || k == "content" { + continue + } result[k] = v } diff --git a/providers/openai_compatible/parser_test.go b/providers/openai_compatible/parser_test.go index 442a1fb..337c704 100644 --- a/providers/openai_compatible/parser_test.go +++ b/providers/openai_compatible/parser_test.go @@ -620,6 +620,20 @@ func TestParser_ContentAsArrayOfText(t *testing.T) { if len(content) != 2 { t.Errorf("expected 2 content parts, got %d", len(content)) } + 
part0 := content[0].(map[string]interface{}) + if part0["type"] != "text" { + t.Errorf("part 0 type = %v, want text", part0["type"]) + } + if part0["text"] != "hello" { + t.Errorf("part 0 text = %v, want hello", part0["text"]) + } + part1 := content[1].(map[string]interface{}) + if part1["type"] != "text" { + t.Errorf("part 1 type = %v, want text", part1["type"]) + } + if part1["text"] != "world" { + t.Errorf("part 1 text = %v, want world", part1["text"]) + } } func TestParser_MessageWithNonStandardProperties(t *testing.T) { @@ -841,3 +855,35 @@ func TestParser_RefusalInMessage(t *testing.T) { t.Errorf("refusal = %v, want refusal message", meta.Messages[0].Custom["refusal"]) } } + +func TestParser_MessageCustomDoesNotOverrideReservedFields(t *testing.T) { + msg := llmproxy.Message{ + Role: "user", + Content: "hello", + Custom: map[string]any{ + "role": "assistant", + "content": "overridden", + "extra": "value", + }, + } + + remarshaled, err := json.Marshal(msg) + if err != nil { + t.Fatalf("failed to remarshal: %v", err) + } + + var result map[string]interface{} + if err := json.Unmarshal(remarshaled, &result); err != nil { + t.Fatalf("failed to unmarshal: %v", err) + } + + if result["role"] != "user" { + t.Errorf("role = %v, want user (should not be overridden by Custom)", result["role"]) + } + if result["content"] != "hello" { + t.Errorf("content = %v, want hello (should not be overridden by Custom)", result["content"]) + } + if result["extra"] != "value" { + t.Errorf("extra = %v, want value", result["extra"]) + } +} diff --git a/providers/openai_compatible/responses_extractor.go b/providers/openai_compatible/responses_extractor.go index 5d9a928..b0b9689 100644 --- a/providers/openai_compatible/responses_extractor.go +++ b/providers/openai_compatible/responses_extractor.go @@ -18,7 +18,14 @@ func (e *ResponsesExtractor) Extract(resp *http.Response) (llmproxy.ResponseMeta var responsesResp ResponsesResponse if err := json.Unmarshal(body, &responsesResp); err != 
nil { - if isArray := len(body) > 0 && body[0] == '['; isArray { + firstNonWhitespace := -1 + for i, b := range body { + if b != ' ' && b != '\n' && b != '\r' && b != '\t' { + firstNonWhitespace = i + break + } + } + if firstNonWhitespace >= 0 && body[firstNonWhitespace] == '[' { var outputItems []ResponsesOutputItem if err := json.Unmarshal(body, &outputItems); err != nil { return llmproxy.ResponseMetadata{}, nil, err diff --git a/providers/openai_compatible/responses_test.go b/providers/openai_compatible/responses_test.go index 8aa7f72..760e1e4 100644 --- a/providers/openai_compatible/responses_test.go +++ b/providers/openai_compatible/responses_test.go @@ -288,6 +288,41 @@ func TestNewMultiAPI(t *testing.T) { } } +func TestResponsesExtractor_ArrayWithLeadingWhitespace(t *testing.T) { + respBody := ` + + [ + { + "id": "msg_123", + "type": "message", + "role": "assistant", + "content": [ + {"type": "output_text", "text": "Hello"} + ] + } + ]` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + if len(meta.Choices) != 1 { + t.Errorf("Choices length = %d, want 1", len(meta.Choices)) + } + + if meta.Choices[0].Message.Content != "Hello" { + t.Errorf("Content = %v, want Hello", meta.Choices[0].Message.Content) + } +} + func TestResponsesExtractor_WebSearchWithAnnotations(t *testing.T) { respBody := `[ { From 94bcfc6c092ddabba237e917607e1ad8c56771a4 Mon Sep 17 00:00:00 2001 From: Jeff Haynie Date: Wed, 15 Apr 2026 20:29:47 -0500 Subject: [PATCH 3/5] refactor: use strings.Join for text concatenation in responses extractor --- providers/openai_compatible/responses_extractor.go | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/providers/openai_compatible/responses_extractor.go 
b/providers/openai_compatible/responses_extractor.go index b0b9689..125574c 100644 --- a/providers/openai_compatible/responses_extractor.go +++ b/providers/openai_compatible/responses_extractor.go @@ -4,6 +4,7 @@ import ( "encoding/json" "io" "net/http" + "strings" "github.com/agentuity/llmproxy" ) @@ -92,15 +93,7 @@ func extractResponsesContent(output []ResponsesOutputItem) string { if len(texts) == 0 { return "" } - if len(texts) == 1 { - return texts[0] - } - // Join multiple text segments with newline - result := texts[0] - for _, t := range texts[1:] { - result += "\n" + t - } - return result + return strings.Join(texts, "\n") } type ResponsesResponse struct { From 548429b08bcef79f818f55f6d22907afc4cea861 Mon Sep 17 00:00:00 2001 From: Jeff Haynie Date: Wed, 15 Apr 2026 20:37:37 -0500 Subject: [PATCH 4/5] feat: preserve annotation span fields and raw output in Responses API - Add StartIndex and EndIndex fields to ResponsesOutputAnnotation - Store raw output as json.RawMessage in output_raw Custom field - Add comprehensive tests for span fields and raw output preservation --- .../openai_compatible/responses_extractor.go | 16 +- providers/openai_compatible/responses_test.go | 241 ++++++++++++++++++ 2 files changed, 253 insertions(+), 4 deletions(-) diff --git a/providers/openai_compatible/responses_extractor.go b/providers/openai_compatible/responses_extractor.go index 125574c..c3359eb 100644 --- a/providers/openai_compatible/responses_extractor.go +++ b/providers/openai_compatible/responses_extractor.go @@ -74,6 +74,12 @@ func (e *ResponsesExtractor) Extract(resp *http.Response) (llmproxy.ResponseMeta } if len(responsesResp.Output) > 0 { meta.Custom["output"] = responsesResp.Output + var rawBody struct { + Output json.RawMessage `json:"output"` + } + if err := json.Unmarshal(body, &rawBody); err == nil && len(rawBody.Output) > 0 { + meta.Custom["output_raw"] = rawBody.Output + } } return meta, body, nil @@ -131,10 +137,12 @@ type ResponsesOutputContent struct { 
} type ResponsesOutputAnnotation struct { - Type string `json:"type"` - Title string `json:"title,omitempty"` - URL string `json:"url,omitempty"` - Index *int `json:"index,omitempty"` + Type string `json:"type"` + Title string `json:"title,omitempty"` + URL string `json:"url,omitempty"` + Index *int `json:"index,omitempty"` + StartIndex int `json:"start_index,omitempty"` + EndIndex int `json:"end_index,omitempty"` } type ResponsesUsage struct { diff --git a/providers/openai_compatible/responses_test.go b/providers/openai_compatible/responses_test.go index 760e1e4..f8a9722 100644 --- a/providers/openai_compatible/responses_test.go +++ b/providers/openai_compatible/responses_test.go @@ -2,6 +2,7 @@ package openai_compatible import ( "bytes" + "encoding/json" "io" "net/http" "testing" @@ -1260,3 +1261,243 @@ func TestResponsesExtractor_OutputSummary(t *testing.T) { t.Errorf("First output type = %q, want reasoning", output[0].Type) } } + +func TestResponsesExtractor_AnnotationSpanFields(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "completed", + "output": [ + { + "id": "msg_123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "Check out this article", + "annotations": [ + { + "type": "url_citation", + "title": "Example Article", + "url": "https://example.com/article", + "start_index": 10, + "end_index": 25 + } + ] + } + ] + } + ], + "usage": {"total_tokens": 50} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + output := meta.Custom["output"].([]ResponsesOutputItem) + if len(output) != 1 { + t.Fatalf("Expected 1 output item, got %d", len(output)) + } + + annotations := output[0].Content[0].Annotations + if 
len(annotations) != 1 { + t.Fatalf("Expected 1 annotation, got %d", len(annotations)) + } + + annotation := annotations[0] + if annotation.StartIndex != 10 { + t.Errorf("StartIndex = %d, want 10", annotation.StartIndex) + } + if annotation.EndIndex != 25 { + t.Errorf("EndIndex = %d, want 25", annotation.EndIndex) + } + if annotation.Type != "url_citation" { + t.Errorf("Type = %q, want url_citation", annotation.Type) + } + if annotation.Title != "Example Article" { + t.Errorf("Title = %q, want 'Example Article'", annotation.Title) + } +} + +func TestResponsesExtractor_RawOutputPreservation(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "completed", + "output": [ + { + "id": "msg_123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "Hello", + "annotations": [ + { + "type": "url_citation", + "title": "Test", + "url": "https://example.com", + "start_index": 0, + "end_index": 5 + } + ] + } + ] + } + ], + "usage": {"total_tokens": 50} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + rawOutput, ok := meta.Custom["output_raw"].(json.RawMessage) + if !ok { + t.Fatalf("output_raw should be json.RawMessage, got %T", meta.Custom["output_raw"]) + } + + var outputItems []map[string]interface{} + if err := json.Unmarshal(rawOutput, &outputItems); err != nil { + t.Fatalf("Failed to unmarshal raw output: %v", err) + } + + if len(outputItems) != 1 { + t.Errorf("Expected 1 output item in raw, got %d", len(outputItems)) + } + + content := outputItems[0]["content"].([]interface{}) + annotations := content[0].(map[string]interface{})["annotations"].([]interface{}) + annotation := annotations[0].(map[string]interface{}) + + if 
annotation["start_index"].(float64) != 0 { + t.Errorf("Raw annotation start_index = %v, want 0", annotation["start_index"]) + } + if annotation["end_index"].(float64) != 5 { + t.Errorf("Raw annotation end_index = %v, want 5", annotation["end_index"]) + } +} + +func TestResponsesExtractor_RawOutputWithMultipleAnnotations(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "completed", + "output": [ + { + "id": "msg_123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "See [article1] and [article2] for more", + "annotations": [ + { + "type": "url_citation", + "title": "Article 1", + "url": "https://example.com/1", + "start_index": 4, + "end_index": 14 + }, + { + "type": "url_citation", + "title": "Article 2", + "url": "https://example.com/2", + "start_index": 19, + "end_index": 29 + } + ] + } + ] + } + ], + "usage": {"total_tokens": 100} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + output := meta.Custom["output"].([]ResponsesOutputItem) + annotations := output[0].Content[0].Annotations + if len(annotations) != 2 { + t.Fatalf("Expected 2 annotations, got %d", len(annotations)) + } + + if annotations[0].StartIndex != 4 || annotations[0].EndIndex != 14 { + t.Errorf("First annotation span = [%d, %d], want [4, 14]", annotations[0].StartIndex, annotations[0].EndIndex) + } + if annotations[1].StartIndex != 19 || annotations[1].EndIndex != 29 { + t.Errorf("Second annotation span = [%d, %d], want [19, 29]", annotations[1].StartIndex, annotations[1].EndIndex) + } + + rawOutput := meta.Custom["output_raw"].(json.RawMessage) + var rawItems []ResponsesOutputItem + if err := json.Unmarshal(rawOutput, &rawItems); err != nil { + 
t.Fatalf("Failed to unmarshal raw output: %v", err) + } + if len(rawItems[0].Content[0].Annotations) != 2 { + t.Errorf("Raw output should have 2 annotations, got %d", len(rawItems[0].Content[0].Annotations)) + } +} + +func TestResponsesExtractor_NoRawOutputWhenEmpty(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "completed", + "output": [], + "usage": {"total_tokens": 0} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + if _, ok := meta.Custom["output"]; ok { + t.Error("output should not be set when empty") + } + if _, ok := meta.Custom["output_raw"]; ok { + t.Error("output_raw should not be set when output is empty") + } +} From 7ced1deb26d6286bcf61c364e204ff0b16eeb600 Mon Sep 17 00:00:00 2001 From: Jeff Haynie Date: Wed, 15 Apr 2026 20:43:57 -0500 Subject: [PATCH 5/5] fix: preserve raw output for bare-array responses and zero span values - Change StartIndex/EndIndex to *int to preserve zero values in JSON - Set output_raw for bare-array responses (not just wrapped responses) - Add tests for zero span values, bare-array raw output, missing fields --- .../openai_compatible/responses_extractor.go | 20 +- providers/openai_compatible/responses_test.go | 220 +++++++++++++++++- 2 files changed, 225 insertions(+), 15 deletions(-) diff --git a/providers/openai_compatible/responses_extractor.go b/providers/openai_compatible/responses_extractor.go index c3359eb..403c674 100644 --- a/providers/openai_compatible/responses_extractor.go +++ b/providers/openai_compatible/responses_extractor.go @@ -18,6 +18,7 @@ func (e *ResponsesExtractor) Extract(resp *http.Response) (llmproxy.ResponseMeta } var responsesResp ResponsesResponse + var isBareArray bool if err := json.Unmarshal(body, 
&responsesResp); err != nil { firstNonWhitespace := -1 for i, b := range body { @@ -33,6 +34,7 @@ func (e *ResponsesExtractor) Extract(resp *http.Response) (llmproxy.ResponseMeta } responsesResp.Output = outputItems responsesResp.Status = "completed" + isBareArray = true } else { return llmproxy.ResponseMetadata{}, nil, err } @@ -74,11 +76,15 @@ func (e *ResponsesExtractor) Extract(resp *http.Response) (llmproxy.ResponseMeta } if len(responsesResp.Output) > 0 { meta.Custom["output"] = responsesResp.Output - var rawBody struct { - Output json.RawMessage `json:"output"` - } - if err := json.Unmarshal(body, &rawBody); err == nil && len(rawBody.Output) > 0 { - meta.Custom["output_raw"] = rawBody.Output + if isBareArray { + meta.Custom["output_raw"] = json.RawMessage(body) + } else { + var rawBody struct { + Output json.RawMessage `json:"output"` + } + if err := json.Unmarshal(body, &rawBody); err == nil && len(rawBody.Output) > 0 { + meta.Custom["output_raw"] = rawBody.Output + } } } @@ -141,8 +147,8 @@ type ResponsesOutputAnnotation struct { Title string `json:"title,omitempty"` URL string `json:"url,omitempty"` Index *int `json:"index,omitempty"` - StartIndex int `json:"start_index,omitempty"` - EndIndex int `json:"end_index,omitempty"` + StartIndex *int `json:"start_index,omitempty"` + EndIndex *int `json:"end_index,omitempty"` } type ResponsesUsage struct { diff --git a/providers/openai_compatible/responses_test.go b/providers/openai_compatible/responses_test.go index f8a9722..a02d7d7 100644 --- a/providers/openai_compatible/responses_test.go +++ b/providers/openai_compatible/responses_test.go @@ -1316,11 +1316,11 @@ func TestResponsesExtractor_AnnotationSpanFields(t *testing.T) { } annotation := annotations[0] - if annotation.StartIndex != 10 { - t.Errorf("StartIndex = %d, want 10", annotation.StartIndex) + if annotation.StartIndex == nil || *annotation.StartIndex != 10 { + t.Errorf("StartIndex = %v, want 10", annotation.StartIndex) } - if annotation.EndIndex != 
25 { - t.Errorf("EndIndex = %d, want 25", annotation.EndIndex) + if annotation.EndIndex == nil || *annotation.EndIndex != 25 { + t.Errorf("EndIndex = %v, want 25", annotation.EndIndex) } if annotation.Type != "url_citation" { t.Errorf("Type = %q, want url_citation", annotation.Type) @@ -1455,11 +1455,11 @@ func TestResponsesExtractor_RawOutputWithMultipleAnnotations(t *testing.T) { t.Fatalf("Expected 2 annotations, got %d", len(annotations)) } - if annotations[0].StartIndex != 4 || annotations[0].EndIndex != 14 { - t.Errorf("First annotation span = [%d, %d], want [4, 14]", annotations[0].StartIndex, annotations[0].EndIndex) + if annotations[0].StartIndex == nil || *annotations[0].StartIndex != 4 || annotations[0].EndIndex == nil || *annotations[0].EndIndex != 14 { + t.Errorf("First annotation span = [%v, %v], want [4, 14]", annotations[0].StartIndex, annotations[0].EndIndex) } - if annotations[1].StartIndex != 19 || annotations[1].EndIndex != 29 { - t.Errorf("Second annotation span = [%d, %d], want [19, 29]", annotations[1].StartIndex, annotations[1].EndIndex) + if annotations[1].StartIndex == nil || *annotations[1].StartIndex != 19 || annotations[1].EndIndex == nil || *annotations[1].EndIndex != 29 { + t.Errorf("Second annotation span = [%v, %v], want [19, 29]", annotations[1].StartIndex, annotations[1].EndIndex) } rawOutput := meta.Custom["output_raw"].(json.RawMessage) @@ -1501,3 +1501,207 @@ func TestResponsesExtractor_NoRawOutputWhenEmpty(t *testing.T) { t.Error("output_raw should not be set when output is empty") } } + +func TestResponsesExtractor_AnnotationZeroSpanValues(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "completed", + "output": [ + { + "id": "msg_123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "Link at start", + "annotations": [ + { + "type": "url_citation", + "title": "Start Link", + "url": "https://example.com", + 
"start_index": 0, + "end_index": 4 + } + ] + } + ] + } + ], + "usage": {"total_tokens": 50} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + output := meta.Custom["output"].([]ResponsesOutputItem) + annotation := output[0].Content[0].Annotations[0] + + if annotation.StartIndex == nil { + t.Fatal("StartIndex should not be nil for zero value") + } + if *annotation.StartIndex != 0 { + t.Errorf("StartIndex = %d, want 0 (zero value should be preserved)", *annotation.StartIndex) + } + if annotation.EndIndex == nil { + t.Fatal("EndIndex should not be nil") + } + if *annotation.EndIndex != 4 { + t.Errorf("EndIndex = %d, want 4", *annotation.EndIndex) + } +} + +func TestResponsesExtractor_BareArrayRawOutputPreservation(t *testing.T) { + respBody := `[ + { + "id": "msg_123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "Hello from array", + "annotations": [ + { + "type": "url_citation", + "title": "Test", + "url": "https://example.com", + "start_index": 0, + "end_index": 5 + } + ] + } + ] + } + ]` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + if meta.Choices[0].Message.Content != "Hello from array" { + t.Errorf("Content = %v, want 'Hello from array'", meta.Choices[0].Message.Content) + } + + rawOutput, ok := meta.Custom["output_raw"].(json.RawMessage) + if !ok { + t.Fatalf("output_raw should be json.RawMessage for bare array, got %T", meta.Custom["output_raw"]) + } + + var rawItems []map[string]interface{} + if err := json.Unmarshal(rawOutput, 
&rawItems); err != nil { + t.Fatalf("Failed to unmarshal raw output: %v", err) + } + + if len(rawItems) != 1 { + t.Errorf("Expected 1 item in raw output, got %d", len(rawItems)) + } + + content := rawItems[0]["content"].([]interface{}) + annotation := content[0].(map[string]interface{})["annotations"].([]interface{})[0].(map[string]interface{}) + if annotation["start_index"].(float64) != 0 { + t.Errorf("Raw annotation start_index = %v, want 0", annotation["start_index"]) + } +} + +func TestResponsesExtractor_BareArrayWithLeadingWhitespaceRawOutput(t *testing.T) { + respBody := ` + + [ + { + "id": "msg_123", + "type": "message", + "role": "assistant", + "content": [ + {"type": "output_text", "text": "Hello"} + ] + } + ]` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + if _, ok := meta.Custom["output_raw"]; !ok { + t.Error("output_raw should be set for bare array with leading whitespace") + } +} + +func TestResponsesExtractor_AnnotationMissingSpanFields(t *testing.T) { + respBody := `{ + "id": "resp_test", + "object": "response", + "model": "gpt-4o", + "status": "completed", + "output": [ + { + "id": "msg_123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "A link", + "annotations": [ + { + "type": "url_citation", + "title": "No Span", + "url": "https://example.com" + } + ] + } + ] + } + ], + "usage": {"total_tokens": 50} + }` + + extractor := &ResponsesExtractor{} + resp := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(bytes.NewReader([]byte(respBody))), + } + + meta, _, err := extractor.Extract(resp) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + + output := meta.Custom["output"].([]ResponsesOutputItem) + annotation := 
output[0].Content[0].Annotations[0] + + if annotation.StartIndex != nil { + t.Errorf("StartIndex should be nil when not provided, got %v", annotation.StartIndex) + } + if annotation.EndIndex != nil { + t.Errorf("EndIndex should be nil when not provided, got %v", annotation.EndIndex) + } +}