diff --git a/README.md b/README.md index e9374e2..734a552 100644 --- a/README.md +++ b/README.md @@ -77,9 +77,9 @@ address: 127.0.0.1 port: 8314 debug: false account_type: individual # individual | business | enterprise -vscode_version: "1.93.0" -api_version: "2025-04-01" -copilot_version: "0.26.7" +vscode_version: "1.115.0" +api_version: "2025-05-01" +copilot_version: "0.44.0" model_mappings: exact: opus: claude-opus-4.7-1m @@ -164,6 +164,32 @@ cargo clippy # lint | `src/server.rs` | Axum router and all HTTP handlers | | `src/store.rs` | In-memory request store for the dashboard | +## Mimicking the Copilot Client + +The proxy authenticates to GitHub Copilot by impersonating the official +**VS Code Copilot Chat** client. To do this faithfully it sends the same +identity headers that the real client sends to `api.githubcopilot.com` +(`Editor-Version`, `Editor-Plugin-Version`, `User-Agent`, +`Copilot-Integration-Id`, `OpenAI-Intent`, `X-Interaction-Type`, +`X-GitHub-Api-Version`, etc.). These are built in +`AppState::copilot_headers` / `github_headers` (`src/state.rs`) from the +version strings in `src/config.rs`. + +GitHub may reject requests that report stale client versions, so these values +occasionally need refreshing. The source of truth is the now open-source +[`microsoft/vscode-copilot-chat`](https://github.com/microsoft/vscode-copilot-chat) +repository: + +| Config value | Where to read it | +|--------------|------------------| +| `copilot_version` | `version` field in the extension's `package.json` | +| `vscode_version` | `engines.vscode` baseline in `package.json` | +| `api_version` | `X-GitHub-Api-Version` constant in `src/platform/networking/common/networking.ts` | + +After updating the constants in `src/config.rs`, run the test suite (the header +test in `tests/integration.rs` guards the expected header set) and bump the +example values in this README. 
+ ## Notes on Parity with `ghc-tunnel` This Rust port focuses on the **core proxy behavior**: authentication, token diff --git a/src/anthropic.rs b/src/anthropic.rs index df798ca..f522818 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -628,6 +628,7 @@ const ALLOWED_ANTHROPIC_KEYS: &[&str] = &[ "tools", "tool_choice", "thinking", + "output_config", "service_tier", ]; @@ -701,6 +702,41 @@ pub fn adjust_thinking_budget(req: &Value) -> Value { req.clone() } +/// Maps a legacy `thinking.budget_tokens` value to an `output_config.effort` +/// level accepted by adaptive-thinking models. Thresholds approximate the +/// previous budget tiers: up to ~8k tokens is treated as low effort, up to ~24k +/// as medium, and anything larger as high. +fn effort_for_budget(budget: u64) -> &'static str { + match budget { + 0..=8_191 => "low", + 8_192..=24_575 => "medium", + _ => "high", + } +} + +/// Rewrites a legacy `thinking: {type: "enabled", budget_tokens: N}` block into +/// the adaptive form required by newer models such as `claude-opus-4.8`: +/// `thinking: {type: "adaptive"}` plus `output_config: {effort: ...}`, where the +/// effort level is derived from the original token budget. +/// +/// Returns `None` when there is no enabled-style thinking block to transform, so +/// callers can leave requests for models that still accept `enabled` untouched. +pub fn adapt_thinking_to_adaptive(req: &Value) -> Option<Value> { + let thinking = req.get("thinking")?; + if thinking.get("type").and_then(|t| t.as_str()) != Some("enabled") { + return None; + } + let budget = thinking + .get("budget_tokens") + .and_then(|b| b.as_u64()) + .unwrap_or(0); + let effort = effort_for_budget(budget); + let mut out = req.clone(); + out["thinking"] = json!({ "type": "adaptive" }); + out["output_config"] = json!({ "effort": effort }); + Some(out) +} + /// Applies `system_prompt_add` / `system_prompt_remove` to a direct Anthropic /// request, and strips the `x-anthropic-billing-header` marker text. 
pub fn apply_system_prompt(req: &Value, cfg: &Config) -> Value { @@ -925,4 +961,47 @@ mod tests { assert!(out.get("foo").is_none()); assert_eq!(out["model"], "m"); } + + #[test] + fn sanitize_keeps_output_config() { + let req = json!({"model": "m", "messages": [], "output_config": {"effort": "high"}}); + let out = sanitize_anthropic_request(&req); + assert_eq!(out["output_config"]["effort"], "high"); + } + + #[test] + fn adapt_thinking_rewrites_enabled_to_adaptive() { + let req = json!({ + "model": "claude-opus-4.8", + "thinking": {"type": "enabled", "budget_tokens": 16000}, + "messages": [] + }); + let out = adapt_thinking_to_adaptive(&req).expect("should transform"); + assert_eq!(out["thinking"]["type"], "adaptive"); + assert!(out["thinking"].get("budget_tokens").is_none()); + assert_eq!(out["output_config"]["effort"], "medium"); + } + + #[test] + fn adapt_thinking_effort_thresholds() { + let low = json!({"thinking": {"type": "enabled", "budget_tokens": 4000}}); + assert_eq!( + adapt_thinking_to_adaptive(&low).unwrap()["output_config"]["effort"], + "low" + ); + let high = json!({"thinking": {"type": "enabled", "budget_tokens": 32000}}); + assert_eq!( + adapt_thinking_to_adaptive(&high).unwrap()["output_config"]["effort"], + "high" + ); + } + + #[test] + fn adapt_thinking_ignores_non_enabled() { + // No thinking block at all. + assert!(adapt_thinking_to_adaptive(&json!({"model": "m"})).is_none()); + // Already adaptive. + let adaptive = json!({"thinking": {"type": "adaptive"}}); + assert!(adapt_thinking_to_adaptive(&adaptive).is_none()); + } } diff --git a/src/config.rs b/src/config.rs index 67be46b..cd49c1f 100644 --- a/src/config.rs +++ b/src/config.rs @@ -6,11 +6,17 @@ use std::collections::BTreeMap; use std::path::PathBuf; /// Default VS Code version string sent in upstream request headers. -pub const VSCODE_VERSION: &str = "1.93.0"; -/// Default GitHub Copilot API version header value. 
-pub const API_VERSION: &str = "2025-04-01"; -/// Default Copilot Chat plugin version string. -pub const COPILOT_VERSION: &str = "0.26.7"; +/// +/// Kept in sync with the `engines.vscode` baseline of the latest +/// `microsoft/vscode-copilot-chat` release (see "Mimicking the Copilot client" +/// in the README for how to refresh these values). +pub const VSCODE_VERSION: &str = "1.115.0"; +/// Default GitHub Copilot API version header value (`X-GitHub-Api-Version`), +/// matching the value sent by the latest Copilot Chat client. +pub const API_VERSION: &str = "2025-05-01"; +/// Default Copilot Chat plugin version string, matching the `version` field of +/// the latest `microsoft/vscode-copilot-chat` release. +pub const COPILOT_VERSION: &str = "0.44.0"; /// Default model name that Claude "opus"/"sonnet" requests are mapped to. pub const DEFAULT_OPUS: &str = "claude-opus-4.7-1m"; diff --git a/src/server.rs b/src/server.rs index b041218..6d8b526 100644 --- a/src/server.rs +++ b/src/server.rs @@ -434,6 +434,7 @@ async fn messages_direct( let is_stream = req.get("stream").and_then(|s| s.as_bool()).unwrap_or(false); let mut current = req.clone(); + let mut thinking_adapted = false; for _ in 0..4 { let mut sanitized = anthropic::sanitize_anthropic_request(&current); sanitized = anthropic::adjust_thinking_budget(&sanitized); @@ -441,11 +442,38 @@ async fn messages_direct( let payload = serde_json::to_vec(&sanitized).unwrap_or_default(); if is_stream { + let upstream = state + .http + .post(&url) + .headers(headers.clone()) + .body(payload) + .send() + .await; + let upstream = match upstream { + Ok(r) => r, + Err(e) => return anthropic_error(StatusCode::GATEWAY_TIMEOUT, e.to_string()), + }; + let status = upstream.status(); + // Inspect 400 responses so we can transparently recover from the + // adaptive-thinking migration before committing to the SSE stream. 
+ if status == StatusCode::BAD_REQUEST { + let text = upstream.text().await.unwrap_or_default(); + log_error("/v1/messages", &current, &text, status.as_u16()); + if !thinking_adapted + && util::is_thinking_enabled_unsupported_error(status.as_u16(), &text) + { + if let Some(adapted) = anthropic::adapt_thinking_to_adaptive(&current) { + tracing::info!("[Direct Anthropic] adapting thinking to adaptive format"); + current = adapted; + thinking_adapted = true; + continue; + } + } + return passthrough_error(status, text); + } return stream_anthropic_direct( state.clone(), - &url, - headers.clone(), - payload, + upstream, original_model, translated, req_size, @@ -500,6 +528,15 @@ async fn messages_direct( } } } + if !thinking_adapted && util::is_thinking_enabled_unsupported_error(status.as_u16(), &text) + { + if let Some(adapted) = anthropic::adapt_thinking_to_adaptive(&current) { + tracing::info!("[Direct Anthropic] adapting thinking to adaptive format"); + current = adapted; + thinking_adapted = true; + continue; + } + } return passthrough_error(status, text); } anthropic_error(StatusCode::BAD_GATEWAY, "Exhausted retries".into()) @@ -868,28 +905,14 @@ async fn stream_responses( } /// Streams a direct Anthropic SSE response back to the client verbatim. -#[allow(clippy::too_many_arguments)] async fn stream_anthropic_direct( state: SharedState, - url: &str, - headers: HeaderMap, - payload: Vec<u8>, + upstream: reqwest::Response, original_model: String, translated: String, req_size: usize, start: Instant, ) -> Response { - let upstream = state - .http - .post(url) - .headers(headers) - .body(payload) - .send() - .await; - let upstream = match upstream { - Ok(r) => r, - Err(e) => return anthropic_error(StatusCode::GATEWAY_TIMEOUT, e.to_string()), - }; let status = upstream.status().as_u16(); let stream = async_stream::stream! 
{ use futures_util::StreamExt; diff --git a/src/state.rs b/src/state.rs index b6c6db2..fe92f98 100644 --- a/src/state.rs +++ b/src/state.rs @@ -113,7 +113,17 @@ impl AppState { HeaderValue::from_static("conversation-panel"), ); insert(&mut h, "X-GitHub-Api-Version", &self.config.api_version); - insert(&mut h, "X-Request-Id", &uuid::Uuid::new_v4().to_string()); + // The latest Copilot client mirrors the request intent in the + // `X-Interaction-Type` header for non-subagent/background requests. + h.insert( + "X-Interaction-Type", + HeaderValue::from_static("conversation-panel"), + ); + // A single request id is shared between `X-Request-Id` and + // `X-Agent-Task-Id`, matching the latest Copilot client behavior. + let request_id = uuid::Uuid::new_v4().to_string(); + insert(&mut h, "X-Request-Id", &request_id); + insert(&mut h, "X-Agent-Task-Id", &request_id); h.insert( "X-VSCode-User-Agent-Library-Version", HeaderValue::from_static("electron-fetch"), diff --git a/src/util.rs b/src/util.rs index fb8952a..faaf97c 100644 --- a/src/util.rs +++ b/src/util.rs @@ -11,6 +11,13 @@ pub fn is_orphaned_tool_error(status: u16, body: &str) -> bool { status == 400 && body.contains("tool_use_id") && body.contains("tool_result") } +/// Detects the upstream 400 error returned by models that no longer accept +/// `thinking.type: "enabled"` and instead require the adaptive thinking format +/// (`thinking.type: "adaptive"` plus `output_config.effort`). +pub fn is_thinking_enabled_unsupported_error(status: u16, body: &str) -> bool { + status == 400 && body.contains("thinking.type.enabled") && body.contains("adaptive") +} + /// Extracts orphaned tool-use ids referenced in an error message. 
pub fn extract_orphaned_ids(body: &str) -> Vec { let mut ids: Vec = Vec::new(); @@ -123,3 +130,22 @@ pub async fn post_with_retry( } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn detects_thinking_enabled_unsupported_error() { + let body = "\"thinking.type.enabled\" is not supported for this model. \ + Use \"thinking.type.adaptive\" and \"output_config.effort\" to control thinking behavior."; + assert!(is_thinking_enabled_unsupported_error(400, body)); + // Wrong status code. + assert!(!is_thinking_enabled_unsupported_error(200, body)); + // Unrelated 400 error. + assert!(!is_thinking_enabled_unsupported_error( + 400, + "some other validation error" + )); + } +} diff --git a/tests/integration.rs b/tests/integration.rs index 9f2cd58..d280b0f 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -135,3 +135,43 @@ async fn router_builds_and_serves_dashboard() { .unwrap(); assert_eq!(resp.status(), StatusCode::NOT_FOUND); } + +#[tokio::test] +async fn copilot_headers_mimic_latest_client() { + let cfg = config::Config::default(); + let state = ghc_proxy::state::AppState::new(cfg, "dummy-token".into()); + + let h = state.copilot_headers(false).await; + + // Identity headers the upstream Copilot API expects from the VS Code client. 
+ assert_eq!(h.get("Copilot-Integration-Id").unwrap(), "vscode-chat"); + assert_eq!( + h.get("Editor-Version").unwrap(), + &format!("vscode/{}", config::VSCODE_VERSION) + ); + assert_eq!( + h.get("Editor-Plugin-Version").unwrap(), + &format!("copilot-chat/{}", config::COPILOT_VERSION) + ); + assert_eq!( + h.get("User-Agent").unwrap(), + &format!("GitHubCopilotChat/{}", config::COPILOT_VERSION) + ); + assert_eq!(h.get("X-GitHub-Api-Version").unwrap(), config::API_VERSION); + assert_eq!(h.get("OpenAI-Intent").unwrap(), "conversation-panel"); + assert_eq!(h.get("X-Interaction-Type").unwrap(), "conversation-panel"); + assert_eq!( + h.get("X-VSCode-User-Agent-Library-Version").unwrap(), + "electron-fetch" + ); + + // X-Request-Id and X-Agent-Task-Id must be present and share the same value. + let request_id = h.get("X-Request-Id").unwrap(); + assert_eq!(h.get("X-Agent-Task-Id").unwrap(), request_id); + assert!(!request_id.is_empty()); + + // Vision header is only present when requested. + assert!(h.get("Copilot-Vision-Request").is_none()); + let hv = state.copilot_headers(true).await; + assert_eq!(hv.get("Copilot-Vision-Request").unwrap(), "true"); +}