MartinForReal · MartinForReal · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026
diff --git a/README.md b/README.md
@@ -77,9 +77,9 @@ address: 127.0.0.1
 port: 8314
 debug: false
 account_type: individual            # individual | business | enterprise
-vscode_version: "1.93.0"
-api_version: "2025-04-01"
-copilot_version: "0.26.7"
+vscode_version: "1.115.0"
+api_version: "2025-05-01"
+copilot_version: "0.44.0"
 model_mappings:
   exact:
     opus: claude-opus-4.7-1m
@@ -164,6 +164,32 @@ cargo clippy     # lint
 | `src/server.rs` | Axum router and all HTTP handlers |
 | `src/store.rs` | In-memory request store for the dashboard |
 
+## Mimicking the Copilot Client
+
+The proxy authenticates to GitHub Copilot by impersonating the official
+**VS Code Copilot Chat** client. To do this faithfully it sends the same
+identity headers that the real client sends to `api.githubcopilot.com`
+(`Editor-Version`, `Editor-Plugin-Version`, `User-Agent`,
+`Copilot-Integration-Id`, `OpenAI-Intent`, `X-Interaction-Type`,
+`X-GitHub-Api-Version`, etc.). These are built in
+`AppState::copilot_headers` / `github_headers` (`src/state.rs`) from the
+version strings in `src/config.rs`.
+
+GitHub may reject requests that report stale client versions, so these values
+occasionally need refreshing. The source of truth is the now open-source
+[`microsoft/vscode-copilot-chat`](https://github.com/microsoft/vscode-copilot-chat)
+repository:
+
+| Config value | Where to read it |
+|--------------|------------------|
+| `copilot_version` | `version` field in the extension's `package.json` |
+| `vscode_version` | `engines.vscode` baseline in `package.json` (a semver range such as `^1.115.0`; drop the `^`) |
+| `api_version` | `X-GitHub-Api-Version` constant in `src/platform/networking/common/networking.ts` |
+
+After updating the constants in `src/config.rs`, run the test suite (the header
+test in `tests/integration.rs` guards the expected header set) and bump the
+example values in this README.
+
 ## Notes on Parity with `ghc-tunnel`
 
 This Rust port focuses on the **core proxy behavior**: authentication, token

diff --git a/src/anthropic.rs b/src/anthropic.rs
@@ -628,6 +628,7 @@ const ALLOWED_ANTHROPIC_KEYS: &[&str] = &[
     "tools",
     "tool_choice",
     "thinking",
+    "output_config",
     "service_tier",
 ];
 
@@ -701,6 +702,41 @@ pub fn adjust_thinking_budget(req: &Value) -> Value {
     req.clone()
 }
 
+/// Picks the `output_config.effort` level for a legacy `thinking.budget_tokens`
+/// value: below 8192 tokens is "low", below 24576 is "medium", otherwise "high".
+fn effort_for_budget(budget: u64) -> &'static str {
+    if budget < 8_192 {
+        "low"
+    } else if budget < 24_576 {
+        "medium"
+    } else {
+        "high"
+    }
+}
+
+/// Rewrites a legacy `thinking: {type: "enabled", budget_tokens: N}` block into
+/// the adaptive form newer models such as `claude-opus-4.8` require: `thinking:
+/// {type: "adaptive"}` plus an `output_config.effort` derived from the budget
+/// (missing or not a `u64` counts as 0); other `output_config` keys are kept.
+/// Returns `None` when there is no enabled-style thinking block to transform.
+pub fn adapt_thinking_to_adaptive(req: &Value) -> Option<Value> {
+    let thinking = req.get("thinking")?;
+    if thinking.get("type").and_then(Value::as_str) != Some("enabled") {
+        return None;
+    }
+    let budget = thinking.get("budget_tokens").and_then(Value::as_u64);
+    let effort = effort_for_budget(budget.unwrap_or(0));
+    let mut out = req.clone();
+    out["thinking"] = json!({ "type": "adaptive" });
+    // Merge rather than overwrite so caller-supplied `output_config` keys survive.
+    if let Some(cfg) = out.get_mut("output_config").and_then(Value::as_object_mut) {
+        cfg.insert("effort".to_owned(), json!(effort));
+    } else {
+        out["output_config"] = json!({ "effort": effort });
+    }
+    Some(out)
+}
+
 /// Applies `system_prompt_add` / `system_prompt_remove` to a direct Anthropic
 /// request, and strips the `x-anthropic-billing-header` marker text.
 pub fn apply_system_prompt(req: &Value, cfg: &Config) -> Value {
@@ -925,4 +961,47 @@ mod tests {
         assert!(out.get("foo").is_none());
         assert_eq!(out["model"], "m");
     }
+
+    #[test]
+    fn sanitize_keeps_output_config() {
+        // `output_config` is allow-listed, so sanitizing must leave it untouched.
+        let raw = json!({"model": "m", "messages": [], "output_config": {"effort": "high"}});
+        assert_eq!(sanitize_anthropic_request(&raw)["output_config"]["effort"], "high");
+    }
+
+    #[test]
+    fn adapt_thinking_rewrites_enabled_to_adaptive() {
+        let legacy = json!({
+            "model": "claude-opus-4.8",
+            "messages": [],
+            "thinking": {"type": "enabled", "budget_tokens": 16000}
+        });
+        let adapted = adapt_thinking_to_adaptive(&legacy).expect("should transform");
+        assert_eq!(adapted["output_config"]["effort"], "medium");
+        assert_eq!(adapted["thinking"]["type"], "adaptive");
+        assert!(adapted["thinking"].get("budget_tokens").is_none());
+    }
+
+    #[test]
+    fn adapt_thinking_effort_thresholds() {
+        // Probe both sides of each tier boundary, not only mid-tier budgets, so an
+        // off-by-one in the thresholds cannot slip through unnoticed.
+        let low = [(4_000, "low"), (8_191, "low")];
+        let mid = [(8_192, "medium"), (24_575, "medium")];
+        let high = [(24_576, "high"), (32_000, "high")];
+        for &(budget, expected) in low.iter().chain(&mid).chain(&high) {
+            let req = json!({"thinking": {"type": "enabled", "budget_tokens": budget}});
+            let out = adapt_thinking_to_adaptive(&req).unwrap();
+            assert_eq!(out["output_config"]["effort"], expected, "budget {}", budget);
+        }
+    }
+
+    #[test]
+    fn adapt_thinking_ignores_non_enabled() {
+        // Neither a missing `thinking` key nor an already-adaptive block is rewritten.
+        let plain = json!({"model": "m"});
+        let already = json!({"thinking": {"type": "adaptive"}});
+        assert_eq!(adapt_thinking_to_adaptive(&plain), None);
+        assert_eq!(adapt_thinking_to_adaptive(&already), None);
+    }
 }
diff --git a/src/config.rs b/src/config.rs
@@ -6,11 +6,17 @@ use std::collections::BTreeMap;
 use std::path::PathBuf;
 
 /// Default VS Code version string sent in upstream request headers.
-pub const VSCODE_VERSION: &str = "1.93.0";
-/// Default GitHub Copilot API version header value.
-pub const API_VERSION: &str = "2025-04-01";
-/// Default Copilot Chat plugin version string.
-pub const COPILOT_VERSION: &str = "0.26.7";
+///
+/// Kept in sync with the `engines.vscode` baseline of the latest
+/// `microsoft/vscode-copilot-chat` release (see "Mimicking the Copilot Client"
+/// in the README for how to refresh these values).
+pub const VSCODE_VERSION: &str = "1.115.0";
+/// Default GitHub Copilot API version header value (`X-GitHub-Api-Version`),
+/// matching the value sent by the latest Copilot Chat client.
+pub const API_VERSION: &str = "2025-05-01";
+/// Default Copilot Chat plugin version string, matching the `version` field of
+/// the latest `microsoft/vscode-copilot-chat` release.
+pub const COPILOT_VERSION: &str = "0.44.0";
 
 /// Default model name that Claude "opus"/"sonnet" requests are mapped to.
 pub const DEFAULT_OPUS: &str = "claude-opus-4.7-1m";

diff --git a/src/server.rs b/src/server.rs
@@ -434,18 +434,46 @@ async fn messages_direct(
     let is_stream = req.get("stream").and_then(|s| s.as_bool()).unwrap_or(false);
 
     let mut current = req.clone();
+    let mut thinking_adapted = false;
     for _ in 0..4 {
         let mut sanitized = anthropic::sanitize_anthropic_request(&current);
         sanitized = anthropic::adjust_thinking_budget(&sanitized);
         let req_size = serde_json::to_vec(&current).map(|v| v.len()).unwrap_or(0);
         let payload = serde_json::to_vec(&sanitized).unwrap_or_default();
 
         if is_stream {
+            let upstream = state
+                .http
+                .post(&url)
+                .headers(headers.clone())
+                .body(payload)
+                .send()
+                .await;
+            let upstream = match upstream {
+                Ok(r) => r,
+                Err(e) => return anthropic_error(StatusCode::GATEWAY_TIMEOUT, e.to_string()),
+            };
+            let status = upstream.status();
+            // Inspect 400 responses so we can transparently recover from the
+            // adaptive-thinking migration before committing to the SSE stream.
+            if status == StatusCode::BAD_REQUEST {
+                let text = upstream.text().await.unwrap_or_default();
+                log_error("/v1/messages", &current, &text, status.as_u16());
+                if !thinking_adapted
+                    && util::is_thinking_enabled_unsupported_error(status.as_u16(), &text)
+                {
+                    if let Some(adapted) = anthropic::adapt_thinking_to_adaptive(&current) {
+                        tracing::info!("[Direct Anthropic] adapting thinking to adaptive format");
+                        current = adapted;
+                        thinking_adapted = true;
+                        continue;
+                    }
+                }
+                return passthrough_error(status, text);
+            }
             return stream_anthropic_direct(
                 state.clone(),
-                &url,
-                headers.clone(),
-                payload,
+                upstream,
                 original_model,
                 translated,
                 req_size,
@@ -500,6 +528,15 @@ async fn messages_direct(
                 }
             }
         }
+        if !thinking_adapted && util::is_thinking_enabled_unsupported_error(status.as_u16(), &text)
+        {
+            if let Some(adapted) = anthropic::adapt_thinking_to_adaptive(&current) {
+                tracing::info!("[Direct Anthropic] adapting thinking to adaptive format");
+                current = adapted;
+                thinking_adapted = true;
+                continue;
+            }
+        }
         return passthrough_error(status, text);
     }
     anthropic_error(StatusCode::BAD_GATEWAY, "Exhausted retries".into())
@@ -868,28 +905,14 @@ async fn stream_responses(
 }
 
 /// Streams a direct Anthropic SSE response back to the client verbatim.
-#[allow(clippy::too_many_arguments)]
 async fn stream_anthropic_direct(
     state: SharedState,
-    url: &str,
-    headers: HeaderMap,
-    payload: Vec<u8>,
+    upstream: reqwest::Response,
     original_model: String,
     translated: String,
     req_size: usize,
     start: Instant,
 ) -> Response {
-    let upstream = state
-        .http
-        .post(url)
-        .headers(headers)
-        .body(payload)
-        .send()
-        .await;
-    let upstream = match upstream {
-        Ok(r) => r,
-        Err(e) => return anthropic_error(StatusCode::GATEWAY_TIMEOUT, e.to_string()),
-    };
     let status = upstream.status().as_u16();
     let stream = async_stream::stream! {
         use futures_util::StreamExt;

diff --git a/src/state.rs b/src/state.rs
@@ -113,7 +113,17 @@ impl AppState {
             HeaderValue::from_static("conversation-panel"),
         );
         insert(&mut h, "X-GitHub-Api-Version", &self.config.api_version);
-        insert(&mut h, "X-Request-Id", &uuid::Uuid::new_v4().to_string());
+        // The latest Copilot client mirrors the request intent in the
+        // `X-Interaction-Type` header for non-subagent/background requests.
+        h.insert(
+            "X-Interaction-Type",
+            HeaderValue::from_static("conversation-panel"),
+        );
+        // A single request id is shared between `X-Request-Id` and
+        // `X-Agent-Task-Id`, matching the latest Copilot client behavior.
+        let request_id = uuid::Uuid::new_v4().to_string();
+        insert(&mut h, "X-Request-Id", &request_id);
+        insert(&mut h, "X-Agent-Task-Id", &request_id);
         h.insert(
             "X-VSCode-User-Agent-Library-Version",
             HeaderValue::from_static("electron-fetch"),

diff --git a/src/util.rs b/src/util.rs
@@ -11,6 +11,13 @@ pub fn is_orphaned_tool_error(status: u16, body: &str) -> bool {
     status == 400 && body.contains("tool_use_id") && body.contains("tool_result")
 }
 
+/// Detects the upstream 400 error returned by models that no longer accept
+/// `thinking.type: "enabled"` and instead require the adaptive thinking format
+/// (`thinking.type: "adaptive"` plus `output_config.effort`).
+pub fn is_thinking_enabled_unsupported_error(status: u16, body: &str) -> bool {
+    status == 400 && body.contains("thinking.type.enabled") && body.contains("adaptive")
+}
+
 /// Extracts orphaned tool-use ids referenced in an error message.
 pub fn extract_orphaned_ids(body: &str) -> Vec<String> {
     let mut ids: Vec<String> = Vec::new();
@@ -123,3 +130,22 @@ pub async fn post_with_retry(
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Error text in the shape the detector looks for.
+    const REJECTION: &str = "\"thinking.type.enabled\" is not supported for this model. \
+        Use \"thinking.type.adaptive\" and \"output_config.effort\" to control thinking behavior.";
+
+    #[test]
+    fn detects_thinking_enabled_unsupported_error() {
+        assert!(is_thinking_enabled_unsupported_error(400, REJECTION));
+        // The same body under a non-400 status must not match.
+        assert!(!is_thinking_enabled_unsupported_error(200, REJECTION));
+        // A 400 whose body lacks the markers must not match either.
+        let unrelated = "some other validation error";
+        assert!(!is_thinking_enabled_unsupported_error(400, unrelated));
+    }
+}
diff --git a/tests/integration.rs b/tests/integration.rs
@@ -135,3 +135,43 @@ async fn router_builds_and_serves_dashboard() {
         .unwrap();
     assert_eq!(resp.status(), StatusCode::NOT_FOUND);
 }
+
+#[tokio::test]
+async fn copilot_headers_mimic_latest_client() {
+    let cfg = config::Config::default();
+    let state = ghc_proxy::state::AppState::new(cfg, "dummy-token".into());
+    // Headers for a non-vision request built from the default configuration.
+    let h = state.copilot_headers(false).await;
+
+    // Identity headers: each versioned value must be built from the `config` constants.
+    assert_eq!(h.get("Copilot-Integration-Id").unwrap(), "vscode-chat");
+    assert_eq!(
+        h.get("Editor-Version").unwrap(),
+        &format!("vscode/{}", config::VSCODE_VERSION)
+    );
+    assert_eq!(
+        h.get("Editor-Plugin-Version").unwrap(),
+        &format!("copilot-chat/{}", config::COPILOT_VERSION)
+    );
+    assert_eq!(
+        h.get("User-Agent").unwrap(),
+        &format!("GitHubCopilotChat/{}", config::COPILOT_VERSION)
+    );
+    assert_eq!(h.get("X-GitHub-Api-Version").unwrap(), config::API_VERSION);
+    assert_eq!(h.get("OpenAI-Intent").unwrap(), "conversation-panel");
+    assert_eq!(h.get("X-Interaction-Type").unwrap(), "conversation-panel");
+    assert_eq!(
+        h.get("X-VSCode-User-Agent-Library-Version").unwrap(),
+        "electron-fetch"
+    );
+
+    // `X-Request-Id` and `X-Agent-Task-Id` must both be present, equal, and non-empty.
+    let request_id = h.get("X-Request-Id").unwrap();
+    assert_eq!(h.get("X-Agent-Task-Id").unwrap(), request_id);
+    assert!(!request_id.is_empty());
+
+    // The vision header is absent above and appears once `copilot_headers(true)` is used.
+    assert!(h.get("Copilot-Vision-Request").is_none());
+    let hv = state.copilot_headers(true).await;
+    assert_eq!(hv.get("Copilot-Vision-Request").unwrap(), "true");
+}