Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 29 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ address: 127.0.0.1
port: 8314
debug: false
account_type: individual # individual | business | enterprise
vscode_version: "1.93.0"
api_version: "2025-04-01"
copilot_version: "0.26.7"
vscode_version: "1.115.0"
api_version: "2025-05-01"
copilot_version: "0.44.0"
model_mappings:
exact:
opus: claude-opus-4.7-1m
Expand Down Expand Up @@ -164,6 +164,32 @@ cargo clippy # lint
| `src/server.rs` | Axum router and all HTTP handlers |
| `src/store.rs` | In-memory request store for the dashboard |

## Mimicking the Copilot Client

The proxy authenticates to GitHub Copilot by impersonating the official
**VS Code Copilot Chat** client. To do this faithfully it sends the same
identity headers that the real client sends to `api.githubcopilot.com`
(`Editor-Version`, `Editor-Plugin-Version`, `User-Agent`,
`Copilot-Integration-Id`, `OpenAI-Intent`, `X-Interaction-Type`,
`X-GitHub-Api-Version`, etc.). These are built in
`AppState::copilot_headers` / `github_headers` (`src/state.rs`) from the
version strings in `src/config.rs`.

GitHub may reject requests that report stale client versions, so these values
occasionally need refreshing. The source of truth is the now open-source
[`microsoft/vscode-copilot-chat`](https://github.com/microsoft/vscode-copilot-chat)
repository:

| Config value | Where to read it |
|--------------|------------------|
| `copilot_version` | `version` field in the extension's `package.json` |
| `vscode_version` | `engines.vscode` baseline in `package.json` |
| `api_version` | `X-GitHub-Api-Version` constant in `src/platform/networking/common/networking.ts` |

After updating the constants in `src/config.rs`, run the test suite (the header
test in `tests/integration.rs` guards the expected header set) and bump the
example values in this README.

## Notes on Parity with `ghc-tunnel`

This Rust port focuses on the **core proxy behavior**: authentication, token
Expand Down
79 changes: 79 additions & 0 deletions src/anthropic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,7 @@ const ALLOWED_ANTHROPIC_KEYS: &[&str] = &[
"tools",
"tool_choice",
"thinking",
"output_config",
"service_tier",
];

Expand Down Expand Up @@ -701,6 +702,41 @@ pub fn adjust_thinking_budget(req: &Value) -> Value {
req.clone()
}

/// Picks the `output_config.effort` level that stands in for a legacy
/// `thinking.budget_tokens` value on adaptive-thinking models.
///
/// The cut-offs approximate the previous budget tiers:
///
/// * budgets below 8,192 tokens map to `"low"`,
/// * budgets from 8,192 up to 24,575 tokens map to `"medium"`,
/// * budgets of 24,576 tokens or more map to `"high"`.
fn effort_for_budget(budget: u64) -> &'static str {
    if budget < 8_192 {
        "low"
    } else if budget < 24_576 {
        "medium"
    } else {
        "high"
    }
}

/// Rewrites a legacy `thinking: {type: "enabled", budget_tokens: N}` block into
/// the adaptive form required by newer models such as `claude-opus-4.8`:
/// `thinking: {type: "adaptive"}` plus `output_config: {effort: ...}`, where the
/// effort level is derived from the original token budget.
///
/// Returns `None` when there is no enabled-style thinking block to transform, so
/// callers can leave requests for models that still accept `enabled` untouched.
pub fn adapt_thinking_to_adaptive(req: &Value) -> Option<Value> {
let thinking = req.get("thinking")?;
if thinking.get("type").and_then(|t| t.as_str()) != Some("enabled") {
return None;
}
let budget = thinking
.get("budget_tokens")
.and_then(|b| b.as_u64())
.unwrap_or(0);
let effort = effort_for_budget(budget);
let mut out = req.clone();
out["thinking"] = json!({ "type": "adaptive" });
out["output_config"] = json!({ "effort": effort });
Some(out)
}

/// Applies `system_prompt_add` / `system_prompt_remove` to a direct Anthropic
/// request, and strips the `x-anthropic-billing-header` marker text.
pub fn apply_system_prompt(req: &Value, cfg: &Config) -> Value {
Expand Down Expand Up @@ -925,4 +961,47 @@ mod tests {
assert!(out.get("foo").is_none());
assert_eq!(out["model"], "m");
}

#[test]
fn sanitize_keeps_output_config() {
    // `output_config` must pass through sanitization untouched.
    let request = json!({
        "model": "m",
        "messages": [],
        "output_config": {"effort": "high"}
    });
    let sanitized = sanitize_anthropic_request(&request);
    let effort = &sanitized["output_config"]["effort"];
    assert_eq!(*effort, "high");
}

#[test]
fn adapt_thinking_rewrites_enabled_to_adaptive() {
    // A legacy enabled-thinking request with a mid-range budget.
    let legacy = json!({
        "model": "claude-opus-4.8",
        "thinking": {"type": "enabled", "budget_tokens": 16000},
        "messages": []
    });

    let adapted = adapt_thinking_to_adaptive(&legacy).expect("should transform");

    // The thinking block is switched to adaptive and loses its token budget.
    let thinking = &adapted["thinking"];
    assert_eq!(thinking["type"], "adaptive");
    assert!(thinking.get("budget_tokens").is_none());

    // 16000 tokens falls in the medium tier.
    assert_eq!(adapted["output_config"]["effort"], "medium");
}

#[test]
fn adapt_thinking_effort_thresholds() {
    // Covers every tier plus both sides of each boundary, so an off-by-one in
    // the budget-to-effort mapping is caught.
    let cases: [(u64, &str); 8] = [
        (0, "low"),
        (4000, "low"),
        (8_191, "low"),
        (8_192, "medium"),
        (16_000, "medium"),
        (24_575, "medium"),
        (24_576, "high"),
        (32000, "high"),
    ];
    for &(budget, expected) in cases.iter() {
        let req = json!({"thinking": {"type": "enabled", "budget_tokens": budget}});
        let out = adapt_thinking_to_adaptive(&req).expect("enabled thinking should be adapted");
        assert_eq!(
            out["output_config"]["effort"], expected,
            "unexpected effort for budget_tokens = {}",
            budget
        );
    }
}

#[test]
fn adapt_thinking_ignores_non_enabled() {
    // Requests that must be left alone: one without any thinking block and
    // one that is already in the adaptive format.
    let untouched = [
        json!({"model": "m"}),
        json!({"thinking": {"type": "adaptive"}}),
    ];
    for req in untouched.iter() {
        assert!(adapt_thinking_to_adaptive(req).is_none());
    }
}
}
16 changes: 11 additions & 5 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,17 @@ use std::collections::BTreeMap;
use std::path::PathBuf;

/// Default VS Code version string sent in upstream request headers.
pub const VSCODE_VERSION: &str = "1.93.0";
/// Default GitHub Copilot API version header value.
pub const API_VERSION: &str = "2025-04-01";
/// Default Copilot Chat plugin version string.
pub const COPILOT_VERSION: &str = "0.26.7";
///
/// Kept in sync with the `engines.vscode` baseline of the latest
/// `microsoft/vscode-copilot-chat` release (see "Mimicking the Copilot Client"
/// in the README for how to refresh these values).
pub const VSCODE_VERSION: &str = "1.115.0";
/// Default GitHub Copilot API version header value (`X-GitHub-Api-Version`),
/// matching the value sent by the latest Copilot Chat client.
pub const API_VERSION: &str = "2025-05-01";
/// Default Copilot Chat plugin version string, matching the `version` field of
/// the latest `microsoft/vscode-copilot-chat` release.
pub const COPILOT_VERSION: &str = "0.44.0";

/// Default model name that Claude "opus"/"sonnet" requests are mapped to.
pub const DEFAULT_OPUS: &str = "claude-opus-4.7-1m";
Expand Down
59 changes: 41 additions & 18 deletions src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -434,18 +434,46 @@ async fn messages_direct(
let is_stream = req.get("stream").and_then(|s| s.as_bool()).unwrap_or(false);

let mut current = req.clone();
let mut thinking_adapted = false;
for _ in 0..4 {
let mut sanitized = anthropic::sanitize_anthropic_request(&current);
sanitized = anthropic::adjust_thinking_budget(&sanitized);
let req_size = serde_json::to_vec(&current).map(|v| v.len()).unwrap_or(0);
let payload = serde_json::to_vec(&sanitized).unwrap_or_default();

if is_stream {
let upstream = state
.http
.post(&url)
.headers(headers.clone())
.body(payload)
.send()
.await;
let upstream = match upstream {
Ok(r) => r,
Err(e) => return anthropic_error(StatusCode::GATEWAY_TIMEOUT, e.to_string()),
};
let status = upstream.status();
// Inspect 400 responses so we can transparently recover from the
// adaptive-thinking migration before committing to the SSE stream.
if status == StatusCode::BAD_REQUEST {
let text = upstream.text().await.unwrap_or_default();
log_error("/v1/messages", &current, &text, status.as_u16());
if !thinking_adapted
&& util::is_thinking_enabled_unsupported_error(status.as_u16(), &text)
{
if let Some(adapted) = anthropic::adapt_thinking_to_adaptive(&current) {
tracing::info!("[Direct Anthropic] adapting thinking to adaptive format");
current = adapted;
thinking_adapted = true;
continue;
}
}
return passthrough_error(status, text);
}
return stream_anthropic_direct(
state.clone(),
&url,
headers.clone(),
payload,
upstream,
original_model,
translated,
req_size,
Expand Down Expand Up @@ -500,6 +528,15 @@ async fn messages_direct(
}
}
}
if !thinking_adapted && util::is_thinking_enabled_unsupported_error(status.as_u16(), &text)
{
if let Some(adapted) = anthropic::adapt_thinking_to_adaptive(&current) {
tracing::info!("[Direct Anthropic] adapting thinking to adaptive format");
current = adapted;
thinking_adapted = true;
continue;
}
}
return passthrough_error(status, text);
}
anthropic_error(StatusCode::BAD_GATEWAY, "Exhausted retries".into())
Expand Down Expand Up @@ -868,28 +905,14 @@ async fn stream_responses(
}

/// Streams a direct Anthropic SSE response back to the client verbatim.
#[allow(clippy::too_many_arguments)]
async fn stream_anthropic_direct(
state: SharedState,
url: &str,
headers: HeaderMap,
payload: Vec<u8>,
upstream: reqwest::Response,
original_model: String,
translated: String,
req_size: usize,
start: Instant,
) -> Response {
let upstream = state
.http
.post(url)
.headers(headers)
.body(payload)
.send()
.await;
let upstream = match upstream {
Ok(r) => r,
Err(e) => return anthropic_error(StatusCode::GATEWAY_TIMEOUT, e.to_string()),
};
let status = upstream.status().as_u16();
let stream = async_stream::stream! {
use futures_util::StreamExt;
Expand Down
12 changes: 11 additions & 1 deletion src/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,17 @@ impl AppState {
HeaderValue::from_static("conversation-panel"),
);
insert(&mut h, "X-GitHub-Api-Version", &self.config.api_version);
insert(&mut h, "X-Request-Id", &uuid::Uuid::new_v4().to_string());
// The latest Copilot client mirrors the request intent in the
// `X-Interaction-Type` header for non-subagent/background requests.
h.insert(
"X-Interaction-Type",
HeaderValue::from_static("conversation-panel"),
);
// A single request id is shared between `X-Request-Id` and
// `X-Agent-Task-Id`, matching the latest Copilot client behavior.
let request_id = uuid::Uuid::new_v4().to_string();
insert(&mut h, "X-Request-Id", &request_id);
insert(&mut h, "X-Agent-Task-Id", &request_id);
h.insert(
"X-VSCode-User-Agent-Library-Version",
HeaderValue::from_static("electron-fetch"),
Expand Down
26 changes: 26 additions & 0 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@ pub fn is_orphaned_tool_error(status: u16, body: &str) -> bool {
status == 400 && body.contains("tool_use_id") && body.contains("tool_result")
}

/// Recognises the upstream rejection sent by models that have dropped
/// `thinking.type: "enabled"` in favour of the adaptive thinking format
/// (`thinking.type: "adaptive"` plus `output_config.effort`).
///
/// Returns `true` only when `status` is 400 and `body` contains both the
/// `thinking.type.enabled` and `adaptive` markers.
pub fn is_thinking_enabled_unsupported_error(status: u16, body: &str) -> bool {
    if status != 400 {
        return false;
    }
    ["thinking.type.enabled", "adaptive"]
        .iter()
        .all(|marker| body.contains(*marker))
}

/// Extracts orphaned tool-use ids referenced in an error message.
pub fn extract_orphaned_ids(body: &str) -> Vec<String> {
let mut ids: Vec<String> = Vec::new();
Expand Down Expand Up @@ -123,3 +130,22 @@ pub async fn post_with_retry(
}
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Sample error body containing both markers the detector looks for.
    const UNSUPPORTED_BODY: &str = "\"thinking.type.enabled\" is not supported for this model. \
        Use \"thinking.type.adaptive\" and \"output_config.effort\" to control thinking behavior.";

    #[test]
    fn detects_thinking_enabled_unsupported_error() {
        // Matching body with the expected status is detected.
        let detected = is_thinking_enabled_unsupported_error(400, UNSUPPORTED_BODY);
        assert!(detected);

        // The same body under a non-400 status is not.
        let wrong_status = is_thinking_enabled_unsupported_error(200, UNSUPPORTED_BODY);
        assert!(!wrong_status);

        // Neither is a 400 whose body lacks the markers.
        let unrelated = is_thinking_enabled_unsupported_error(400, "some other validation error");
        assert!(!unrelated);
    }
}
40 changes: 40 additions & 0 deletions tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,43 @@ async fn router_builds_and_serves_dashboard() {
.unwrap();
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
}

/// Guards the header set produced by `AppState::copilot_headers` against the
/// version constants in `config`.
#[tokio::test]
async fn copilot_headers_mimic_latest_client() {
    let state = ghc_proxy::state::AppState::new(config::Config::default(), "dummy-token".into());

    let headers = state.copilot_headers(false).await;

    // Expected values that are derived from the configured version constants.
    let editor_version = format!("vscode/{}", config::VSCODE_VERSION);
    let plugin_version = format!("copilot-chat/{}", config::COPILOT_VERSION);
    let user_agent = format!("GitHubCopilotChat/{}", config::COPILOT_VERSION);

    // Identity headers the upstream Copilot API expects from the VS Code client.
    assert_eq!(
        headers.get("Copilot-Integration-Id").unwrap(),
        "vscode-chat"
    );
    assert_eq!(headers.get("Editor-Version").unwrap(), &editor_version);
    assert_eq!(
        headers.get("Editor-Plugin-Version").unwrap(),
        &plugin_version
    );
    assert_eq!(headers.get("User-Agent").unwrap(), &user_agent);
    assert_eq!(
        headers.get("X-GitHub-Api-Version").unwrap(),
        config::API_VERSION
    );
    assert_eq!(
        headers.get("OpenAI-Intent").unwrap(),
        "conversation-panel"
    );
    assert_eq!(
        headers.get("X-Interaction-Type").unwrap(),
        "conversation-panel"
    );
    assert_eq!(
        headers.get("X-VSCode-User-Agent-Library-Version").unwrap(),
        "electron-fetch"
    );

    // X-Request-Id and X-Agent-Task-Id must be present and share the same value.
    let shared_id = headers.get("X-Request-Id").unwrap();
    assert_eq!(headers.get("X-Agent-Task-Id").unwrap(), shared_id);
    assert!(!shared_id.is_empty());

    // Vision header is only present when requested.
    assert!(headers.get("Copilot-Vision-Request").is_none());
    let vision_headers = state.copilot_headers(true).await;
    assert_eq!(
        vision_headers.get("Copilot-Vision-Request").unwrap(),
        "true"
    );
}