diff --git a/client.py b/client.py index 75471219..26a710f5 100644 --- a/client.py +++ b/client.py @@ -38,7 +38,7 @@ def convert_model_for_vertex(model: str) -> str: Vertex AI uses @ to separate model name from version (e.g., claude-sonnet-4-5@20250929) while the Anthropic API uses - (e.g., claude-sonnet-4-5-20250929). - Models without a date suffix (e.g., claude-opus-4-6) pass through unchanged. + Models without a date suffix (e.g., claude-opus-4-7) pass through unchanged. Args: model: Model name in Anthropic format (with hyphens) @@ -342,8 +342,10 @@ def create_client( # Uses get_effective_sdk_env() which reads provider settings from the database, # ensuring UI-configured alternative providers (GLM, Ollama, Kimi, Custom) propagate # correctly to the Claude CLI subprocess - from registry import get_effective_sdk_env + from registry import get_effective_sdk_env, get_effort_setting sdk_env = get_effective_sdk_env() + effort = get_effort_setting() + print(f" - Reasoning effort: {effort}") # Detect alternative API mode (Ollama, GLM, or Vertex AI) base_url = sdk_env.get("ANTHROPIC_BASE_URL", "") @@ -452,6 +454,9 @@ async def pre_compact_hook( return ClaudeSDKClient( options=ClaudeAgentOptions( model=model, + # SDK 0.1.61's effort Literal omits "xhigh" but the CLI's + # --effort flag accepts it; the SDK forwards the string unchanged. + effort=effort, # type: ignore[arg-type] cli_path=system_cli, # Use system CLI to avoid bundled Bun crash (exit code 3) system_prompt="You are an expert full-stack developer building a production-quality web application.", setting_sources=["project"], # Enable skills, commands, and CLAUDE.md from project dir diff --git a/registry.py b/registry.py index 37a2c851..6255d431 100644 --- a/registry.py +++ b/registry.py @@ -14,7 +14,7 @@ from contextlib import contextmanager from datetime import datetime from pathlib import Path -from typing import Any +from typing import Any, Literal, cast from sqlalchemy import Boolean, Column, DateTime, Integer, String, create_engine, text from sqlalchemy.orm import DeclarativeBase, sessionmaker @@ -46,14 +46,17 @@ def _migrate_registry_dir() -> None: # Available models with display names # To add a new model: add an entry here with {"id": "model-id", "name": "Display Name"} AVAILABLE_MODELS = [ - {"id": "claude-opus-4-6", "name": "Claude Opus"}, - {"id": "claude-sonnet-4-5-20250929", "name": "Claude Sonnet"}, + {"id": "claude-opus-4-7", "name": "Claude Opus"}, + {"id": "claude-sonnet-4-6", "name": "Claude Sonnet"}, ] # Map legacy model IDs to their current replacements. # Used by get_all_settings() to auto-migrate stale values on first read after upgrade. LEGACY_MODEL_MAP = { - "claude-opus-4-5-20251101": "claude-opus-4-6", + "claude-opus-4-5-20251101": "claude-opus-4-7", + "claude-opus-4-6": "claude-opus-4-7", + "claude-sonnet-4-5": "claude-sonnet-4-6", + "claude-sonnet-4-5-20250929": "claude-sonnet-4-6", } # List of valid model IDs (derived from AVAILABLE_MODELS) @@ -65,7 +68,15 @@ def _migrate_registry_dir() -> None: _env_default_model = os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL") if _env_default_model is not None: _env_default_model = _env_default_model.strip() -DEFAULT_MODEL = _env_default_model or "claude-opus-4-6" +# Auto-remap stale env-provided values (e.g. user's .env still pins 4.6) +if _env_default_model and _env_default_model in LEGACY_MODEL_MAP: + logging.getLogger(__name__).warning( + "ANTHROPIC_DEFAULT_OPUS_MODEL=%s is legacy; remapping to %s. " + "Update your .env to silence this warning.", + _env_default_model, LEGACY_MODEL_MAP[_env_default_model], + ) + _env_default_model = LEGACY_MODEL_MAP[_env_default_model] +DEFAULT_MODEL = _env_default_model or "claude-opus-4-7" # Ensure env-provided DEFAULT_MODEL is in VALID_MODELS for validation consistency # (idempotent: only adds if missing, doesn't alter AVAILABLE_MODELS semantics) @@ -671,6 +682,28 @@ def get_setting(key: str, default: str | None = None) -> str | None: return default +# Valid Claude Code reasoning/effort levels. Must match the CLI's --effort +# choices (low, medium, high, xhigh, max) — note: the SDK's Literal type at +# 0.1.61 omits "xhigh", but the string is forwarded to the CLI as-is and +# accepted there. +EffortLevel = Literal["low", "medium", "high", "xhigh", "max"] +VALID_EFFORT_LEVELS: tuple[EffortLevel, ...] = ("low", "medium", "high", "xhigh", "max") +DEFAULT_EFFORT: EffortLevel = "xhigh" + + +def get_effort_setting() -> EffortLevel: + """ + Read the global reasoning-effort setting, falling back to ``xhigh``. + + Unknown/invalid stored values are treated as missing so a DB corruption or + schema drift can't force the CLI into an unsupported mode. + """ + value = get_setting("effort") + if value in VALID_EFFORT_LEVELS: + return cast(EffortLevel, value) + return DEFAULT_EFFORT + + def set_setting(key: str, value: str) -> None: """ Set a setting value (creates or updates). @@ -699,7 +732,7 @@ def get_all_settings() -> dict[str, str]: """ Get all settings as a dictionary. - Automatically migrates legacy model IDs (e.g. claude-opus-4-5-20251101 -> claude-opus-4-6) + Automatically migrates legacy model IDs (e.g. claude-opus-4-6 -> claude-opus-4-7) on first read after upgrade. This is a one-time silent migration. Returns: @@ -747,10 +780,10 @@ def get_all_settings() -> dict[str, str]: "base_url": None, "requires_auth": False, "models": [ - {"id": "claude-opus-4-6", "name": "Claude Opus"}, - {"id": "claude-sonnet-4-5-20250929", "name": "Claude Sonnet"}, + {"id": "claude-opus-4-7", "name": "Claude Opus"}, + {"id": "claude-sonnet-4-6", "name": "Claude Sonnet"}, ], - "default_model": "claude-opus-4-6", + "default_model": "claude-opus-4-7", }, "kimi": { "name": "Kimi K2.5 (Moonshot)", @@ -778,11 +811,11 @@ def get_all_settings() -> dict[str, str]: "requires_auth": True, "auth_env_var": "ANTHROPIC_API_KEY", "models": [ - {"id": "claude-opus-4-6", "name": "Claude Opus"}, - {"id": "claude-sonnet-4-5", "name": "Claude Sonnet"}, + {"id": "claude-opus-4-7", "name": "Claude Opus"}, + {"id": "claude-sonnet-4-6", "name": "Claude Sonnet"}, {"id": "claude-haiku-4-5", "name": "Claude Haiku"}, ], - "default_model": "claude-opus-4-6", + "default_model": "claude-opus-4-7", }, "ollama": { "name": "Ollama (Local)", diff --git a/server/routers/settings.py b/server/routers/settings.py index b0224cd5..9ff04acd 100644 --- a/server/routers/settings.py +++ b/server/routers/settings.py @@ -26,6 +26,7 @@ AVAILABLE_MODELS, DEFAULT_MODEL, get_all_settings, + get_effort_setting, get_setting, set_setting, ) @@ -95,6 +96,8 @@ def _parse_bool(value: str | None, default: bool = False) -> bool: return value.lower() == "true" + + @router.get("", response_model=SettingsResponse) async def get_settings(): """Get current global settings.""" @@ -114,6 +117,7 @@ async def get_settings(): playwright_headless=True, # Always headless - embedded browser view replaces desktop windows batch_size=_parse_int(all_settings.get("batch_size"), 3), testing_batch_size=_parse_int(all_settings.get("testing_batch_size"), 3), + effort=get_effort_setting(), api_provider=api_provider, api_base_url=all_settings.get("api_base_url"), api_has_auth_token=bool(all_settings.get("api_auth_token")), @@ -142,6 +146,9 @@ async def update_settings(update: SettingsUpdate): if update.testing_batch_size is not None: set_setting("testing_batch_size", str(update.testing_batch_size)) + if update.effort is not None: + set_setting("effort", update.effort) + # API provider settings if update.api_provider is not None: old_provider = get_setting("api_provider", "claude") @@ -182,6 +189,7 @@ async def update_settings(update: SettingsUpdate): playwright_headless=True, # Always headless - embedded browser view replaces desktop windows batch_size=_parse_int(all_settings.get("batch_size"), 3), testing_batch_size=_parse_int(all_settings.get("testing_batch_size"), 3), + effort=get_effort_setting(), api_provider=api_provider, api_base_url=all_settings.get("api_base_url"), api_has_auth_token=bool(all_settings.get("api_auth_token")), diff --git a/server/schemas.py b/server/schemas.py index 3619a480..d54f00aa 100644 --- a/server/schemas.py +++ b/server/schemas.py @@ -18,7 +18,7 @@ if str(_root) not in sys.path: sys.path.insert(0, str(_root)) -from registry import DEFAULT_MODEL, VALID_MODELS +from registry import DEFAULT_MODEL, LEGACY_MODEL_MAP, VALID_MODELS # ============================================================================ # Project Schemas @@ -484,6 +484,7 @@ class SettingsResponse(BaseModel): playwright_headless: bool = True batch_size: int = 3 # Features per coding agent batch (1-15) testing_batch_size: int = 3 # Features per testing agent batch (1-15) + effort: Literal["low", "medium", "high", "xhigh", "max"] = "xhigh" api_provider: str = "claude" api_base_url: str | None = None api_has_auth_token: bool = False # Never expose actual token @@ -504,6 +505,7 @@ class SettingsUpdate(BaseModel): playwright_headless: bool | None = None batch_size: int | None = None # Features per agent batch (1-15) testing_batch_size: int | None = None # Features per testing agent batch (1-15) + effort: Literal["low", "medium", "high", "xhigh", "max"] | None = None api_provider: str | None = None api_base_url: str | None = Field(None, max_length=500) api_auth_token: str | None = Field(None, max_length=500) # Write-only, never returned @@ -520,12 +522,16 @@ def validate_api_base_url(cls, v: str | None) -> str | None: @field_validator('model') @classmethod - def validate_model(cls, v: str | None, info) -> str | None: # type: ignore[override] + def validate_model(cls, v: str | None, info) -> str | None: if v is not None: # Skip VALID_MODELS check when using an alternative API provider api_provider = info.data.get("api_provider") if api_provider and api_provider != "claude": return v + # Transparently accept legacy IDs so in-flight clients don't 422 + # during an upgrade window; LEGACY_MODEL_MAP already covers migration. + if v in LEGACY_MODEL_MAP: + v = LEGACY_MODEL_MAP[v] if v not in VALID_MODELS: raise ValueError(f"Invalid model. Must be one of: {VALID_MODELS}") return v diff --git a/server/services/assistant_chat_session.py b/server/services/assistant_chat_session.py index cff1f6ce..275a900d 100755 --- a/server/services/assistant_chat_session.py +++ b/server/services/assistant_chat_session.py @@ -270,8 +270,9 @@ async def start(self) -> AsyncGenerator[dict, None]: system_cli = shutil.which("claude") # Build environment overrides for API configuration - from registry import DEFAULT_MODEL, get_effective_sdk_env + from registry import DEFAULT_MODEL, get_effective_sdk_env, get_effort_setting sdk_env = get_effective_sdk_env() + effort = get_effort_setting() # Determine model from SDK env (provider-aware) or fallback to env/default model = sdk_env.get("ANTHROPIC_DEFAULT_OPUS_MODEL") or os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL", DEFAULT_MODEL) @@ -281,6 +282,7 @@ async def start(self) -> AsyncGenerator[dict, None]: self.client = ClaudeSDKClient( options=ClaudeAgentOptions( model=model, + effort=effort, # type: ignore[arg-type] # SDK 0.1.61 Literal omits "xhigh" cli_path=system_cli, # System prompt loaded from CLAUDE.md via setting_sources # This avoids Windows command line length limit (~8191 chars) diff --git a/server/services/expand_chat_session.py b/server/services/expand_chat_session.py index 00a0926c..cb576f1d 100644 --- a/server/services/expand_chat_session.py +++ b/server/services/expand_chat_session.py @@ -161,8 +161,9 @@ async def start(self) -> AsyncGenerator[dict, None]: system_prompt = skill_content.replace("$ARGUMENTS", project_path) # Build environment overrides for API configuration - from registry import DEFAULT_MODEL, get_effective_sdk_env + from registry import DEFAULT_MODEL, get_effective_sdk_env, get_effort_setting sdk_env = get_effective_sdk_env() + effort = get_effort_setting() # Determine model from SDK env (provider-aware) or fallback to env/default model = sdk_env.get("ANTHROPIC_DEFAULT_OPUS_MODEL") or os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL", DEFAULT_MODEL) @@ -184,6 +185,7 @@ async def start(self) -> AsyncGenerator[dict, None]: self.client = ClaudeSDKClient( options=ClaudeAgentOptions( model=model, + effort=effort, # type: ignore[arg-type] # SDK 0.1.61 Literal omits "xhigh" cli_path=system_cli, system_prompt=system_prompt, allowed_tools=[ diff --git a/server/services/spec_chat_session.py b/server/services/spec_chat_session.py index 9e853c36..377640c4 100644 --- a/server/services/spec_chat_session.py +++ b/server/services/spec_chat_session.py @@ -147,8 +147,9 @@ async def start(self) -> AsyncGenerator[dict, None]: system_cli = shutil.which("claude") # Build environment overrides for API configuration - from registry import DEFAULT_MODEL, get_effective_sdk_env + from registry import DEFAULT_MODEL, get_effective_sdk_env, get_effort_setting sdk_env = get_effective_sdk_env() + effort = get_effort_setting() # Determine model from SDK env (provider-aware) or fallback to env/default model = sdk_env.get("ANTHROPIC_DEFAULT_OPUS_MODEL") or os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL", DEFAULT_MODEL) @@ -157,6 +158,7 @@ async def start(self) -> AsyncGenerator[dict, None]: self.client = ClaudeSDKClient( options=ClaudeAgentOptions( model=model, + effort=effort, # type: ignore[arg-type] # SDK 0.1.61 Literal omits "xhigh" cli_path=system_cli, # System prompt loaded from CLAUDE.md via setting_sources # Include "user" for global skills and subagents from ~/.claude/ diff --git a/ui/src/components/SettingsModal.tsx b/ui/src/components/SettingsModal.tsx index 46ecc790..5605f4e6 100644 --- a/ui/src/components/SettingsModal.tsx +++ b/ui/src/components/SettingsModal.tsx @@ -2,7 +2,7 @@ import { useState } from 'react' import { Loader2, AlertCircle, AlertTriangle, Check, Moon, Sun, Eye, EyeOff, ShieldCheck } from 'lucide-react' import { useSettings, useUpdateSettings, useAvailableModels, useAvailableProviders } from '../hooks/useProjects' import { useTheme, THEMES } from '../hooks/useTheme' -import type { ProviderInfo } from '../lib/types' +import type { EffortLevel, ProviderInfo } from '../lib/types' import { Dialog, DialogContent, @@ -70,6 +70,12 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) { } } + const handleEffortChange = (level: EffortLevel) => { + if (!updateSettings.isPending) { + updateSettings.mutate({ effort: level }) + } + } + const handleProviderChange = (providerId: string) => { if (!updateSettings.isPending) { updateSettings.mutate({ api_provider: providerId }) @@ -386,6 +392,30 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) { )} + {/* Reasoning Effort */} +
+ How deeply Claude thinks before responding. xhigh is recommended for autonomous coding. +
++ How deeply Claude thinks before responding. xhigh is recommended for autonomous coding. +
+