diff --git a/crates/forge_app/src/app.rs b/crates/forge_app/src/app.rs
index d53b3c5b7e..37f62fae1c 100644
--- a/crates/forge_app/src/app.rs
+++ b/crates/forge_app/src/app.rs
@@ -10,7 +10,7 @@ use crate::apply_tunable_parameters::ApplyTunableParameters;
 use crate::changed_files::ChangedFiles;
 use crate::dto::ToolsOverview;
 use crate::hooks::{
-    CompactionHandler, DoomLoopDetector, PendingTodosHandler, TitleGenerationHandler,
+    CompactionHandler, DoomLoopDetector, LoopGuard, PendingTodosHandler, TitleGenerationHandler,
     TracingHandler,
 };
 use crate::init_conversation_metrics::InitConversationMetrics;
@@ -168,7 +168,9 @@ impl> ForgeAp
                     .and(CompactionHandler::new(agent.clone(), environment.clone())),
             )
             .on_toolcall_start(tracing_handler.clone())
-            .on_toolcall_end(tracing_handler)
+            .on_toolcall_end(tracing_handler.and(LoopGuard::new(
+                forge_config.loop_guard.clone().unwrap_or_default(),
+            )))
             .on_end(on_end_hook);
 
         let orch = Orchestrator::new(
diff --git a/crates/forge_app/src/hooks/loop_guard.rs b/crates/forge_app/src/hooks/loop_guard.rs
new file mode 100644
index 0000000000..dad76ef7d4
--- /dev/null
+++ b/crates/forge_app/src/hooks/loop_guard.rs
@@ -0,0 +1,316 @@
+use async_trait::async_trait;
+use forge_config::LoopGuardConfig;
+use forge_domain::{
+    ContextMessage, Conversation, EventData, EventHandle, Role, ToolCallArguments, ToolName,
+    ToolcallEndPayload,
+};
+use forge_template::Element;
+use tracing::warn;
+
+use crate::TemplateEngine;
+
+/// Guard that detects when the agent is repeating the same tool call and
+/// injects a warning or blocks execution by injecting a tool error into the
+/// conversation.
+///
+/// The guard operates in two phases:
+/// 1. **Warning phase** (at `warn_threshold` repeated calls): injects a
+///    strongly worded `<system_reminder>` user message urging the agent to
+///    change approach.
+/// 2. **Fail phase** (at `fail_threshold` repeated calls): injects a
+///    `<tool_error>` user message that acts as a blocked result, forcing the
+///    agent to stop looping.
+///
+/// Detection is based on the `(tool_name, arguments)` signature of calls in the
+/// most recent assistant messages in the conversation history.
+///
+/// NOTE(review): the guard only appends a message when the hook fires; it
+/// does not itself stop the tool from running. Confirm the orchestrator
+/// accepts a user message appended at this point of the tool-call cycle.
+#[derive(Debug, Clone, Default)]
+pub struct LoopGuard {
+    config: LoopGuardConfig,
+}
+
+impl LoopGuard {
+    /// Creates a new loop guard from a [`LoopGuardConfig`].
+    pub fn new(config: LoopGuardConfig) -> Self {
+        Self { config }
+    }
+
+    /// Counts how many assistant messages at the tail of the conversation
+    /// history contain a tool call with the given `name` and `arguments`.
+    ///
+    /// Messages that are not assistant text messages with tool calls are
+    /// skipped; counting stops at the first one whose calls do not include a
+    /// match. Returns 0 when the conversation has no context.
+    fn count_consecutive_repeats(
+        conversation: &Conversation,
+        name: &ToolName,
+        arguments: &ToolCallArguments,
+    ) -> usize {
+        let Some(ctx) = conversation.context.as_ref() else {
+            return 0;
+        };
+
+        // Walk assistant tool-call batches newest-first.
+        ctx.messages
+            .iter()
+            .rev()
+            .filter_map(|entry| match &entry.message {
+                ContextMessage::Text(msg) if msg.role == Role::Assistant => {
+                    msg.tool_calls.as_ref()
+                }
+                _ => None,
+            })
+            .take_while(|calls| {
+                calls
+                    .iter()
+                    .any(|c| &c.name == name && &c.arguments == arguments)
+            })
+            .count()
+    }
+
+    /// Renders `template` with the repeat count, wraps the output in a CDATA
+    /// element named `tag`, and appends it to the context as a user message.
+    /// Does nothing after rendering when the conversation has no context.
+    ///
+    /// # Errors
+    /// Returns an error when the template fails to render.
+    fn inject(
+        conversation: &mut Conversation,
+        tag: &'static str,
+        template: &'static str,
+        repeats: usize,
+    ) -> anyhow::Result<()> {
+        let body = TemplateEngine::default()
+            .render(template, &serde_json::json!({ "repeats": repeats }))?;
+        let content = Element::new(tag).cdata(body);
+        if let Some(ctx) = conversation.context.as_mut() {
+            ctx.messages
+                .push(ContextMessage::user(content, None).into());
+        }
+        Ok(())
+    }
+}
+
+#[async_trait]
+impl EventHandle<EventData<ToolcallEndPayload>> for LoopGuard {
+    /// Inspects the finished tool call and, once its repeat count reaches a
+    /// configured threshold, appends a reminder or a blocking error message.
+    async fn handle(
+        &self,
+        event: &EventData<ToolcallEndPayload>,
+        conversation: &mut Conversation,
+    ) -> anyhow::Result<()> {
+        let tool_call = &event.payload.tool_call;
+        let repeats =
+            Self::count_consecutive_repeats(conversation, &tool_call.name, &tool_call.arguments);
+
+        // Nothing matched, so never trigger (even if a threshold is set to 0).
+        if repeats == 0 {
+            return Ok(());
+        }
+
+        // Check the fail threshold first so it takes precedence over warning.
+        if repeats >= self.config.fail_threshold {
+            warn!(
+                agent_id = %event.agent.id,
+                tool = %tool_call.name,
+                repeats,
+                "Loop guard: blocking repeated tool call"
+            );
+            Self::inject(conversation, "tool_error", "forge-loop-guard-blocked.md", repeats)
+        } else if repeats >= self.config.warn_threshold {
+            warn!(
+                agent_id = %event.agent.id,
+                tool = %tool_call.name,
+                repeats,
+                "Loop guard: warning about repeated tool call"
+            );
+            Self::inject(conversation, "system_reminder", "forge-loop-guard-reminder.md", repeats)
+        } else {
+            Ok(())
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use forge_config::LoopGuardConfig;
+    use forge_domain::{
+        Agent, AgentId, Context, ContextMessage, Conversation, ConversationId, EventData,
+        EventHandle, MessageEntry, ModelId, ProviderId, Role, TextMessage, ToolCallArguments,
+        ToolCallFull, ToolResult, ToolcallEndPayload,
+    };
+    use pretty_assertions::assert_eq;
+
+    use super::*;
+
+    fn fixture_guard(warn: usize, fail: usize) -> LoopGuard {
+        LoopGuard::new(LoopGuardConfig { warn_threshold: warn, fail_threshold: fail })
+    }
+
+    fn format_conversation(conversation: &Conversation) -> String {
+        conversation
+            .context
+            .as_ref()
+            .unwrap()
+            .messages
+            .iter()
+            .map(|entry| match &entry.message {
+                ContextMessage::Text(msg) => {
+                    let mut parts = vec![format!("[{}]", msg.role)];
+                    if !msg.content.is_empty() {
+                        parts.push(msg.content.clone());
+                    }
+                    if let Some(calls) = &msg.tool_calls {
+                        for call in calls {
+                            parts.push(format!(
+                                " tool_call: {}({})",
+                                call.name,
+                                call.arguments.clone().into_string()
+                            ));
+                        }
+                    }
+                    parts.join("\n")
+                }
+                ContextMessage::Tool(result) => {
+                    format!("[Tool: {}]\n{:?}", result.name, result.output)
+                }
+                ContextMessage::Image(_) => "[Image]".to_string(),
+            })
+            .collect::<Vec<_>>()
+            .join("\n---\n")
+    }
+
+    fn fixture_agent() -> Agent {
+        Agent::new(
+            AgentId::new("test-agent"),
+            ProviderId::FORGE,
+            ModelId::new("test-model"),
+        )
+    }
+
+    fn fixture_assistant_message(name: &str, args: &str) -> TextMessage {
+        TextMessage {
+            role: Role::Assistant,
+            content: String::new(),
+            raw_content: None,
+            tool_calls: Some(vec![
+                ToolCallFull::new(name).arguments(ToolCallArguments::from_json(args)),
+            ]),
+            thought_signature: None,
+            model: None,
+            reasoning_details: None,
+            droppable: false,
+            phase: None,
+        }
+    }
+
+    fn fixture_conversation(messages: Vec<TextMessage>) -> Conversation {
+        let entries: Vec<MessageEntry> = messages
+            .into_iter()
+            .map(|m| MessageEntry::from(ContextMessage::Text(m)))
+            .collect();
+        Conversation {
+            id: ConversationId::generate(),
+            title: None,
+            context: Some(Context::default().messages(entries)),
+            metrics: Default::default(),
+            metadata: forge_domain::MetaData::new(chrono::Utc::now()),
+        }
+    }
+
+    fn fixture_event(name: &str, args: &str) -> EventData<ToolcallEndPayload> {
+        let tool_call = ToolCallFull::new(name).arguments(ToolCallArguments::from_json(args));
+        EventData::new(
+            fixture_agent(),
+            ModelId::new("test-model"),
+            ToolcallEndPayload::new(tool_call.clone(), ToolResult::from(tool_call)),
+        )
+    }
+
+    #[tokio::test]
+    async fn test_no_repeat_does_nothing() {
+        let guard = LoopGuard::default();
+        let event = fixture_event("read", r#"{"path":"file.txt"}"#);
+        let msg = fixture_assistant_message("write", r#"{"path":"other.txt"}"#);
+        let mut conversation = fixture_conversation(vec![msg]);
+
+        guard.handle(&event, &mut conversation).await.unwrap();
+
+        let actual = conversation.context.as_ref().unwrap().messages.len();
+        let expected = 1; // unchanged
+        assert_eq!(actual, expected);
+    }
+
+    #[tokio::test]
+    async fn test_below_warn_threshold_does_nothing() {
+        let guard = fixture_guard(3, 5);
+        let event = fixture_event("read", r#"{"path":"file.txt"}"#);
+        // Only 2 consecutive identical calls in history — below threshold
+        let msg1 = fixture_assistant_message("read", r#"{"path":"file.txt"}"#);
+        let msg2 = fixture_assistant_message("read", r#"{"path":"file.txt"}"#);
+        let mut conversation = fixture_conversation(vec![msg1, msg2]);
+
+        guard.handle(&event, &mut conversation).await.unwrap();
+
+        let actual = conversation.context.as_ref().unwrap().messages.len();
+        let expected = 2;
+        assert_eq!(actual, expected);
+    }
+
+    #[tokio::test]
+    async fn test_at_warn_threshold_injects_reminder() {
+        let guard = fixture_guard(3, 5);
+        let event = fixture_event("read", r#"{"path":"file.txt"}"#);
+        // 3 identical calls in history = warn threshold
+        let msg1 = fixture_assistant_message("read", r#"{"path":"file.txt"}"#);
+        let msg2 = fixture_assistant_message("read", r#"{"path":"file.txt"}"#);
+        let msg3 = fixture_assistant_message("read", r#"{"path":"file.txt"}"#);
+        let mut conversation = fixture_conversation(vec![msg1, msg2, msg3]);
+
+        guard.handle(&event, &mut conversation).await.unwrap();
+
+        let ctx = conversation.context.as_ref().unwrap();
+        assert_eq!(ctx.messages.len(), 4); // 3 original + 1 reminder
+        insta::assert_snapshot!(format_conversation(&conversation));
+    }
+
+    #[tokio::test]
+    async fn test_at_fail_threshold_injects_error() {
+        let guard = fixture_guard(3, 5);
+        let event = fixture_event("read", r#"{"path":"file.txt"}"#);
+        // 5 identical calls = fail threshold
+        let msgs: Vec<_> = (0..5)
+            .map(|_| fixture_assistant_message("read", r#"{"path":"file.txt"}"#))
+            .collect();
+        let mut conversation = fixture_conversation(msgs);
+
+        guard.handle(&event, &mut conversation).await.unwrap();
+
+        let ctx = conversation.context.as_ref().unwrap();
+        assert_eq!(ctx.messages.len(), 6); // 5 original + 1 error
+        insta::assert_snapshot!(format_conversation(&conversation));
+    }
+
+    #[tokio::test]
+    async fn test_different_args_resets_count() {
+        let guard = fixture_guard(3, 5);
+        let event = fixture_event("read", r#"{"path":"file.txt"}"#);
+        // 2 identical, then 1 different, then 2 identical — only 2 consecutive at tail
+        let msg1 = fixture_assistant_message("read", r#"{"path":"file.txt"}"#);
+        let msg2 = fixture_assistant_message("read", r#"{"path":"file.txt"}"#);
+        let msg3 = fixture_assistant_message("read", r#"{"path":"other.txt"}"#);
+        let msg4 = fixture_assistant_message("read", r#"{"path":"file.txt"}"#);
+        let msg5 = fixture_assistant_message("read", r#"{"path":"file.txt"}"#);
+        let mut conversation = fixture_conversation(vec![msg1, msg2, msg3, msg4, msg5]);
+
+        guard.handle(&event, &mut conversation).await.unwrap();
+
+        let actual = conversation.context.as_ref().unwrap().messages.len();
+        let expected = 5; // no injection
+        assert_eq!(actual, expected);
+    }
+}
diff --git a/crates/forge_app/src/hooks/mod.rs b/crates/forge_app/src/hooks/mod.rs
index 26a43401f2..4b95eaa5b1 100644
--- a/crates/forge_app/src/hooks/mod.rs
+++ b/crates/forge_app/src/hooks/mod.rs
@@ -1,11 +1,13 @@
 mod compaction;
 mod doom_loop;
+mod loop_guard;
 mod pending_todos;
 mod title_generation;
 mod tracing;
 
 pub use compaction::CompactionHandler;
 pub use doom_loop::DoomLoopDetector;
+pub use loop_guard::LoopGuard;
 pub use pending_todos::PendingTodosHandler;
 pub use title_generation::TitleGenerationHandler;
 pub use tracing::TracingHandler;
diff --git a/crates/forge_app/src/hooks/snapshots/forge_app__hooks__loop_guard__tests__at_fail_threshold_injects_error.snap b/crates/forge_app/src/hooks/snapshots/forge_app__hooks__loop_guard__tests__at_fail_threshold_injects_error.snap
new file mode 100644
index 0000000000..c85d8fb1cd
--- /dev/null
+++ b/crates/forge_app/src/hooks/snapshots/forge_app__hooks__loop_guard__tests__at_fail_threshold_injects_error.snap
@@ -0,0 +1,22 @@
+---
+source: crates/forge_app/src/hooks/loop_guard.rs
+expression: format_conversation(&conversation)
+---
+[Assistant]
+ tool_call: read({"path":"file.txt"})
+---
+[Assistant]
+ tool_call: read({"path":"file.txt"})
+---
+[Assistant]
+ tool_call: read({"path":"file.txt"})
+---
+[Assistant]
+ tool_call: read({"path":"file.txt"})
+---
+[Assistant]
+ tool_call: read({"path":"file.txt"})
+---
+[User]
+<tool_error><![CDATA[Blocked by loop guard: this tool call is equivalent to previous repeated calls (5 times).
+Use a different tool, materially different arguments, or stop and explain the blocker.]]></tool_error>
diff --git a/crates/forge_app/src/hooks/snapshots/forge_app__hooks__loop_guard__tests__at_warn_threshold_injects_reminder.snap b/crates/forge_app/src/hooks/snapshots/forge_app__hooks__loop_guard__tests__at_warn_threshold_injects_reminder.snap
new file mode 100644
index 0000000000..b53b2b493f
--- /dev/null
+++ b/crates/forge_app/src/hooks/snapshots/forge_app__hooks__loop_guard__tests__at_warn_threshold_injects_reminder.snap
@@ -0,0 +1,24 @@
+---
+source: crates/forge_app/src/hooks/loop_guard.rs
+expression: format_conversation(&conversation)
+---
+[Assistant]
+ tool_call: read({"path":"file.txt"})
+---
+[Assistant]
+ tool_call: read({"path":"file.txt"})
+---
+[Assistant]
+ tool_call: read({"path":"file.txt"})
+---
+[User]
+<system_reminder><![CDATA[Loop guard triggered: you have made 3 identical tool calls without meaningful progress.
+
+For your next step, do NOT repeat the same tool call with the same or semantically equivalent arguments.
+
+You must choose one of these actions:
+1. Use a different tool.
+2. Use the same tool with materially different arguments, and briefly state what new information you expect.
+3. Stop tool use and explain the blocker, including what you already tried and why it did not work.
+
+Do not call the same tool again merely to inspect, verify, retry, or double-check.]]></system_reminder>
diff --git a/crates/forge_app/src/orch_spec/orch_runner.rs b/crates/forge_app/src/orch_spec/orch_runner.rs
index c33c8349b3..2f72e4d54d 100644
--- a/crates/forge_app/src/orch_spec/orch_runner.rs
+++ b/crates/forge_app/src/orch_spec/orch_runner.rs
@@ -12,7 +12,7 @@ use tokio::sync::Mutex;
 pub use super::orch_setup::TestContext;
 use crate::app::build_template_config;
 use crate::apply_tunable_parameters::ApplyTunableParameters;
-use crate::hooks::{DoomLoopDetector, PendingTodosHandler};
+use crate::hooks::{DoomLoopDetector, LoopGuard, PendingTodosHandler};
 use crate::init_conversation_metrics::InitConversationMetrics;
 use crate::orch::Orchestrator;
 use crate::set_conversation_id::SetConversationId;
@@ -135,6 +135,9 @@ impl Runner {
             .hook(Arc::new(
                 Hook::default()
                     .on_request(DoomLoopDetector::default())
+                    .on_toolcall_end(LoopGuard::new(
+                        setup.config.loop_guard.clone().unwrap_or_default(),
+                    ))
                     .on_end(PendingTodosHandler::new()),
             ))
             .sender(tx);
diff --git a/crates/forge_config/.forge.toml b/crates/forge_config/.forge.toml
index 2104408aa7..29a782bef4 100644
--- a/crates/forge_config/.forge.toml
+++ b/crates/forge_config/.forge.toml
@@ -72,3 +72,7 @@ frequency = "daily"
 [reasoning]
 enabled = true
 effort = "medium"
+
+[loop_guard]
+warn_threshold = 3
+fail_threshold = 5
diff --git a/crates/forge_config/src/config.rs b/crates/forge_config/src/config.rs
index 9de4df92b2..5aae187678 100644
--- a/crates/forge_config/src/config.rs
+++ b/crates/forge_config/src/config.rs
@@ -9,7 +9,8 @@ use serde::{Deserialize, Serialize};
 use crate::reader::ConfigReader;
 use crate::writer::ConfigWriter;
 use crate::{
-    AutoDumpFormat, Compact, Decimal, HttpConfig, ModelConfig, ReasoningConfig, RetryConfig, Update,
+    AutoDumpFormat, Compact, Decimal, HttpConfig, LoopGuardConfig, ModelConfig, ReasoningConfig,
+    RetryConfig, Update,
 };
 
 /// Wire protocol a provider uses for chat completions.
@@ -287,6 +288,12 @@ pub struct ForgeConfig {
     #[serde(default)]
     pub verify_todos: bool,
 
+    /// Loop guard configuration; controls the thresholds at which a warning
+    /// reminder or hard tool error is injected when the agent repeats the same
+    /// tool call consecutively.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub loop_guard: Option<LoopGuardConfig>,
+
     /// Switches patch replacement fallback from the legacy fuzzy-search range
     /// lookup to the newer text-patch gRPC API.
     /// Defaults to `false` so patching continues to use the legacy fallback
diff --git a/crates/forge_config/src/lib.rs b/crates/forge_config/src/lib.rs
index cc253277e4..b6afadcab1 100644
--- a/crates/forge_config/src/lib.rs
+++ b/crates/forge_config/src/lib.rs
@@ -5,6 +5,7 @@ mod decimal;
 mod error;
 mod http;
 mod legacy;
+mod loop_guard;
 mod model;
 mod percentage;
 mod reader;
@@ -18,6 +19,7 @@ pub use config::*;
 pub use decimal::*;
 pub use error::Error;
 pub use http::*;
+pub use loop_guard::*;
 pub use model::*;
 pub use percentage::*;
 pub use reader::*;
diff --git a/crates/forge_config/src/loop_guard.rs b/crates/forge_config/src/loop_guard.rs
new file mode 100644
index 0000000000..ab89aab2cc
--- /dev/null
+++ b/crates/forge_config/src/loop_guard.rs
@@ -0,0 +1,21 @@
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+/// Configuration for the loop guard hook that detects and breaks repetitive
+/// tool call patterns.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema, fake::Dummy)]
+#[serde(rename_all = "snake_case")]
+pub struct LoopGuardConfig {
+    /// Number of identical consecutive tool calls before a warning reminder is
+    /// injected into the conversation.
+    pub warn_threshold: usize,
+    /// Number of identical consecutive tool calls before a hard tool error is
+    /// injected, blocking further identical calls.
+    pub fail_threshold: usize,
+}
+
+impl Default for LoopGuardConfig {
+    fn default() -> Self {
+        Self { warn_threshold: 3, fail_threshold: 5 }
+    }
+}
diff --git a/forge.schema.json b/forge.schema.json
index 872cd28dcb..b63eb20555 100644
--- a/forge.schema.json
+++ b/forge.schema.json
@@ -77,6 +77,17 @@
         }
       ]
     },
+    "loop_guard": {
+      "description": "Loop guard configuration; controls the thresholds at which a warning\nreminder or hard tool error is injected when the agent repeats the same\ntool call consecutively.",
+      "anyOf": [
+        {
+          "$ref": "#/$defs/LoopGuardConfig"
+        },
+        {
+          "type": "null"
+        }
+      ]
+    },
     "max_commit_count": {
       "description": "Maximum number of recent commits included as context for commit message\ngeneration.",
       "type": "integer",
@@ -615,6 +626,28 @@
         "accept_invalid_certs"
       ]
     },
+    "LoopGuardConfig": {
+      "description": "Configuration for the loop guard hook that detects and breaks repetitive\ntool call patterns.",
+      "type": "object",
+      "properties": {
+        "fail_threshold": {
+          "description": "Number of identical consecutive tool calls before a hard tool error is\ninjected, blocking further identical calls.",
+          "type": "integer",
+          "format": "uint",
+          "minimum": 0
+        },
+        "warn_threshold": {
+          "description": "Number of identical consecutive tool calls before a warning reminder is\ninjected into the conversation.",
+          "type": "integer",
+          "format": "uint",
+          "minimum": 0
+        }
+      },
+      "required": [
+        "warn_threshold",
+        "fail_threshold"
+      ]
+    },
     "ModelConfig": {
       "description": "Pairs a provider and model together for a specific operation.",
       "type": "object",
diff --git a/templates/forge-loop-guard-blocked.md b/templates/forge-loop-guard-blocked.md
new file mode 100644
index 0000000000..6910df7be5
--- /dev/null
+++ b/templates/forge-loop-guard-blocked.md
@@ -0,0 +1,2 @@
+Blocked by loop guard: this tool call is equivalent to previous repeated calls ({{repeats}} times).
+Use a different tool, materially different arguments, or stop and explain the blocker.
\ No newline at end of file
diff --git a/templates/forge-loop-guard-reminder.md b/templates/forge-loop-guard-reminder.md
new file mode 100644
index 0000000000..941405d0a3
--- /dev/null
+++ b/templates/forge-loop-guard-reminder.md
@@ -0,0 +1,10 @@
+Loop guard triggered: you have made {{repeats}} identical tool calls without meaningful progress.
+
+For your next step, do NOT repeat the same tool call with the same or semantically equivalent arguments.
+
+You must choose one of these actions:
+1. Use a different tool.
+2. Use the same tool with materially different arguments, and briefly state what new information you expect.
+3. Stop tool use and explain the blocker, including what you already tried and why it did not work.
+
+Do not call the same tool again merely to inspect, verify, retry, or double-check.
\ No newline at end of file