From 03d1c6faec1a8a7d69085d402bfa031d29a9d531 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 00:02:50 +0000 Subject: [PATCH] fix(modes): enforce rollback-on-failure with an undo-stack guard bring_up cleaned up after failures with hand-rolled blocks repeated at each failure site, and the sites had drifted: spawn_services failures rolled back nothing, a failed compose group left earlier groups' containers running while deleting the overlay files teardown needs, pre_spawn failures rolled back in container mode but not hybrid/host, and post_up failures on a resumed session deleted the session's data volumes. Each provisioning step now registers its inverse with a Rollback guard immediately after it succeeds; any early return unwinds the stack in reverse order (stop containers before deleting their overlay, kill spawned services, remove a freshly created worktree). disarm() keeps everything once the session is fully up. Rollback only removes volumes for sessions it created (not on resume), and reused worktrees are never deleted by rollback. Fixes #2 https://claude.ai/code/session_017UcuvzMKHVfyBCcq8ipAko --- src/main.rs | 1 + src/modes/container.rs | 88 +++++++++---------- src/modes/host.rs | 186 ++++++++++++++++++++++++++++++++++------- src/modes/hybrid.rs | 116 ++++++++++++------------- src/rollback.rs | 101 ++++++++++++++++++++++ 5 files changed, 360 insertions(+), 132 deletions(-) create mode 100644 src/rollback.rs diff --git a/src/main.rs b/src/main.rs index 4442aae..bacf3b1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,6 +9,7 @@ mod hooks; mod log; mod modes; mod process; +mod rollback; mod slot; mod state; mod sync; diff --git a/src/modes/container.rs b/src/modes/container.rs index 578bba6..a0000c6 100644 --- a/src/modes/container.rs +++ b/src/modes/container.rs @@ -8,6 +8,7 @@ use crate::docker; use crate::env; use crate::hooks; use crate::log::StepLogger; +use crate::rollback::Rollback; use crate::state::Session; use crate::validate; use crate::worktree::WorktreeManager; @@ -49,6 +50,13 @@ impl super::ModeHandler for ContainerMode { hooks::run(cmd, root, &std::collections::HashMap::new())?; } + // Every step below registers its undo; any early return tears down + // exactly what was created so far, in reverse order. + let mut rollback = Rollback::new(); + // Only delete volumes the rollback created: on resume the session's + // existing data volumes must survive a failed re-up. + let rollback_volumes = !reuse_worktree; + let docker_svcs_config: Vec<_> = config .docker_services() .into_iter() @@ -157,25 +165,28 @@ impl super::ModeHandler for ContainerMode { slot, )?; std::fs::write(&overlay_path, &yaml).context("failed to write overlay file")?; + { + let overlay = overlay_path.clone(); + rollback.push(move || { + let _ = std::fs::remove_file(&overlay); + }); + } let compose_str = compose_path.to_string_lossy().to_string(); let overlay_str = overlay_path.to_string_lossy().to_string(); - if let Err(e) = docker::compose_up( + docker::compose_up( &project, &compose_str, Some(&overlay_str), watch, &compose_env, - ) { - for ov in &written_overlays { - let _ = std::fs::remove_file(ov); - } - let _ = std::fs::remove_file(&overlay_path); - if !reuse_worktree { - let _ = wt.remove(&worktree_path); - } - return Err(e); + )?; + { + let (p, c, o) = (project.clone(), compose_str, overlay_str.clone()); + rollback.push(move || { + let _ = docker::compose_down(&p, &c, Some(&o), rollback_volumes); + }); } written_overlays.push(overlay_str); @@ -203,22 +214,28 @@ impl super::ModeHandler for ContainerMode { slot, )?; std::fs::write(&overlay_path, &yaml).context("failed to write overlay file")?; + { + let overlay = overlay_path.clone(); + rollback.push(move || { + let _ = std::fs::remove_file(&overlay); + }); + } let compose_str = compose_path.to_string_lossy().to_string(); let overlay_str = overlay_path.to_string_lossy().to_string(); - if let Err(e) = docker::compose_up( + docker::compose_up( &project, &compose_str, Some(&overlay_str), watch, &std::collections::HashMap::new(), - ) { - let _ = std::fs::remove_file(&overlay_path); - if !reuse_worktree { - let _ = wt.remove(&worktree_path); - } - return Err(e); + )?; + { + let (p, c, o) = (project.clone(), compose_str, overlay_str.clone()); + rollback.push(move || { + let _ = docker::compose_down(&p, &c, Some(&o), rollback_volumes); + }); } allocated_ports = compose_data @@ -247,12 +264,13 @@ impl super::ModeHandler for ContainerMode { } else { log.step(&format!("Creating worktree (branch: {branch})...")); log.detail(&worktree_path.display().to_string()); - if let Err(e) = wt.create(&worktree_path, branch) { - tear_down_all_overlays(&project, root, &written_overlays, true); - for ov in &written_overlays { - let _ = std::fs::remove_file(ov); - } - return Err(e); + wt.create(&worktree_path, branch)?; + { + let root_owned = root.to_owned(); + let wt_path = worktree_path.clone(); + rollback.push(move || { + let _ = WorktreeManager::new(root_owned).remove(&wt_path); + }); } } @@ -278,34 +296,18 @@ impl super::ModeHandler for ContainerMode { if let Some(cmd) = &config.hooks.pre_spawn { log.step("Running pre_spawn hook..."); log.detail(cmd); - if let Err(e) = hooks::run(cmd, &worktree_path, &env_map) { - tear_down_all_overlays(&project, root, &written_overlays, true); - if !reuse_worktree { - let _ = wt.remove(&worktree_path); - } - for ov in &written_overlays { - let _ = std::fs::remove_file(ov); - } - return Err(e); - } + hooks::run(cmd, &worktree_path, &env_map)?; } // post_up: all containers up, full env available if let Some(cmd) = &config.hooks.post_up { log.step("Running post_up hook..."); log.detail(cmd); - if let Err(e) = hooks::run(cmd, &worktree_path, &env_map) { - tear_down_all_overlays(&project, root, &written_overlays, true); - if !reuse_worktree { - let _ = wt.remove(&worktree_path); - } - for ov in &written_overlays { - let _ = std::fs::remove_file(ov); - } - return Err(e); - } + hooks::run(cmd, &worktree_path, &env_map)?; } + rollback.disarm(); + let app_port = allocated_ports.first().map(|(_, p)| *p); let stored_port_overrides: std::collections::HashMap = allocated_ports.iter().cloned().collect(); diff --git a/src/modes/host.rs b/src/modes/host.rs index e6c84da..22ca818 100644 --- a/src/modes/host.rs +++ b/src/modes/host.rs @@ -8,6 +8,7 @@ use crate::env; use crate::hooks; use crate::log::StepLogger; use crate::process; +use crate::rollback::Rollback; use crate::state::Session; use crate::validate; use crate::worktree::WorktreeManager; @@ -59,6 +60,10 @@ impl super::ModeHandler for HostMode { log.detail(&format!("{name}: {port}")); } + // Every step below registers its undo; any early return tears down + // exactly what was created so far, in reverse order. + let mut rollback = Rollback::new(); + if reuse_worktree { if !worktree_path.exists() { return Err(anyhow::anyhow!( @@ -72,6 +77,13 @@ impl super::ModeHandler for HostMode { log.step(&format!("Creating worktree (branch: {branch})...")); log.detail(&worktree_path.display().to_string()); wt.create(&worktree_path, branch)?; + { + let root_owned = root.to_owned(); + let wt_path = worktree_path.clone(); + rollback.push(move || { + let _ = WorktreeManager::new(root_owned).remove(&wt_path); + }); + } } if !no_inherit_env && !config.inherit_env.is_empty() { @@ -98,7 +110,7 @@ impl super::ModeHandler for HostMode { .copied() .collect(); - let spawn = if svcs_to_spawn.iter().any(|s| s.command.is_some()) { + if svcs_to_spawn.iter().any(|s| s.command.is_some()) { log.step(&format!( "Spawning native services ({})...", global.process_manager @@ -109,43 +121,29 @@ impl super::ModeHandler for HostMode { log.detail(&format!("{} on port {} — {}", svc.name, port, cmd)); } } - process::spawn_services( - &global.process_manager, - slug, - &svcs_to_spawn, - &worktree_path, - &env_map, - )? - } else { - process::spawn_services( - &global.process_manager, - slug, - &svcs_to_spawn, - &worktree_path, - &env_map, - )? - }; + } + let spawn = process::spawn_services( + &global.process_manager, + slug, + &svcs_to_spawn, + &worktree_path, + &env_map, + )?; + if spawn.tmux_session.is_some() || !spawn.pid_files.is_empty() { + let manager = global.process_manager.clone(); + let spawned = spawn.clone(); + rollback.push(move || process::kill_services(&manager, &spawned)); + } // post_up: all services spawned, full env available if let Some(cmd) = &config.hooks.post_up { log.step("Running post_up hook..."); log.detail(cmd); - if let Err(e) = hooks::run(cmd, &worktree_path, &env_map) { - let pm = if spawn.tmux_session.is_some() || !spawn.pid_files.is_empty() { - Some(&global.process_manager) - } else { - None - }; - if let Some(pm) = pm { - process::kill_services(pm, &spawn); - } - if !reuse_worktree { - let _ = wt.remove(&worktree_path); - } - return Err(e); - } + hooks::run(cmd, &worktree_path, &env_map)?; } + rollback.disarm(); + let pm = if spawn.tmux_session.is_some() || !spawn.pid_files.is_empty() { Some(global.process_manager) } else { @@ -297,3 +295,129 @@ fn native_ports_for_slot( .collect() } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::{HookConfig, Mode}; + use crate::modes::ModeHandler; + use std::process::Command; + use tempfile::TempDir; + + fn setup_git_repo(dir: &Path) { + Command::new("git") + .args(["init"]) + .current_dir(dir) + .output() + .unwrap(); + Command::new("git") + .args(["commit", "--allow-empty", "-m", "init"]) + .current_dir(dir) + .env("GIT_AUTHOR_NAME", "test") + .env("GIT_AUTHOR_EMAIL", "test@test.com") + .env("GIT_COMMITTER_NAME", "test") + .env("GIT_COMMITTER_EMAIL", "test@test.com") + .output() + .unwrap(); + } + + fn make_config() -> Config { + Config { + mode: Mode::Host, + max_slots: 8, + prefix: "ecluse".into(), + worktree_dir: ".ecluse/worktrees".into(), + app_label: "ecluse.role".into(), + app_label_value: "app".into(), + strict_port: false, + port_search_range: 10, + slot_stride: 1, + services: vec![], + hooks: HookConfig::default(), + inherit_env: vec![], + } + } + + fn bring_up(config: &Config, root: &Path, slug: &str, reuse: bool) -> Result { + let log = crate::log::StepLogger::new(true); + HostMode.bring_up( + slug, + 1, + slug, + config, + root, + false, + reuse, + true, + None, + &std::collections::HashMap::new(), + None, + &std::collections::HashSet::new(), + &std::collections::HashMap::new(), + &log, + ) + } + + #[test] + fn failed_post_up_hook_rolls_back_fresh_worktree() { + let dir = TempDir::new().unwrap(); + setup_git_repo(dir.path()); + let mut config = make_config(); + config.hooks.post_up = Some("false".into()); + + let result = bring_up(&config, dir.path(), "rb-post", false); + assert!(result.is_err()); + assert!( + !dir.path().join(".ecluse/worktrees/rb-post").exists(), + "fresh worktree must be removed when post_up fails" + ); + } + + // pre_spawn failure previously left the worktree behind (no manual cleanup + // at that site) — the rollback guard must cover it like every other step. + #[test] + fn failed_pre_spawn_hook_rolls_back_fresh_worktree() { + let dir = TempDir::new().unwrap(); + setup_git_repo(dir.path()); + let mut config = make_config(); + config.hooks.pre_spawn = Some("false".into()); + + let result = bring_up(&config, dir.path(), "rb-spawn", false); + assert!(result.is_err()); + assert!( + !dir.path().join(".ecluse/worktrees/rb-spawn").exists(), + "fresh worktree must be removed when pre_spawn fails" + ); + } + + #[test] + fn failed_post_up_hook_keeps_reused_worktree() { + let dir = TempDir::new().unwrap(); + setup_git_repo(dir.path()); + let wt = WorktreeManager::new(dir.path().to_owned()); + let path = dir.path().join(".ecluse/worktrees/rb-reuse"); + std::fs::create_dir_all(path.parent().unwrap()).unwrap(); + wt.create(&path, "rb-reuse").unwrap(); + + let mut config = make_config(); + config.hooks.post_up = Some("false".into()); + + let result = bring_up(&config, dir.path(), "rb-reuse", true); + assert!(result.is_err()); + assert!(path.exists(), "reused worktree must survive rollback"); + } + + #[test] + fn successful_bring_up_keeps_worktree() { + let dir = TempDir::new().unwrap(); + setup_git_repo(dir.path()); + let config = make_config(); + + let session = bring_up(&config, dir.path(), "rb-ok", false).unwrap(); + assert!( + dir.path().join(".ecluse/worktrees/rb-ok").exists(), + "disarmed rollback must not remove anything" + ); + assert_eq!(session.slug, "rb-ok"); + } +} diff --git a/src/modes/hybrid.rs b/src/modes/hybrid.rs index e3fda02..db81a88 100644 --- a/src/modes/hybrid.rs +++ b/src/modes/hybrid.rs @@ -10,6 +10,7 @@ use crate::env; use crate::hooks; use crate::log::StepLogger; use crate::process; +use crate::rollback::Rollback; use crate::state::Session; use crate::validate; use crate::worktree::WorktreeManager; @@ -51,6 +52,13 @@ impl super::ModeHandler for HybridMode { hooks::run(cmd, root, &std::collections::HashMap::new())?; } + // Every step below registers its undo; any early return tears down + // exactly what was created so far, in reverse order. + let mut rollback = Rollback::new(); + // Only delete volumes the rollback created: on resume the session's + // existing data volumes must survive a failed re-up. + let rollback_volumes = !reuse_worktree; + let docker_svcs_config: Vec<_> = config .docker_services() .into_iter() @@ -162,27 +170,30 @@ impl super::ModeHandler for HybridMode { slot, )?; std::fs::write(&overlay_path, &yaml).context("failed to write overlay file")?; + { + let overlay = overlay_path.clone(); + rollback.push(move || { + let _ = std::fs::remove_file(&overlay); + }); + } let compose_str = compose_path.to_string_lossy().to_string(); let overlay_str = overlay_path.to_string_lossy().to_string(); let svc_refs: Vec<&str> = svc_names.iter().map(|s| s.as_str()).collect(); - if let Err(e) = docker::compose_up_services( + docker::compose_up_services( &project, &compose_str, Some(&overlay_str), &svc_refs, watch, &compose_env, - ) { - for ov in &written_overlays { - let _ = std::fs::remove_file(ov); - } - let _ = std::fs::remove_file(&overlay_path); - if !reuse_worktree { - let _ = wt.remove(&worktree_path); - } - return Err(e); + )?; + { + let (p, c, o) = (project.clone(), compose_str, overlay_str.clone()); + rollback.push(move || { + let _ = docker::compose_down(&p, &c, Some(&o), rollback_volumes); + }); } written_overlays.push(overlay_str); @@ -221,24 +232,30 @@ impl super::ModeHandler for HybridMode { slot, )?; std::fs::write(&overlay_path, &yaml).context("failed to write overlay file")?; + { + let overlay = overlay_path.clone(); + rollback.push(move || { + let _ = std::fs::remove_file(&overlay); + }); + } let compose_str = compose_path.to_string_lossy().to_string(); let overlay_str = overlay_path.to_string_lossy().to_string(); let data_refs: Vec<&str> = data_svcs.iter().map(|s| s.as_str()).collect(); - if let Err(e) = docker::compose_up_services( + docker::compose_up_services( &project, &compose_str, Some(&overlay_str), &data_refs, watch, &std::collections::HashMap::new(), - ) { - let _ = std::fs::remove_file(&overlay_path); - if !reuse_worktree { - let _ = wt.remove(&worktree_path); - } - return Err(e); + )?; + { + let (p, c, o) = (project.clone(), compose_str, overlay_str.clone()); + rollback.push(move || { + let _ = docker::compose_down(&p, &c, Some(&o), rollback_volumes); + }); } for (name, svc) in &compose_data.services { @@ -265,12 +282,13 @@ impl super::ModeHandler for HybridMode { } else { log.step(&format!("Creating worktree (branch: {branch})...")); log.detail(&worktree_path.display().to_string()); - if let Err(e) = wt.create(&worktree_path, branch) { - tear_down_all_overlays(&project, root, &written_overlays, true); - for ov in &written_overlays { - let _ = std::fs::remove_file(ov); - } - return Err(e); + wt.create(&worktree_path, branch)?; + { + let root_owned = root.to_owned(); + let wt_path = worktree_path.clone(); + rollback.push(move || { + let _ = WorktreeManager::new(root_owned).remove(&wt_path); + }); } } @@ -359,7 +377,7 @@ impl super::ModeHandler for HybridMode { .copied() .collect(); - let spawn = if native_svcs_to_spawn.iter().any(|s| s.command.is_some()) { + if native_svcs_to_spawn.iter().any(|s| s.command.is_some()) { log.step(&format!( "Spawning native services ({})...", global.process_manager @@ -370,47 +388,29 @@ impl super::ModeHandler for HybridMode { log.detail(&format!("{} on port {} — {}", svc.name, port, cmd)); } } - process::spawn_services( - &global.process_manager, - slug, - &native_svcs_to_spawn, - &worktree_path, - &env_map, - )? - } else { - process::spawn_services( - &global.process_manager, - slug, - &native_svcs_to_spawn, - &worktree_path, - &env_map, - )? - }; + } + let spawn = process::spawn_services( + &global.process_manager, + slug, + &native_svcs_to_spawn, + &worktree_path, + &env_map, + )?; + if spawn.tmux_session.is_some() || !spawn.pid_files.is_empty() { + let manager = global.process_manager.clone(); + let spawned = spawn.clone(); + rollback.push(move || process::kill_services(&manager, &spawned)); + } // post_up: all services up and spawned, full env available if let Some(cmd) = &config.hooks.post_up { log.step("Running post_up hook..."); log.detail(cmd); - if let Err(e) = hooks::run(cmd, &worktree_path, &env_map) { - let pm = if spawn.tmux_session.is_some() || !spawn.pid_files.is_empty() { - Some(&global.process_manager) - } else { - None - }; - if let Some(pm) = pm { - process::kill_services(pm, &spawn); - } - tear_down_all_overlays(&project, root, &written_overlays, true); - if !reuse_worktree { - let _ = wt.remove(&worktree_path); - } - for ov in &written_overlays { - let _ = std::fs::remove_file(ov); - } - return Err(e); - } + hooks::run(cmd, &worktree_path, &env_map)?; } + rollback.disarm(); + let pm = if spawn.tmux_session.is_some() || !spawn.pid_files.is_empty() { Some(global.process_manager) } else { diff --git a/src/rollback.rs b/src/rollback.rs new file mode 100644 index 0000000..694159b --- /dev/null +++ b/src/rollback.rs @@ -0,0 +1,101 @@ +/// Undo stack for partially-completed `bring_up` operations. +/// +/// Every provisioning step registers its inverse right after it succeeds. If +/// `bring_up` returns early (any `?`), the guard runs the registered steps in +/// reverse order on drop, so whatever was created — containers, overlay files, +/// worktree, spawned processes — is torn down again. Call `disarm()` once the +/// session is fully up to keep everything. +/// +/// This replaces hand-rolled cleanup blocks at each failure site, which had +/// drifted apart between modes and skipped several failure paths entirely. +pub struct Rollback { + steps: Vec>, + armed: bool, +} + +impl Rollback { + pub fn new() -> Self { + Self { + steps: vec![], + armed: true, + } + } + + /// Register the inverse of a step that just succeeded. + pub fn push(&mut self, undo: F) { + self.steps.push(Box::new(undo)); + } + + /// The operation completed — keep everything that was created. + pub fn disarm(&mut self) { + self.armed = false; + } +} + +impl Default for Rollback { + fn default() -> Self { + Self::new() + } +} + +impl Drop for Rollback { + fn drop(&mut self) { + if !self.armed { + return; + } + while let Some(undo) = self.steps.pop() { + undo(); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::cell::RefCell; + use std::rc::Rc; + + #[test] + fn runs_steps_in_reverse_order_on_drop() { + let order: Rc>> = Rc::new(RefCell::new(vec![])); + { + let mut rb = Rollback::new(); + let o1 = Rc::clone(&order); + rb.push(move || o1.borrow_mut().push(1)); + let o2 = Rc::clone(&order); + rb.push(move || o2.borrow_mut().push(2)); + let o3 = Rc::clone(&order); + rb.push(move || o3.borrow_mut().push(3)); + } + assert_eq!(*order.borrow(), vec![3, 2, 1], "undo must run LIFO"); + } + + #[test] + fn disarm_skips_all_steps() { + let ran = Rc::new(RefCell::new(false)); + { + let mut rb = Rollback::new(); + let r = Rc::clone(&ran); + rb.push(move || *r.borrow_mut() = true); + rb.disarm(); + } + assert!(!*ran.borrow(), "disarmed guard must not run undo steps"); + } + + #[test] + fn empty_guard_drops_cleanly() { + let _ = Rollback::new(); + } + + #[test] + fn steps_pushed_after_disarm_do_not_run() { + let ran = Rc::new(RefCell::new(false)); + { + let mut rb = Rollback::new(); + rb.disarm(); + let r = Rc::clone(&ran); + rb.push(move || *r.borrow_mut() = true); + } + assert!(!*ran.borrow()); + } +}