diff --git a/CHANGELOG.md b/CHANGELOG.md index 326149a..60be72c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,33 @@ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). --- +## [Unreleased] + +### Added +- Pending sessions: `up`/`down` reserve the session and release the state lock during provisioning/teardown, so parallel sessions never serialize on image pulls or hooks. `ls` shows `(pending)` (and `"status"` in `--json`), warns about entries pending >15 minutes after a crashed command, and `down <slug>` recovers them. Concurrent finalizes are guarded by per-operation ownership tokens — deleted sessions are never resurrected by a racing command. +- Rollback-on-failure is enforced by an undo-stack guard: any `bring_up` failure tears down exactly what was created (containers, overlays, worktree, spawned services), in reverse order. Failed re-ups of existing sessions keep their data volumes. +- PID files record the process start token; a recycled PID is never signaled, never attributed by `whose-pid`, and reports as down. Containers are matched by compose project label instead of name substrings. +- tmux services run as their window's own process with recorded pane PIDs; commands that exit within ~1.5s fail `ecluse up` with the exit status and last output. Dead panes are kept for inspection; window `shell` is a plain shell with the session env. +- `publish_primary` on `[[services]]`: explicit control over primary-port publication (the implicit "extra_port with container_port suppresses the primary" rule is deprecated; `validate` warns). +- Docker-gated end-to-end test suite (`tests/docker_e2e.rs`): lifecycle, rollback, multi-compose teardown, container mode — runs wherever a docker daemon is available, skips elsewhere. + +### Changed +- `extra_ports` use the same per-slot spacing as primary ports (`base + slot*slot_stride`) and are probed before startup: occupied extras are a hard error under `strict_port` and a warning otherwise. 
With `slot_stride = 1` (the default) allocations are unchanged. +- `up --force` only kills processes owned by the session (verified via pid files/tmux panes), with TERM→KILL escalation; unowned listeners produce a warning naming the PID. +- Teardown dispatches on the session's recorded mode, so changing `mode` in `.ecluse.toml` no longer strands containers of existing sessions. +- `ModeHandler::bring_up` takes a `BringUpRequest`; container/hybrid share the docker-startup, worktree, port-allocation and spawn building blocks (was ~100 duplicated lines). +- Session state records explicit `(compose, overlay)` pairs; teardown no longer reconstructs compose paths from overlay filenames (ambiguous for hyphenated slugs). Legacy state files still tear down via the old path. +- Branch handling: `up <branch>` tracks `origin/<branch>` when it only exists on the remote (instead of forking a same-named branch from HEAD), and refuses to resume a slug that belongs to a different branch. +- The tmux env preamble is per-slug (`.ecluse/preambles/<slug>.sh`); a shared file leaked ports between parallel sessions. + +### Fixed +- `kill` paths signal the whole process group with TERM→KILL grace — service children no longer survive `ecluse down` holding their port. +- A failing service spawn cleans up the services already spawned instead of orphaning them. +- `ecluse sync` no longer accepts any directory whose path merely contains the slug; the cwd must be a linked worktree of the repo. It also refuses sessions that are mid-operation. +- `worktree remove` verifies the directory is actually gone instead of treating `git worktree prune` success as removal success. +- Shared-lock acquisition reads the real state when `state.lock` is missing (previously reported "no sessions" while sessions ran); `ls` no longer panics on malformed timestamps; `.env.ecluse` is parsed by a single shared parser everywhere. 
+- Examples and README migrated off the deprecated `on_up`/`on_down` hook names (the README example also ran migrations in `pre_up`, before any env exists; now `post_up`). + ## [0.2.17] — 2026-06-10 ### Added diff --git a/docs/src/limits.md b/docs/src/limits.md index b1fe3ad..3a9c0a6 100644 --- a/docs/src/limits.md +++ b/docs/src/limits.md @@ -110,3 +110,13 @@ If you interrupt `ecluse up` mid-flight, partial state may be left behind. Use ` ## Platform support macOS and Linux. Windows is not supported. + +## Pending sessions + +`up` and `down` reserve the session with a *pending* marker and release the +state lock while they work, so parallel sessions never serialize on slow +provisioning. `ecluse ls` shows `(pending)` during the operation and warns +when a pending entry is older than 15 minutes (the owning command crashed); +`ecluse down <slug>` takes such a session over and cleans it up. Commands that +need a settled session (`up`, `env`, `status`, `shell`, `sync`) refuse pending +sessions with an `operation in progress` error. diff --git a/examples/fastapi-hybrid/.ecluse.toml b/examples/fastapi-hybrid/.ecluse.toml index ba16d71..2f51c72 100644 --- a/examples/fastapi-hybrid/.ecluse.toml +++ b/examples/fastapi-hybrid/.ecluse.toml @@ -24,4 +24,4 @@ run = "docker" base_port = 6379 [hooks] -on_up = "alembic upgrade head" +post_up = "alembic upgrade head" diff --git a/examples/fastapi-hybrid/README.md b/examples/fastapi-hybrid/README.md index ea38e64..402714f 100644 --- a/examples/fastapi-hybrid/README.md +++ b/examples/fastapi-hybrid/README.md @@ -27,7 +27,7 @@ Postgres runs in a Docker container managed by ecluse. The FastAPI process and V ## Hooks -- `on_up`: runs `alembic upgrade head` to apply pending migrations against the slot's database. +- `post_up`: runs `alembic upgrade head` to apply pending migrations against the slot's database. 
## Usage diff --git a/examples/go-hybrid/.ecluse.toml b/examples/go-hybrid/.ecluse.toml index 45ee37e..9ac6c87 100644 --- a/examples/go-hybrid/.ecluse.toml +++ b/examples/go-hybrid/.ecluse.toml @@ -14,4 +14,4 @@ run = "docker" base_port = 5432 [hooks] -on_up = "migrate -path ./migrations -database \"postgres://localhost:$ECLUSE_POSTGRES_PORT/myapp\" up" +post_up = "migrate -path ./migrations -database \"postgres://localhost:$ECLUSE_POSTGRES_PORT/myapp\" up" diff --git a/examples/go-hybrid/README.md b/examples/go-hybrid/README.md index 2a33e76..fe7f2d7 100644 --- a/examples/go-hybrid/README.md +++ b/examples/go-hybrid/README.md @@ -25,7 +25,7 @@ Postgres runs in a Docker container managed by ecluse. The Go binary runs native ## Hooks -- `on_up`: runs `migrate -path ./migrations -database "$DATABASE_URL" up` to apply pending migrations. +- `post_up`: runs `migrate -path ./migrations -database "$DATABASE_URL" up` to apply pending migrations. Requires the [`migrate` CLI](https://github.com/golang-migrate/migrate) to be installed (`brew install golang-migrate`). 
diff --git a/examples/k3d/.ecluse.toml b/examples/k3d/.ecluse.toml index d09786a..f24d1ab 100644 --- a/examples/k3d/.ecluse.toml +++ b/examples/k3d/.ecluse.toml @@ -18,5 +18,5 @@ name = "https" base_port = 8443 [hooks] -on_up = "k3d cluster create ecluse-$ECLUSE_SLUG --port \"$ECLUSE_HTTP_PORT:80@loadbalancer\" --port \"$ECLUSE_HTTPS_PORT:443@loadbalancer\"" -on_down = "k3d cluster delete ecluse-$ECLUSE_SLUG" +post_up = "k3d cluster create ecluse-$ECLUSE_SLUG --port \"$ECLUSE_HTTP_PORT:80@loadbalancer\" --port \"$ECLUSE_HTTPS_PORT:443@loadbalancer\"" +pre_down = "k3d cluster delete ecluse-$ECLUSE_SLUG" diff --git a/examples/k3d/README.md b/examples/k3d/README.md index b8d2fc2..9dffc0a 100644 --- a/examples/k3d/README.md +++ b/examples/k3d/README.md @@ -24,8 +24,8 @@ brew install k3d helm helmfile ## Hooks -- `on_up`: `k3d cluster create ecluse-$ECLUSE_SLUG --port "$PORT:80@loadbalancer"` — provisions a fresh k3s cluster. -- `on_down`: `k3d cluster delete ecluse-$ECLUSE_SLUG` — destroys the cluster and all its resources. +- `post_up`: `k3d cluster create ecluse-$ECLUSE_SLUG --port "$PORT:80@loadbalancer"` — provisions a fresh k3s cluster. +- `pre_down`: `k3d cluster delete ecluse-$ECLUSE_SLUG` — destroys the cluster and all its resources. ## Usage diff --git a/examples/mongo-hybrid/.ecluse.toml b/examples/mongo-hybrid/.ecluse.toml index d516b0e..3d0ec41 100644 --- a/examples/mongo-hybrid/.ecluse.toml +++ b/examples/mongo-hybrid/.ecluse.toml @@ -14,4 +14,4 @@ run = "docker" base_port = 27017 [hooks] -on_up = "npm run db:seed" +post_up = "npm run db:seed" diff --git a/examples/mongo-hybrid/README.md b/examples/mongo-hybrid/README.md index 285ee15..1de36c4 100644 --- a/examples/mongo-hybrid/README.md +++ b/examples/mongo-hybrid/README.md @@ -35,7 +35,7 @@ const mongoUrl = `mongodb://localhost:${process.env.ECLUSE_MONGODB_PORT}/${proce ## Hooks -- `on_up`: runs `npm run db:seed` (optional) to seed initial data for this slot. 
+- `post_up`: runs `npm run db:seed` (optional) to seed initial data for this slot. ## Usage diff --git a/examples/nextjs-hybrid/.ecluse.toml b/examples/nextjs-hybrid/.ecluse.toml index 503cf6f..9d47fa3 100644 --- a/examples/nextjs-hybrid/.ecluse.toml +++ b/examples/nextjs-hybrid/.ecluse.toml @@ -14,4 +14,4 @@ run = "docker" base_port = 5432 [hooks] -on_up = "npx prisma migrate deploy" +post_up = "npx prisma migrate deploy" diff --git a/examples/nextjs-hybrid/README.md b/examples/nextjs-hybrid/README.md index 323c9c4..ceb89a3 100644 --- a/examples/nextjs-hybrid/README.md +++ b/examples/nextjs-hybrid/README.md @@ -25,7 +25,7 @@ Postgres runs in a Docker container managed by ecluse. Next.js runs natively. Ea ## Hooks -- `on_up`: runs `npx prisma migrate deploy` against the slot's database. +- `post_up`: runs `npx prisma migrate deploy` against the slot's database. ## Usage diff --git a/examples/node-container/README.md b/examples/node-container/README.md index 6987107..84cc621 100644 --- a/examples/node-container/README.md +++ b/examples/node-container/README.md @@ -2,7 +2,7 @@ Node.js + Postgres fully containerized. Everything runs in Docker — no native processes. -ecluse manages a separate compose project per worktree, with all ports offset by slot. The Docker image entrypoint runs `prisma migrate deploy` before starting the server, so no `on_up` hook is required. +ecluse manages a separate compose project per worktree, with all ports offset by slot. The Docker image entrypoint runs `prisma migrate deploy` before starting the server, so no `post_up` hook is required. 
## Mode diff --git a/examples/node-hybrid/.ecluse.toml b/examples/node-hybrid/.ecluse.toml index dc33b90..1d7a258 100644 --- a/examples/node-hybrid/.ecluse.toml +++ b/examples/node-hybrid/.ecluse.toml @@ -24,4 +24,4 @@ run = "docker" base_port = 6379 [hooks] -on_up = "npm run db:migrate" +post_up = "npm run db:migrate" diff --git a/examples/node-hybrid/README.md b/examples/node-hybrid/README.md index b273598..d607516 100644 --- a/examples/node-hybrid/README.md +++ b/examples/node-hybrid/README.md @@ -26,7 +26,7 @@ Postgres runs in a Docker container managed by ecluse. The Express API and React ## Hooks -- `on_up`: runs `npm run db:migrate` (wraps `prisma migrate deploy`) to apply pending migrations against the slot's database. +- `post_up`: runs `npm run db:migrate` (wraps `prisma migrate deploy`) to apply pending migrations against the slot's database. ## Usage diff --git a/examples/rails-hybrid/.ecluse.toml b/examples/rails-hybrid/.ecluse.toml index 85f9757..83ab16e 100644 --- a/examples/rails-hybrid/.ecluse.toml +++ b/examples/rails-hybrid/.ecluse.toml @@ -24,5 +24,5 @@ run = "docker" base_port = 6379 [hooks] -on_up = "bin/rails db:prepare" -on_down = "bin/rails db:drop DISABLE_DATABASE_ENVIRONMENT_CHECK=1" +post_up = "bin/rails db:prepare" +pre_down = "bin/rails db:drop DISABLE_DATABASE_ENVIRONMENT_CHECK=1" diff --git a/examples/rails-hybrid/README.md b/examples/rails-hybrid/README.md index 9b3cb45..6996dc5 100644 --- a/examples/rails-hybrid/README.md +++ b/examples/rails-hybrid/README.md @@ -28,8 +28,8 @@ Data services (postgres, redis) run in Docker containers managed by ecluse. The ## Hooks -- `on_up`: runs `bin/rails db:prepare` — creates and migrates the database for this slot. -- `on_down`: runs `bin/rails db:drop` — drops the database before tearing down. +- `post_up`: runs `bin/rails db:prepare` — creates and migrates the database for this slot. +- `pre_down`: runs `bin/rails db:drop` — drops the database before tearing down. 
## Usage diff --git a/examples/rails-monorepo/.ecluse.toml b/examples/rails-monorepo/.ecluse.toml index d9ff237..f7ed61b 100644 --- a/examples/rails-monorepo/.ecluse.toml +++ b/examples/rails-monorepo/.ecluse.toml @@ -29,5 +29,5 @@ run = "docker" base_port = 6379 [hooks] -on_up = "bin/rails db:prepare" -on_down = "bin/rails db:drop DISABLE_DATABASE_ENVIRONMENT_CHECK=1" +post_up = "bin/rails db:prepare" +pre_down = "bin/rails db:drop DISABLE_DATABASE_ENVIRONMENT_CHECK=1" diff --git a/examples/t3-host/.ecluse.toml b/examples/t3-host/.ecluse.toml index 884105b..40508be 100644 --- a/examples/t3-host/.ecluse.toml +++ b/examples/t3-host/.ecluse.toml @@ -8,5 +8,5 @@ name = "app" base_port = 3000 [hooks] -on_up = "npx prisma migrate deploy" -on_down = "npx prisma migrate reset --force" +post_up = "npx prisma migrate deploy" +pre_down = "npx prisma migrate reset --force" diff --git a/examples/t3-host/README.md b/examples/t3-host/README.md index 305bb95..bf5c49d 100644 --- a/examples/t3-host/README.md +++ b/examples/t3-host/README.md @@ -20,8 +20,8 @@ Each slot gets a distinct `PORT` so multiple worktrees can serve simultaneously. ## Hooks -- `on_up`: runs `npx prisma migrate deploy` to apply migrations. -- `on_down`: runs `npx prisma migrate reset --force` to wipe the slot's database on teardown (optional — remove if you want to keep data). +- `post_up`: runs `npx prisma migrate deploy` to apply migrations. +- `pre_down`: runs `npx prisma migrate reset --force` to wipe the slot's database on teardown (optional — remove if you want to keep data). 
## .env setup diff --git a/examples/t3-monorepo/.ecluse.toml b/examples/t3-monorepo/.ecluse.toml index 4ed6281..410f8cd 100644 --- a/examples/t3-monorepo/.ecluse.toml +++ b/examples/t3-monorepo/.ecluse.toml @@ -34,5 +34,5 @@ run = "docker" base_port = 6379 [hooks] -on_up = "npx prisma migrate deploy" -on_down = "npx prisma migrate reset --force" +post_up = "npx prisma migrate deploy" +pre_down = "npx prisma migrate reset --force" diff --git a/skills/ecluse/SKILL.md b/skills/ecluse/SKILL.md index bf65b5f..910b7ac 100644 --- a/skills/ecluse/SKILL.md +++ b/skills/ecluse/SKILL.md @@ -692,6 +692,49 @@ RUST_LOG=debug ecluse up feat-foo --- +## Concurrency and recovery + +`ecluse up`/`down` no longer hold the state lock while provisioning or tearing +down — sessions are reserved with a **pending** marker instead, so parallel +agents never block on each other's slow image pulls or hooks. + +What this means for you: + +- `ecluse ls` shows `<slug> (pending)` while an up/down is in flight + (`"status": "pending"` in `ls --json`). Other read commands keep working. +- Running `up`, `env`, `status`, `shell`, or `sync` against a pending session + fails with `operation in progress`. Wait for the owning command, or — if it + crashed — run `ecluse down <slug>` to take the session over and clean it up. +- `ls` warns when a session has been pending for more than 15 minutes; that + means the owning command died and the slot is leaked until you `down` it. +- If a session is removed (`down`, `flush`) while another command was still + provisioning it, the loser detects the takeover, tears down whatever it + created, and exits non-zero. State never resurrects deleted sessions. + +### Service identity + +Pid files record the process **start token** alongside the PID. A recycled PID +(same number, different process) is never killed, never attributed by +`whose-pid`, and reports as down in `status`. Containers are matched by their +compose project label, never by name substrings. 
+ +### tmux sessions + +Services run as their tmux window's own process. A command that exits within +~1.5 s fails `ecluse up` with the exit status and last output — a "ready" +session means the services actually started. Dead panes are kept on screen +(`remain-on-exit`) so you can attach and read the error; window `shell` is a +plain shell with the session env loaded. Service commands must be +long-running: a command like `echo done` is treated as an instant failure +under tmux. + +### --force and unowned ports + +`ecluse up --force` only kills processes that **belong to the session** +(verified via pid files / tmux panes). A process squatting the session's port +that ecluse does not own produces a warning naming the PID instead of a kill — +inspect it with `ecluse whose-pid <pid>` and kill it manually if intended. + ## Limits What ecluse intentionally does not do in v0. These are design decisions, not bugs. diff --git a/skills/ecluse/examples/fastapi-hybrid/.ecluse.toml b/skills/ecluse/examples/fastapi-hybrid/.ecluse.toml index 38344b2..7559ebd 100644 --- a/skills/ecluse/examples/fastapi-hybrid/.ecluse.toml +++ b/skills/ecluse/examples/fastapi-hybrid/.ecluse.toml @@ -24,4 +24,4 @@ run = "docker" base_port = 6379 [hooks] -on_up = "alembic upgrade head" +post_up = "alembic upgrade head" diff --git a/skills/ecluse/examples/k3d/.ecluse.toml b/skills/ecluse/examples/k3d/.ecluse.toml index e9648be..1812d7e 100644 --- a/skills/ecluse/examples/k3d/.ecluse.toml +++ b/skills/ecluse/examples/k3d/.ecluse.toml @@ -16,5 +16,5 @@ name = "https" base_port = 8443 [hooks] -on_up = "k3d cluster create ecluse-$ECLUSE_SLUG --port \"$ECLUSE_HTTP_PORT:80@loadbalancer\" --port \"$ECLUSE_HTTPS_PORT:443@loadbalancer\"" -on_down = "k3d cluster delete ecluse-$ECLUSE_SLUG" +post_up = "k3d cluster create ecluse-$ECLUSE_SLUG --port \"$ECLUSE_HTTP_PORT:80@loadbalancer\" --port \"$ECLUSE_HTTPS_PORT:443@loadbalancer\"" +pre_down = "k3d cluster delete ecluse-$ECLUSE_SLUG" diff --git 
a/skills/ecluse/examples/t3-host/.ecluse.toml b/skills/ecluse/examples/t3-host/.ecluse.toml index 8c05647..3d39996 100644 --- a/skills/ecluse/examples/t3-host/.ecluse.toml +++ b/skills/ecluse/examples/t3-host/.ecluse.toml @@ -8,5 +8,5 @@ name = "app" base_port = 3000 [hooks] -on_up = "npx prisma migrate deploy" -on_down = "npx prisma migrate reset --force" +post_up = "npx prisma migrate deploy" +pre_down = "npx prisma migrate reset --force" diff --git a/skills/ecluse/examples/t3-monorepo/.ecluse.toml b/skills/ecluse/examples/t3-monorepo/.ecluse.toml index 4ed6281..410f8cd 100644 --- a/skills/ecluse/examples/t3-monorepo/.ecluse.toml +++ b/skills/ecluse/examples/t3-monorepo/.ecluse.toml @@ -34,5 +34,5 @@ run = "docker" base_port = 6379 [hooks] -on_up = "npx prisma migrate deploy" -on_down = "npx prisma migrate reset --force" +post_up = "npx prisma migrate deploy" +pre_down = "npx prisma migrate reset --force" diff --git a/src/compose.rs b/src/compose.rs index 4298a28..d7c7d0d 100644 --- a/src/compose.rs +++ b/src/compose.rs @@ -553,6 +553,7 @@ mod tests { port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, } } diff --git a/src/config.rs b/src/config.rs index 79f5d93..383f987 100644 --- a/src/config.rs +++ b/src/config.rs @@ -222,6 +222,12 @@ pub struct ServiceConfig { /// ``` #[serde(default, skip_serializing_if = "Vec::is_empty")] pub extra_ports: Vec, + /// Whether the service's primary port is published to the host. + /// Defaults to the legacy implicit rule: published unless any extra_port + /// sets `container_port`. Set it explicitly — the implicit rule is + /// deprecated (`ecluse validate` warns). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub publish_primary: Option, /// Host-side port range base for docker services. When set, the overlay publishes /// `(host_port+slot) → base_port` instead of `(base_port+slot) → base_port`. 
/// Defaults to `base_port` when omitted — zero behavior change for existing configs. @@ -283,6 +289,13 @@ impl ServiceConfig { .saturating_add((slot as u16).saturating_mul(stride)) } + /// Host port for a secondary (`extra_ports`) allocation — same per-slot + /// spacing rule as primary ports so `slot_stride` means one thing. + pub fn extra_port_for_slot(base: u16, slot: u8, slot_stride: u8) -> u16 { + let stride = slot_stride.max(1) as u16; + base.saturating_add((slot as u16).saturating_mul(stride)) + } + /// Returns all extra port allocations as `(host_base_port, env_var_name)` pairs. /// Merges `extra_ports` (new) with `debug_port` (legacy) so all callers go through one path. pub fn all_extra_ports(&self) -> Vec<(u16, String)> { @@ -309,13 +322,18 @@ impl ServiceConfig { .collect() } - /// True when any extra_port has an explicit `container_port` set. In that case the - /// primary `base_port` of this service should not be published to the host — the - /// extra_ports entries are the only host-side publishes. + /// True when the primary `base_port` must not be published to the host + /// (only the extra_ports entries are). `publish_primary` wins when set; + /// otherwise the legacy implicit rule applies: suppressed when any + /// extra_port has an explicit `container_port`. pub fn suppress_primary_publish(&self) -> bool { - self.extra_ports - .iter() - .any(|ep| ep.container_port.is_some()) + match self.publish_primary { + Some(publish) => !publish, + None => self + .extra_ports + .iter() + .any(|ep| ep.container_port.is_some()), + } } } @@ -362,7 +380,11 @@ pub struct Config { /// side affect both. Good for shared secrets that should stay in sync. /// - `copy`: file is copied from root once on first `ecluse up`; future edits in the /// worktree stay local. Good for per-worktree feature flags / overrides. 
- #[serde(default = "default_inherit_env", skip_serializing_if = "Vec::is_empty")] + /// + /// Always serialized: an explicit `[]` opt-out must survive a save/load + /// round-trip — skipping empty on write would deserialize back to the + /// default and silently undo the opt-out. + #[serde(default = "default_inherit_env")] pub inherit_env: Vec, } @@ -772,6 +794,7 @@ base_port = 5432 port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }; assert_eq!(svc.port(1, 1), 8001); @@ -823,6 +846,7 @@ base_port = 5432 port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }, ServiceConfig { @@ -834,6 +858,7 @@ base_port = 5432 port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }, ], @@ -1035,6 +1060,7 @@ base_port = 3000 port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }; assert_eq!(svc.host_port_base(), 5432); @@ -1052,6 +1078,7 @@ base_port = 3000 port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: Some(11532), }; assert_eq!(svc.host_port_base(), 11532); @@ -1070,6 +1097,7 @@ base_port = 3000 port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }; assert_eq!(svc.port(1, 10), 3010); @@ -1088,6 +1116,7 @@ base_port = 3000 port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }; // Stride of 0 would zero out the slot offset; we clamp to 1 to keep ports unique. @@ -1111,6 +1140,63 @@ base_port = 3000 assert_eq!(config.slot_stride, 1); } + // ── inherit_env round-trip ──────────────────────────────────────────────── + + // An explicit opt-out must survive save → load; skipping the empty vec on + // write made it deserialize back to the default. 
+ #[test] + fn inherit_env_empty_opt_out_survives_roundtrip() { + let dir = TempDir::new().unwrap(); + write_toml(&dir, "mode = \"host\"\ninherit_env = []\n"); + let config = Config::load(dir.path()).unwrap(); + assert!(config.inherit_env.is_empty()); + + config.save(dir.path()).unwrap(); + let reloaded = Config::load(dir.path()).unwrap(); + assert!( + reloaded.inherit_env.is_empty(), + "explicit [] opt-out must not come back as the default" + ); + } + + // ── extra ports & publish_primary ───────────────────────────────────────── + + #[test] + fn extra_port_for_slot_honors_stride() { + assert_eq!(ServiceConfig::extra_port_for_slot(9000, 1, 1), 9001); + assert_eq!(ServiceConfig::extra_port_for_slot(9000, 2, 1), 9002); + assert_eq!(ServiceConfig::extra_port_for_slot(9000, 1, 10), 9010); + assert_eq!(ServiceConfig::extra_port_for_slot(9000, 3, 10), 9030); + } + + #[test] + fn publish_primary_overrides_implicit_suppression() { + let mut svc = ServiceConfig { + name: "db".into(), + base_port: 5432, + run: ServiceRun::Docker, + compose: None, + command: None, + port_env: vec![], + debug_port: None, + extra_ports: vec![ExtraPort { + base_port: 11532, + port_env: "PGPORT".into(), + container_port: Some(5432), + }], + publish_primary: None, + host_port: None, + }; + // Implicit rule: container_port set → suppressed. + assert!(svc.suppress_primary_publish()); + // Explicit publish_primary wins in both directions. + svc.publish_primary = Some(true); + assert!(!svc.suppress_primary_publish()); + svc.publish_primary = Some(false); + svc.extra_ports.clear(); + assert!(svc.suppress_primary_publish()); + } + // ── inherit_env ─────────────────────────────────────────────────────────── #[test] diff --git a/src/env.rs b/src/env.rs index f911c45..b2d740b 100644 --- a/src/env.rs +++ b/src/env.rs @@ -10,18 +10,18 @@ use crate::config::ServiceConfig; /// `native_ports` maps service name → host port for native services. 
/// The first entry also sets `PORT` (primary service alias for framework compatibility). /// `docker_ports` maps service name → host port for docker services (ECLUSE__PORT only). -/// `native_configs` is used to apply `port_env` aliases and `extra_ports` for native services. -/// `docker_configs` is used to apply `extra_ports` (and `port_env`) for docker services. +/// `service_configs` supplies `port_env` aliases and `extra_ports` for any +/// service appearing in either port list (names are unique across kinds). /// /// `ECLUSE_SLOT`, `ECLUSE_SLUG`, `ECLUSE_MODE` are always present. pub fn build_env( slot: u8, + slot_stride: u8, slug: &str, mode: &str, native_ports: &IndexMap, docker_ports: &[(String, u16)], - native_configs: &[&ServiceConfig], - docker_configs: &[&ServiceConfig], + service_configs: &[&ServiceConfig], ) -> HashMap { let mut env = HashMap::new(); env.insert("ECLUSE_SLOT".into(), slot.to_string()); @@ -40,14 +40,14 @@ pub fn build_env( } // port_env aliases: set each declared var name to this service's port - if let Some(svc) = native_configs.iter().find(|s| s.name == *name) { + if let Some(svc) = service_configs.iter().find(|s| s.name == *name) { for alias in &svc.port_env { env.insert(alias.clone(), port.to_string()); } - // extra_ports (and legacy debug_port): emit each as port_env = base_port + slot + // extra_ports (and legacy debug_port): same per-slot spacing as primaries for (base, env_key) in svc.all_extra_ports() { - let port = base.saturating_add(slot as u16); + let port = ServiceConfig::extra_port_for_slot(base, slot, slot_stride); env.insert(env_key, port.to_string()); } } @@ -58,13 +58,13 @@ pub fn build_env( let key = service_env_key(name); env.insert(format!("ECLUSE_{}_PORT", key), port.to_string()); - if let Some(svc) = docker_configs.iter().find(|s| s.name == *name) { + if let Some(svc) = service_configs.iter().find(|s| s.name == *name) { for alias in &svc.port_env { env.insert(alias.clone(), port.to_string()); } for (base, 
env_key) in svc.all_extra_ports() { - let extra_port = base.saturating_add(slot as u16); + let extra_port = ServiceConfig::extra_port_for_slot(base, slot, slot_stride); env.insert(env_key, extra_port.to_string()); } } @@ -87,7 +87,7 @@ mod tests { #[test] fn basic_env_vars_always_present() { - let env = build_env(2, "my-task", "host", &IndexMap::new(), &[], &[], &[]); + let env = build_env(2, 1, "my-task", "host", &IndexMap::new(), &[], &[]); assert_eq!(env["ECLUSE_SLOT"], "2"); assert_eq!(env["ECLUSE_SLUG"], "my-task"); assert_eq!(env["ECLUSE_MODE"], "host"); @@ -97,7 +97,7 @@ mod tests { #[test] fn first_native_port_sets_port_alias() { let np = ports(&[("api", 8001), ("worker", 8002)]); - let env = build_env(1, "feat", "host", &np, &[], &[], &[]); + let env = build_env(1, 1, "feat", "host", &np, &[], &[]); assert_eq!(env["PORT"], "8001"); assert_eq!(env["ECLUSE_API_PORT"], "8001"); assert_eq!(env["ECLUSE_WORKER_PORT"], "8002"); @@ -105,20 +105,20 @@ mod tests { #[test] fn no_port_when_no_native_ports() { - let env = build_env(1, "feat", "container", &IndexMap::new(), &[], &[], &[]); + let env = build_env(1, 1, "feat", "container", &IndexMap::new(), &[], &[]); assert!(!env.contains_key("PORT")); } #[test] fn docker_ports_set_port_vars_no_port_alias() { let env = build_env( + 1, 1, "feat", "hybrid", &IndexMap::new(), &[("postgres".into(), 5433), ("redis".into(), 6380)], &[], - &[], ); assert_eq!(env["ECLUSE_POSTGRES_PORT"], "5433"); assert_eq!(env["ECLUSE_REDIS_PORT"], "6380"); @@ -129,13 +129,13 @@ mod tests { fn native_and_docker_ports_coexist() { let np = ports(&[("api", 8001)]); let env = build_env( + 1, 1, "feat", "hybrid", &np, &[("postgres".into(), 5433)], &[], - &[], ); assert_eq!(env["PORT"], "8001"); assert_eq!(env["ECLUSE_API_PORT"], "8001"); @@ -153,7 +153,7 @@ mod tests { fn write_env_file_creates_sorted_file() { let dir = tempfile::TempDir::new().unwrap(); let np = ports(&[("api", 8001)]); - let env = build_env(1, "feat", "host", &np, &[], &[], &[]); 
+ let env = build_env(1, 1, "feat", "host", &np, &[], &[]); write_env_file(dir.path(), &env).unwrap(); let content = std::fs::read_to_string(dir.path().join(".env.ecluse")).unwrap(); let lines: Vec<&str> = content.lines().collect(); @@ -169,7 +169,7 @@ mod tests { fn fallback_app_service_port() { // Simulate fallback: single "app" service at 3000 + slot let np = ports(&[("app", 3001)]); - let env = build_env(1, "feat", "host", &np, &[], &[], &[]); + let env = build_env(1, 1, "feat", "host", &np, &[], &[]); assert_eq!(env["PORT"], "3001"); assert_eq!(env["ECLUSE_APP_PORT"], "3001"); } @@ -180,13 +180,13 @@ mod tests { np.insert("api".into(), 8001); np.insert("worker".into(), 8002); np.insert("frontend".into(), 3001); - let env = build_env(1, "s", "host", &np, &[], &[], &[]); + let env = build_env(1, 1, "s", "host", &np, &[], &[]); assert_eq!(env["PORT"], "8001"); } #[test] fn build_env_slot_zero_and_empty_slug() { - let env = build_env(0, "", "host", &IndexMap::new(), &[], &[], &[]); + let env = build_env(0, 1, "", "host", &IndexMap::new(), &[], &[]); assert_eq!(env["ECLUSE_SLOT"], "0"); assert_eq!(env["ECLUSE_SLUG"], ""); } @@ -194,9 +194,9 @@ mod tests { #[test] fn write_env_file_overwrites_existing() { let dir = tempfile::TempDir::new().unwrap(); - let env1 = build_env(1, "a", "host", &IndexMap::new(), &[], &[], &[]); + let env1 = build_env(1, 1, "a", "host", &IndexMap::new(), &[], &[]); write_env_file(dir.path(), &env1).unwrap(); - let env2 = build_env(2, "b", "container", &IndexMap::new(), &[], &[], &[]); + let env2 = build_env(2, 1, "b", "container", &IndexMap::new(), &[], &[]); write_env_file(dir.path(), &env2).unwrap(); let content = std::fs::read_to_string(dir.path().join(".env.ecluse")).unwrap(); assert!(content.contains("ECLUSE_SLUG=b")); @@ -206,7 +206,7 @@ mod tests { #[test] fn write_env_file_ends_with_newline() { let dir = tempfile::TempDir::new().unwrap(); - let env = build_env(1, "x", "host", &IndexMap::new(), &[], &[], &[]); + let env = 
build_env(1, 1, "x", "host", &IndexMap::new(), &[], &[]); write_env_file(dir.path(), &env).unwrap(); let content = std::fs::read_to_string(dir.path().join(".env.ecluse")).unwrap(); assert!(content.ends_with('\n')); @@ -215,20 +215,20 @@ mod tests { #[test] fn service_env_key_mixed_separators() { let env = build_env( + 1, 1, "s", "hybrid", &IndexMap::new(), &[("my-db.local".into(), 5432)], &[], - &[], ); assert_eq!(env["ECLUSE_MY_DB_LOCAL_PORT"], "5432"); } #[test] fn no_offset_env_var() { - let env = build_env(1, "feat", "host", &IndexMap::new(), &[], &[], &[]); + let env = build_env(1, 1, "feat", "host", &IndexMap::new(), &[], &[]); assert!(!env.contains_key("ECLUSE_OFFSET")); } @@ -244,10 +244,11 @@ mod tests { port_env: vec!["DJANGO_PORT".into()], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }; let np = ports(&[("api", 3001)]); - let env = build_env(1, "s", "host", &np, &[], &[&svc], &[]); + let env = build_env(1, 1, "s", "host", &np, &[], &[&svc]); assert_eq!(env["DJANGO_PORT"], "3001"); assert_eq!(env["ECLUSE_API_PORT"], "3001"); } @@ -264,10 +265,11 @@ mod tests { port_env: vec!["DJANGO_PORT".into(), "APP_PORT".into()], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }; let np = ports(&[("api", 3001)]); - let env = build_env(1, "s", "host", &np, &[], &[&svc], &[]); + let env = build_env(1, 1, "s", "host", &np, &[], &[&svc]); assert_eq!(env["DJANGO_PORT"], "3001"); assert_eq!(env["APP_PORT"], "3001"); } @@ -284,10 +286,11 @@ mod tests { port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }; let np = ports(&[("api", 3001)]); - let env = build_env(1, "s", "host", &np, &[], &[&svc], &[]); + let env = build_env(1, 1, "s", "host", &np, &[], &[&svc]); assert_eq!(env.get("ECLUSE_API_PORT").map(|s| s.as_str()), Some("3001")); assert!(!env.contains_key("DJANGO_PORT")); } @@ -296,11 +299,11 @@ mod tests { fn per_service_base_port_slot_arithmetic() { // api 
base_port=8000, slot=1 → 8001; slot=2 → 8002 let np1 = ports(&[("api", 8001)]); - let env1 = build_env(1, "s1", "host", &np1, &[], &[], &[]); + let env1 = build_env(1, 1, "s1", "host", &np1, &[], &[]); assert_eq!(env1["ECLUSE_API_PORT"], "8001"); let np2 = ports(&[("api", 8002)]); - let env2 = build_env(2, "s2", "host", &np2, &[], &[], &[]); + let env2 = build_env(2, 1, "s2", "host", &np2, &[], &[]); assert_eq!(env2["ECLUSE_API_PORT"], "8002"); } @@ -316,10 +319,11 @@ mod tests { port_env: vec![], debug_port: Some(9229), extra_ports: vec![], + publish_primary: None, host_port: None, }; let np = ports(&[("app", 7101)]); - let env = build_env(1, "s", "host", &np, &[], &[&svc], &[]); + let env = build_env(1, 1, "s", "host", &np, &[], &[&svc]); assert_eq!(env["ECLUSE_APP_DEBUG_PORT"], "9230"); } @@ -335,10 +339,11 @@ mod tests { port_env: vec![], debug_port: Some(9229), extra_ports: vec![], + publish_primary: None, host_port: None, }; let np2 = ports(&[("app", 7102)]); - let env2 = build_env(2, "s2", "host", &np2, &[], &[&svc], &[]); + let env2 = build_env(2, 1, "s2", "host", &np2, &[], &[&svc]); assert_eq!(env2["ECLUSE_APP_DEBUG_PORT"], "9231"); } @@ -365,10 +370,11 @@ mod tests { container_port: None, }, ], + publish_primary: None, host_port: None, }; let np = ports(&[("api", 3001)]); - let env = build_env(1, "s", "host", &np, &[], &[&svc], &[]); + let env = build_env(1, 1, "s", "host", &np, &[], &[&svc]); assert_eq!(env["NODE_INSPECT_PORT"], "9230"); // 9229 + 1 assert_eq!(env["PGPORT"], "11534"); // 11533 + 1 } @@ -389,10 +395,11 @@ mod tests { port_env: "AUX_PORT".into(), container_port: None, }], + publish_primary: None, host_port: None, }; let np = ports(&[("api", 3001)]); - let env = build_env(1, "s", "host", &np, &[], &[&svc], &[]); + let env = build_env(1, 1, "s", "host", &np, &[], &[&svc]); assert_eq!(env["ECLUSE_API_DEBUG_PORT"], "9230"); assert_eq!(env["AUX_PORT"], "5556"); } @@ -409,10 +416,11 @@ mod tests { port_env: vec![], debug_port: None, 
extra_ports: vec![], + publish_primary: None, host_port: None, }; let np = ports(&[("api", 4445)]); - let env = build_env(1, "s", "host", &np, &[], &[&svc], &[]); + let env = build_env(1, 1, "s", "host", &np, &[], &[&svc]); assert!(!env.contains_key("ECLUSE_API_DEBUG_PORT")); } @@ -432,16 +440,17 @@ mod tests { port_env: "PGPORT".into(), container_port: None, }], + publish_primary: None, host_port: None, }; // slot 1: ECLUSE_POSTGRES_PORT = 5433 (primary), PGPORT = 11533 (extra) let env = build_env( + 1, 1, "s", "hybrid", &IndexMap::new(), &[("postgres".into(), 5433)], - &[], &[&svc], ); assert_eq!(env["ECLUSE_POSTGRES_PORT"], "5433"); @@ -460,15 +469,16 @@ mod tests { port_env: vec!["DOLT_PORT".into()], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }; let env = build_env( + 1, 1, "s", "hybrid", &IndexMap::new(), &[("dolt".into(), 3307)], - &[], &[&svc], ); assert_eq!(env["ECLUSE_DOLT_PORT"], "3307"); @@ -493,17 +503,18 @@ mod tests { port_env: "PGPORT".into(), container_port: Some(5432), }], + publish_primary: None, host_port: None, }; // suppress_primary_publish=true: caller tracks extra port as primary // build_env receives extra port host value (11533) as the docker_ports entry let env = build_env( + 1, 1, "s", "hybrid", &IndexMap::new(), &[("postgres".into(), 11533)], - &[], &[&svc], ); assert_eq!(env["ECLUSE_POSTGRES_PORT"], "11533"); @@ -579,3 +590,34 @@ pub fn write_env_file(worktree: &Path, env: &HashMap) -> Result< std::fs::write(worktree.join(".env.ecluse"), content) .with_context(|| format!("failed to write .env.ecluse in {}", worktree.display())) } + +#[cfg(test)] +mod stride_tests { + use super::*; + use crate::config::{ExtraPort, ServiceConfig, ServiceRun}; + + #[test] + fn extra_ports_in_env_honor_slot_stride() { + let svc = ServiceConfig { + name: "api".into(), + base_port: 3000, + run: ServiceRun::Native, + compose: None, + command: None, + port_env: vec![], + debug_port: None, + extra_ports: vec![ExtraPort { + 
base_port: 9200, + port_env: "DEBUG_PORT".into(), + container_port: None, + }], + publish_primary: None, + host_port: None, + }; + let mut np = IndexMap::new(); + np.insert("api".to_string(), 3020u16); + // stride 10, slot 2 → extra port 9200 + 20, matching primary spacing + let env = build_env(2, 10, "s", "host", &np, &[], &[&svc]); + assert_eq!(env["DEBUG_PORT"], "9220"); + } +} diff --git a/src/main.rs b/src/main.rs index 737ec58..80fb8df 100644 --- a/src/main.rs +++ b/src/main.rs @@ -499,7 +499,7 @@ fn prompt_branch_name() -> Result { /// 4. cwd in main worktree / repo root → prompt for branch name. fn resolve_slug_and_branch( arg: &Option, - guard: &state::StateGuard, + state: &state::State, _root: &std::path::Path, ) -> Result<(String, String, bool, Option)> { if let Some(input) = arg { @@ -510,8 +510,7 @@ fn resolve_slug_and_branch( let cwd = std::env::current_dir().context("could not determine current directory")?; // 1. Inside an ecluse-registered worktree → reuse stored slug/branch. - if let Some(session) = guard - .state + if let Some(session) = state .sessions .iter() .find(|s| cwd.starts_with(std::path::Path::new(&s.worktree_path))) @@ -545,11 +544,7 @@ fn ensure_session_settled(session: &state::Session) -> Result<()> { Ok(()) } -fn resolve_slug_from_args( - arg: Option<&str>, - guard: &state::StateGuard, - hint: &str, -) -> Result { +fn resolve_slug_from_args(arg: Option<&str>, state: &state::State, hint: &str) -> Result { match arg { Some(s) => { validate_slug(s)?; @@ -559,8 +554,7 @@ fn resolve_slug_from_args( let cwd = std::env::current_dir().context("could not determine current directory")?; // Inside an active ecluse session — use it. 
- if let Some(session) = guard - .state + if let Some(session) = state .sessions .iter() .find(|s| cwd.starts_with(std::path::Path::new(&s.worktree_path))) @@ -679,18 +673,20 @@ fn cmd_up(args: cli::UpArgs) -> Result<()> { parse_service_filter(&args.services, &config)?; // Resolve slug + branch from a read-only snapshot. Resolution can prompt - // for a branch name; neither the prompt nor the provisioning below may - // run under the exclusive lock, or every other ecluse command in this - // repo blocks on us until it times out. - let (slug, branch, implicit_reuse, worktree_override) = { + // for a branch name; the prompt must not hold ANY lock (a shared lock + // still blocks exclusive acquirers), so clone the state and release the + // guard before resolving. + let snapshot = { let guard = state::StateGuard::acquire_shared(&root)?; - resolve_slug_and_branch(&args.slug, &guard, &root)? + guard.state.clone() }; + let (slug, branch, implicit_reuse, worktree_override) = + resolve_slug_and_branch(&args.slug, &snapshot, &root)?; validate_branch(&branch)?; // Short exclusive section: route to resume, or reserve the slot with a // pending session, then release the lock for the slow provisioning work. 
- let slot = { + let (slot, op_id) = { let mut guard = state::StateGuard::acquire(&root)?; if let Some(existing) = guard.state.find_session(&slug).cloned() { @@ -731,6 +727,7 @@ fn cmd_up(args: cli::UpArgs) -> Result<()> { let planned_worktree = worktree_override.clone().unwrap_or_else(|| { worktree::WorktreeManager::new(root.clone()).worktree_path(&config, &slug) }); + let op_id = state::new_op_id(); guard.state.add_session(state::Session { slug: slug.clone(), mode: config.mode.clone(), @@ -738,6 +735,10 @@ fn cmd_up(args: cli::UpArgs) -> Result<()> { branch: branch.clone(), worktree_path: planned_worktree.display().to_string(), status: state::SessionStatus::Pending, + pending_op: Some(state::PendingOp { + id: op_id.clone(), + since: chrono::Utc::now().to_rfc3339(), + }), compose_project: None, overlay_file: None, overlay_files: vec![], @@ -752,45 +753,68 @@ fn cmd_up(args: cli::UpArgs) -> Result<()> { services_subset: None, }); guard.commit()?; - slot + (slot, op_id) }; let handler = modes::get_handler(&config); + let no_skip = std::collections::HashSet::new(); + let no_existing = std::collections::HashMap::new(); let result = handler.bring_up( - &slug, - slot, - &branch, + &modes::BringUpRequest { + slug: &slug, + slot, + branch: &branch, + watch: args.watch, + reuse_worktree: args.reuse_worktree || implicit_reuse, + no_inherit_env: args.no_inherit_env, + worktree_override, + port_overrides: &port_overrides, + service_filter: service_filter.as_ref(), + skip_services: &no_skip, + existing_port_overrides: &no_existing, + }, &config, &root, - args.watch, - args.reuse_worktree || implicit_reuse, - args.no_inherit_env, - worktree_override, - &port_overrides, - service_filter.as_ref(), - &std::collections::HashSet::new(), - &std::collections::HashMap::new(), &log, ); // Re-acquire to finalize: replace the pending reservation with the real // session, or drop it when provisioning failed (bring_up rolled back). 
+ // Only the operation that wrote the reservation may finalize it — if + // another command (down/flush) removed or took over the entry meanwhile, + // writing state here would resurrect a session whose resources are gone. let mut guard = state::StateGuard::acquire(&root)?; - guard.state.remove_session(&slug); - match result { - Ok(session) => { - if args.json { - print_up_json(&session, &root)?; - } else { - print_up_summary(&session, &config, &log); + if guard.state.still_owned(&slug, &op_id) { + guard.state.remove_session(&slug); + match result { + Ok(session) => { + if args.json { + print_up_json(&session, &root)?; + } else { + print_up_summary(&session, &config, &log); + } + guard.state.add_session(session); + guard.commit()?; + Ok(()) + } + Err(e) => { + guard.commit()?; + Err(e) } - guard.state.add_session(session); - guard.commit()?; - Ok(()) } - Err(e) => { - guard.commit()?; - Err(e) + } else { + drop(guard); + match result { + Ok(session) => { + log.warn(&format!( + "session '{slug}' was removed or taken over by another command while provisioning; tearing the new resources back down" + )); + let _ = handler.bring_down(&session, &config, &root, false, false, &log); + Err(anyhow::anyhow!( + "session '{slug}' was removed by another command while it was being provisioned; the resources it created were torn down — re-run `ecluse up {slug}`" + )) + } + Err(e) => Err(e), } } } @@ -864,10 +888,10 @@ fn cmd_up_resume( } // Mark pending and release the lock for the health checks + startup. 
- let mut marked = existing.clone(); - marked.status = state::SessionStatus::Pending; - guard.state.remove_session(&existing.slug); - guard.state.add_session(marked); + let op_id = match guard.state.mark_pending(&existing.slug) { + Some((_, op_id)) => op_id, + None => return Err(error::EcluseError::SessionNotFound(existing.slug.clone()).into()), + }; guard.commit()?; drop(guard); @@ -875,7 +899,28 @@ fn cmd_up_resume( // Re-acquire to finalize: replace with the refreshed session, or restore // the original (still-active) entry when nothing changed or on failure. + // Skip entirely when another command took the session over meanwhile — + // writing here would resurrect an entry that command deleted. let mut guard = state::StateGuard::acquire(&root)?; + if !guard.state.still_owned(&existing.slug, &op_id) { + drop(guard); + if let Ok(Some((updated, _, _))) = &outcome { + log.warn(&format!( + "session '{}' was removed or taken over by another command during resume; stopping the services this resume started", + existing.slug + )); + let handler = modes::get_handler(&config); + let _ = handler.bring_down(updated, &config, &root, true, true, &log); + } + return match outcome { + Err(e) => Err(e), + _ => Err(anyhow::anyhow!( + "session '{}' was removed by another command while it was being resumed; re-run `ecluse up {}`", + existing.slug, + existing.slug + )), + }; + } guard.state.remove_session(&existing.slug); match outcome { Ok(Some((updated, started, skipped))) => { @@ -925,7 +970,6 @@ fn resume_provision( explicit_skip: &std::collections::HashSet, log: &log::StepLogger, ) -> Result> { - let worktree = std::path::Path::new(&existing.worktree_path); let handler = modes::get_handler(config); let mut skip_services: std::collections::HashSet = explicit_skip.clone(); @@ -948,14 +992,11 @@ fn resume_provision( .filter(|s| s.run == config::ServiceRun::Docker) .collect(); - let discovered = if !native_svcs.is_empty() { - sync::find_processes_in_worktree(worktree) - } else { - 
vec![] - }; - let native_matches = sync::match_services(&native_svcs, &discovered); let docker_matches = if !docker_svcs.is_empty() { - sync::find_docker_services(&docker_svcs, &existing.slug) + sync::find_docker_services( + &docker_svcs, + &modes::compose_project_name(config, &existing.slug), + ) } else { vec![] }; @@ -965,11 +1006,11 @@ fn resume_provision( log.detail(&format!("{}: skipped (--skip)", svc.name)); continue; } - let alive = native_matches - .iter() - .find(|m| m.service_name == svc.name) - .map(|m| process::pid_alive(m.pid)) - .unwrap_or(false); + // Identity-based check: the session's own pid file (token-verified) + // or tmux window — never lsof discovery, whose depth-1 scan misses + // servers with a cwd in a subdirectory and then spawns duplicates. + let expected_port = existing.port_overrides.get(&svc.name).copied(); + let alive = sync::native_service_running(root, existing, &svc.name, expected_port); if alive { log.detail(&format!("{}: \u{2713} already running — skipped", svc.name)); skip_services.insert(svc.name.clone()); @@ -1005,23 +1046,25 @@ fn resume_provision( let service_filter = parse_service_filter(&args.services, config)?; let updated_session = handler.bring_up( - &existing.slug, - existing.slot, - &existing.branch, + &modes::BringUpRequest { + slug: &existing.slug, + slot: existing.slot, + branch: &existing.branch, + watch: args.watch, + reuse_worktree: true, // always reuse-worktree on resume + no_inherit_env: args.no_inherit_env, + // Honor the worktree path recorded in state.json. Without this, bring_up + // recomputes the default `//` location and breaks + // sessions whose worktree lives outside `.ecluse/worktrees/` — e.g. those + // auto-registered from a sibling git worktree directory. 
+ worktree_override: Some(std::path::PathBuf::from(&existing.worktree_path)), + port_overrides: &port_overrides, + service_filter: service_filter.as_ref(), + skip_services: &skip_services, + existing_port_overrides: &existing.port_overrides, + }, config, root, - args.watch, - true, // always reuse-worktree on resume - args.no_inherit_env, - // Honor the worktree path recorded in state.json. Without this, bring_up - // recomputes the default `//` location and breaks - // sessions whose worktree lives outside `.ecluse/worktrees/` — e.g. those - // auto-registered from a sibling git worktree directory. - Some(std::path::PathBuf::from(&existing.worktree_path)), - &port_overrides, - service_filter.as_ref(), - &skip_services, - &existing.port_overrides, log, )?; @@ -1082,7 +1125,7 @@ fn force_kill_session_services( "killing process {} on port {} ({})", pid, port, svc_name )); - terminate_with_grace(pid); + process::kill_pid_with_grace(pid); } } } @@ -1122,23 +1165,6 @@ fn force_kill_session_services( } } -/// SIGTERM, escalating to SIGKILL after a short grace period if still alive. -fn terminate_with_grace(pid: u32) { - let _ = std::process::Command::new("kill") - .args(["-TERM", &pid.to_string()]) - .status(); - let deadline = std::time::Instant::now() + std::time::Duration::from_secs(2); - while process::pid_alive(pid) { - if std::time::Instant::now() >= deadline { - let _ = std::process::Command::new("kill") - .args(["-9", &pid.to_string()]) - .status(); - return; - } - std::thread::sleep(std::time::Duration::from_millis(50)); - } -} - fn print_up_summary(session: &state::Session, _config: &config::Config, log: &log::StepLogger) { println!(); log.success(&format!( @@ -1202,31 +1228,32 @@ fn cmd_down(args: cli::DownArgs) -> Result<()> { // prompt below must never run while holding the exclusive lock. let slug = { let guard = state::StateGuard::acquire_shared(&root)?; - resolve_slug_from_args(args.slug.as_deref(), &guard, "ecluse down ")? 
+ resolve_slug_from_args(args.slug.as_deref(), &guard.state, "ecluse down ")? }; // Short exclusive section: re-verify the session and mark it pending so // the slug + slot stay reserved while teardown runs without the lock. + // Marking takes over the entry — including from a crashed or still-running + // up/down, whose finalize will then stand down via the ownership check. log.step(&format!("Loading session '{slug}'...")); - let session = { + let (session, op_id) = { let mut guard = state::StateGuard::acquire(&root)?; - let current = guard + let (current, op_id) = guard .state - .find_session(&slug) - .ok_or_else(|| error::EcluseError::SessionNotFound(slug.clone()))? - .clone(); + .mark_pending(&slug) + .ok_or_else(|| error::EcluseError::SessionNotFound(slug.clone()))?; if current.status == state::SessionStatus::Pending { log.warn(&format!( - "session '{slug}' has an operation in progress (started {}); tearing it down anyway", - current.started_at + "session '{slug}' has an operation in progress (started {}); taking it over and tearing it down", + current + .pending_op + .as_ref() + .map(|op| op.since.as_str()) + .unwrap_or(current.started_at.as_str()) )); } - let mut marked = current.clone(); - marked.status = state::SessionStatus::Pending; - guard.state.remove_session(&slug); - guard.state.add_session(marked); guard.commit()?; - current + (current, op_id) }; log.detail(&format!("slot {}, mode: {}", session.slot, session.mode)); @@ -1238,7 +1265,7 @@ fn cmd_down(args: cli::DownArgs) -> Result<()> { Ok(k) => k, Err(e) => { // Aborted at the prompt — restore the session before bailing out. - restore_session(&root, &session)?; + restore_session(&root, &session, &op_id)?; return Err(e); } }; @@ -1254,16 +1281,27 @@ fn cmd_down(args: cli::DownArgs) -> Result<()> { ); let mut guard = state::StateGuard::acquire(&root)?; - guard.state.remove_session(&slug); - if let Err(e) = result { - // Teardown failed — keep the session visible so it can be retried. 
- let mut restored = session; - restored.status = state::SessionStatus::Active; - guard.state.add_session(restored); + if guard.state.still_owned(&slug, &op_id) { + guard.state.remove_session(&slug); + if let Err(e) = result { + // Teardown failed — keep the session visible so it can be retried. + let mut restored = session; + restored.status = state::SessionStatus::Active; + restored.pending_op = None; + guard.state.add_session(restored); + guard.commit()?; + return Err(e); + } guard.commit()?; - return Err(e); + } else { + // Another command took the session over during teardown — leave the + // entry to its new owner, but still report our own outcome. + drop(guard); + log.warn(&format!( + "session '{slug}' was taken over by another command during teardown; leaving its state entry alone" + )); + result?; } - guard.commit()?; if args.keep_branch { eprintln!( @@ -1288,11 +1326,17 @@ fn cmd_down(args: cli::DownArgs) -> Result<()> { /// Put a session back into state with Active status (used when an operation /// that marked it Pending aborts or fails without changing anything durable). -fn restore_session(root: &std::path::Path, session: &state::Session) -> Result<()> { +/// No-op when `op_id` no longer owns the entry — another command took the +/// session over and restoring would clobber its work. +fn restore_session(root: &std::path::Path, session: &state::Session, op_id: &str) -> Result<()> { let mut guard = state::StateGuard::acquire(root)?; + if !guard.state.still_owned(&session.slug, op_id) { + return Ok(()); + } guard.state.remove_session(&session.slug); let mut restored = session.clone(); restored.status = state::SessionStatus::Active; + restored.pending_op = None; guard.state.add_session(restored); guard.commit() } @@ -1340,20 +1384,16 @@ fn cmd_shutdown(args: cli::ShutdownArgs) -> Result<()> { // Re-verify under the lock (another command may have removed it) and // mark pending for the unlocked teardown. 
- let current = { + let (current, op_id) = { let mut guard = state::StateGuard::acquire(&root)?; - match guard.state.find_session(&session.slug).cloned() { + match guard.state.mark_pending(&session.slug) { None => { log.detail("already removed — skipped"); continue; } - Some(current) => { - let mut marked = current.clone(); - marked.status = state::SessionStatus::Pending; - guard.state.remove_session(&session.slug); - guard.state.add_session(marked); + Some((current, op_id)) => { guard.commit()?; - current + (current, op_id) } } }; @@ -1361,13 +1401,20 @@ fn cmd_shutdown(args: cli::ShutdownArgs) -> Result<()> { match handler.bring_down(¤t, &config, &root, args.keep_volumes, keep_wt, &log) { Ok(()) => { let mut guard = state::StateGuard::acquire(&root)?; - guard.state.remove_session(¤t.slug); - guard.commit()?; + if guard.state.still_owned(¤t.slug, &op_id) { + guard.state.remove_session(¤t.slug); + guard.commit()?; + } else { + log.warn(&format!( + "'{}' was taken over by another command during teardown; leaving its state entry alone", + current.slug + )); + } } Err(e) => { log.warn(&format!("'{}' failed: {}", current.slug, e)); failed.push(current.slug.clone()); - restore_session(&root, ¤t)?; + restore_session(&root, ¤t, &op_id)?; } } } @@ -1502,6 +1549,22 @@ fn cmd_ls(args: cli::LsArgs) -> Result<()> { for w in process::check_processes_alive(&s.process_manager, &s.spawn_result(), &s.slug) { log.warn(&format!("[{}] {}", s.slug, w)); } + // A pending entry only lives for the duration of one up/down; one that + // sticks around means the owning command crashed and the slot leaks. 
+ if let Some(op) = &s.pending_op { + if let Ok(since) = chrono::DateTime::parse_from_rfc3339(&op.since) { + let age = chrono::Utc::now().signed_duration_since(since); + if age > chrono::Duration::minutes(15) { + log.warn(&format!( + "session '{}' has been pending for {} minutes — if the owning ecluse command crashed, run `ecluse down {}` to clean it up and free slot {}", + s.slug, + age.num_minutes(), + s.slug, + s.slot + )); + } + } + } } Ok(()) @@ -1513,7 +1576,7 @@ fn cmd_shell(args: cli::ShellArgs) -> Result<()> { let (_, root) = config::Config::find_and_load()?; let guard = state::StateGuard::acquire_shared(&root)?; - let slug = resolve_slug_from_args(args.slug.as_deref(), &guard, "ecluse shell ")?; + let slug = resolve_slug_from_args(args.slug.as_deref(), &guard.state, "ecluse shell ")?; let session = guard .state @@ -1627,7 +1690,7 @@ fn cmd_env(args: cli::EnvArgs) -> Result<()> { let (_, root) = config::Config::find_and_load()?; let guard = state::StateGuard::acquire_shared(&root)?; - let slug = resolve_slug_from_args(args.slug.as_deref(), &guard, "ecluse env ")?; + let slug = resolve_slug_from_args(args.slug.as_deref(), &guard.state, "ecluse env ")?; let session = guard .state .find_session(&slug) @@ -1727,6 +1790,10 @@ fn cmd_sync(args: cli::SyncArgs) -> Result<()> { // Acquire state lock. let mut guard = state::StateGuard::acquire(&root)?; let existing = guard.state.find_session(&slug).cloned(); + if let Some(ref s) = existing { + // Never overwrite an entry another command is mid-operating on. + ensure_session_settled(s)?; + } let update_mode = existing.is_some(); // Allocate or reuse slot. @@ -1775,7 +1842,7 @@ fn cmd_sync(args: cli::SyncArgs) -> Result<()> { // Detect docker services. 
let docker_matches = if !docker_svcs.is_empty() { log.step("Detecting docker services..."); - sync::find_docker_services(&docker_svcs, &slug) + sync::find_docker_services(&docker_svcs, &modes::compose_project_name(&config, &slug)) } else { vec![] }; @@ -1828,16 +1895,19 @@ fn cmd_sync(args: cli::SyncArgs) -> Result<()> { native_ports.insert(m.service_name.clone(), port); } } - let docker_svcs = config.docker_services(); - let docker_svcs_ref: Vec<&config::ServiceConfig> = docker_svcs.to_vec(); + let all_svc_configs: Vec<&config::ServiceConfig> = native_svcs + .iter() + .chain(config.docker_services().iter()) + .copied() + .collect(); let env_map = env::build_env( slot, + config.slot_stride, &slug, &config.mode.to_string(), &native_ports, &docker_matches, - &native_svcs, - &docker_svcs_ref, + &all_svc_configs, ); env::write_env_file(&worktree_path, &env_map)?; @@ -1853,6 +1923,7 @@ fn cmd_sync(args: cli::SyncArgs) -> Result<()> { branch, worktree_path: worktree_path.display().to_string(), status: state::SessionStatus::Active, + pending_op: None, app_port, port_overrides, process_manager: Some(process::ProcessManager::Nohup), @@ -2105,7 +2176,7 @@ fn cmd_status(args: cli::StatusArgs) -> Result<()> { let (config, root) = config::Config::find_and_load()?; let guard = state::StateGuard::acquire_shared(&root)?; - let slug = resolve_slug_from_args(args.slug.as_deref(), &guard, "ecluse status ")?; + let slug = resolve_slug_from_args(args.slug.as_deref(), &guard.state, "ecluse status ")?; let session = guard .state .find_session(&slug) @@ -2113,8 +2184,6 @@ fn cmd_status(args: cli::StatusArgs) -> Result<()> { .clone(); ensure_session_settled(&session)?; - let worktree = std::path::Path::new(&session.worktree_path); - // Build per-service health status. 
let native_svcs: Vec<&config::ServiceConfig> = config .services @@ -2128,18 +2197,12 @@ fn cmd_status(args: cli::StatusArgs) -> Result<()> { .filter(|s| s.run == config::ServiceRun::Docker) .collect(); - // Discover processes once for all native services. - let discovered = if !native_svcs.is_empty() { - sync::find_processes_in_worktree(worktree) - } else { - vec![] - }; - - let native_matches = sync::match_services(&native_svcs, &discovered); - // Docker: find running containers. let docker_matches = if !docker_svcs.is_empty() { - sync::find_docker_services(&docker_svcs, &session.slug) + sync::find_docker_services( + &docker_svcs, + &modes::compose_project_name(&config, &session.slug), + ) } else { vec![] }; @@ -2163,59 +2226,17 @@ fn cmd_status(args: cli::StatusArgs) -> Result<()> { } }); - let matched = native_matches.iter().find(|m| m.service_name == svc.name); - let (healthy, pid, port) = match matched { - Some(m) => { - let alive = process::pid_alive(m.pid); - // Verify the matched process actually owns the expected port. - // If it doesn't, the service is listening on the wrong port - // (e.g. external task runner inherited the wrong env) — that's - // a health failure, not "discovered new port". - let on_expected_port = match expected_port { - Some(p) => sync::subtree_owns_port(m.pid, p), - None => true, // no expected port → just check liveness - }; - (alive && on_expected_port, Some(m.pid), expected_port) - } - None => { - let pid_file = root - .join(".ecluse") - .join("pids") - .join(&session.slug) - .join(format!("{}.pid", svc.name)); - if pid_file.exists() { - // Nohup-managed: verify the stored PID is alive AND on the right port. 
- if let Ok(content) = std::fs::read_to_string(&pid_file) { - if let Ok(pid) = content.trim().parse::() { - let alive = process::pid_alive(pid); - let on_expected_port = match expected_port { - Some(p) => sync::subtree_owns_port(pid, p), - None => true, - }; - (alive && on_expected_port, Some(pid), expected_port) - } else { - (false, None, expected_port) - } - } else { - (false, None, expected_port) - } - } else if matches!(session.process_manager, Some(process::ProcessManager::Tmux)) { - // Tmux-managed: verify the pane's process subtree owns the port. - if let Some(ref tmux_session) = session.tmux_session { - let healthy = if let Some(port) = expected_port { - sync::tmux_window_owns_port(tmux_session, &svc.name, port) - } else { - sync::tmux_window_exists(tmux_session, &svc.name) - }; - (healthy, None, expected_port) - } else { - (false, None, expected_port) - } - } else { - (false, None, expected_port) - } - } - }; + // Identity first: the session's own pid file (token-verified) or tmux + // window decides health — never an lsof scan that can misattribute a + // neighbor's process. 
+ let pid_file = root + .join(".ecluse") + .join("pids") + .join(&session.slug) + .join(format!("{}.pid", svc.name)); + let recorded_pid = process::read_pid_file(&pid_file).map(|(pid, _)| pid); + let healthy = sync::native_service_running(&root, &session, &svc.name, expected_port); + let (healthy, pid, port) = (healthy, recorded_pid, expected_port); let tmux_window = if matches!(session.process_manager, Some(process::ProcessManager::Tmux)) { Some(svc.name.clone()) diff --git a/src/modes/container.rs b/src/modes/container.rs index 064eb90..79162db 100644 --- a/src/modes/container.rs +++ b/src/modes/container.rs @@ -10,39 +10,100 @@ use crate::hooks; use crate::log::StepLogger; use crate::rollback::Rollback; use crate::state::Session; -use crate::validate; use crate::worktree::WorktreeManager; -use super::{group_by_compose, overlay_name_for_compose, tear_down_all_overlays}; +use super::{tear_down_all_overlays, BringUpRequest, DockerStartup}; pub struct ContainerMode; +/// Bring up the whole root compose file (no `[[services]]` declared): every +/// service in it is started under the session's project with offset ports. 
+fn start_whole_compose_file( + req: &BringUpRequest, + config: &Config, + root: &Path, + rollback: &mut Rollback, + log: &StepLogger, +) -> Result { + let project = super::compose_project_name(config, req.slug); + let overlay_dir = root.join(".ecluse").join("overlays"); + std::fs::create_dir_all(&overlay_dir).context("failed to create overlays directory")?; + let rollback_volumes = !req.reuse_worktree; + + let compose_path = compose::find_compose_file(root).ok_or_else(|| { + crate::error::EcluseError::ComposeFileNotFound(root.display().to_string()) + })?; + let compose_data = compose::parse(&compose_path)?; + + let all_svc_names: Vec = compose_data.services.keys().cloned().collect(); + log.step(&format!( + "Starting docker services: {}...", + all_svc_names.join(", ") + )); + + let overlay_path = overlay_dir.join(format!("{}.yml", req.slug)); + let yaml = compose::generate_overlay( + &compose_data, + req.slot as u16, + &project, + None, + &config.prefix, + req.slot, + )?; + std::fs::write(&overlay_path, &yaml).context("failed to write overlay file")?; + { + let overlay = overlay_path.clone(); + rollback.push(move || { + let _ = std::fs::remove_file(&overlay); + }); + } + + let compose_str = compose_path.to_string_lossy().to_string(); + let overlay_str = overlay_path.to_string_lossy().to_string(); + + docker::compose_up( + &project, + &compose_str, + Some(&overlay_str), + req.watch, + &std::collections::HashMap::new(), + )?; + { + let (p, c, o) = (project.clone(), compose_str.clone(), overlay_str.clone()); + rollback.push(move || { + let _ = docker::compose_down(&p, &c, Some(&o), rollback_volumes); + }); + } + + let allocated_ports = compose_data + .services + .iter() + .filter_map(|(name, svc)| { + compose::service_host_port(svc, req.slot as u16).map(|p| { + log.detail(&format!("{name}: {p}")); + (name.clone(), p) + }) + }) + .collect(); + + Ok(DockerStartup { + allocated_ports, + compose_overlays: vec![crate::state::ComposeOverlay { + compose: compose_str, + 
overlay: overlay_str.clone(), + }], + written_overlays: vec![overlay_str], + }) +} + impl super::ModeHandler for ContainerMode { fn bring_up( &self, - slug: &str, - slot: u8, - branch: &str, + req: &BringUpRequest, config: &Config, root: &Path, - watch: bool, - reuse_worktree: bool, - no_inherit_env: bool, - worktree_override: Option, - port_overrides: &std::collections::HashMap, - service_filter: Option<&std::collections::HashSet>, - skip_services: &std::collections::HashSet, - existing_port_overrides: &std::collections::HashMap, log: &StepLogger, ) -> Result { - let wt = WorktreeManager::new(root.to_owned()); - let worktree_path = worktree_override.unwrap_or_else(|| wt.worktree_path(config, slug)); - - let suffix = format!("{}_{}", config.prefix, slug); - let project = format!("{}_{}", config.prefix, slug); - let overlay_dir = root.join(".ecluse").join("overlays"); - std::fs::create_dir_all(&overlay_dir).context("failed to create overlays directory")?; - // pre_up: before anything exists — runs from repo root, no env vars yet if let Some(cmd) = &config.hooks.pre_up { log.step("Running pre_up hook..."); @@ -53,252 +114,30 @@ impl super::ModeHandler for ContainerMode { // Every step below registers its undo; any early return tears down // exactly what was created so far, in reverse order. let mut rollback = Rollback::new(); - // Only delete volumes the rollback created: on resume the session's - // existing data volumes must survive a failed re-up. - let rollback_volumes = !reuse_worktree; - - let docker_svcs_config: Vec<_> = config - .docker_services() - .into_iter() - .filter(|s| service_filter.is_none_or(|f| f.contains(&s.name))) - .collect(); - - let mut allocated_ports: Vec<(String, u16)> = vec![]; - let mut written_overlays: Vec = vec![]; - let mut compose_overlays: Vec = vec![]; - - // Copy ports for skipped docker services from existing session. 
- for svc in &docker_svcs_config { - if skip_services.contains(&svc.name) { - if let Some(&p) = existing_port_overrides.get(&svc.name) { - log.detail(&format!("{}: already running — skipped", svc.name)); - allocated_ports.push((svc.name.clone(), p)); - } - } - } - - let docker_svcs_to_start: Vec<_> = docker_svcs_config - .iter() - .filter(|s| !skip_services.contains(&s.name)) - .copied() - .collect(); - if !docker_svcs_config.is_empty() { - if !docker_svcs_to_start.is_empty() { - let groups = group_by_compose(root, &docker_svcs_to_start)?; - - for (compose_path, svcs) in &groups { - let svc_names: Vec = svcs.iter().map(|s| s.name.clone()).collect(); - log.step(&format!( - "Starting docker services: {}...", - svc_names.join(", ") - )); - - let compose_data = compose::parse(compose_path)?; - - // Build port_map: services that publish their primary base_port to the host. - // Services with suppress_primary_publish are excluded — their only host-side - // publish is via extra_port_map. - let mut port_map: std::collections::HashMap = - std::collections::HashMap::new(); - for s in svcs { - if s.suppress_primary_publish() { - if let Some(ep) = s.extra_ports.first() { - let hp = ep.base_port.saturating_add(slot as u16); - allocated_ports.push((s.name.clone(), hp)); - log.detail(&format!("{}: {hp} (via extra_ports)", s.name)); - } - } else { - let host_port = if let Some(&p) = port_overrides.get(&s.name) { - p - } else { - validate::find_free_port(config, s, slot)? - }; - allocated_ports.push((s.name.clone(), host_port)); - log.detail(&format!("{}: {host_port}", s.name)); - port_map.insert(s.name.clone(), (host_port, s.base_port)); - } - } - - let overlay_name = overlay_name_for_compose(slug, compose_path, root); - let overlay_path = overlay_dir.join(&overlay_name); - - // Build extra_port_map using container_port from ExtraPort when set. 
- let extra_port_map: std::collections::HashMap> = svcs - .iter() - .filter_map(|s| { - let extras: Vec<(u16, u16)> = s - .extra_port_mappings() - .into_iter() - .map(|(host_base, cport)| { - (host_base.saturating_add(slot as u16), cport) - }) - .collect(); - if extras.is_empty() { - None - } else { - Some((s.name.clone(), extras)) - } - }) - .collect(); - - // Build env map for compose interpolation: ECLUSE__PORT + extra_ports vars - let mut compose_env: std::collections::HashMap = port_map - .iter() - .map(|(n, (hp, _))| { - (format!("ECLUSE_{}_PORT", n.to_uppercase()), hp.to_string()) - }) - .collect(); - for svc in svcs { - for ep in &svc.extra_ports { - let host_port = ep.base_port.saturating_add(slot as u16); - compose_env.insert(ep.port_env.clone(), host_port.to_string()); - } - } - - let yaml = compose::generate_overlay_with_ports( - &compose_data, - &port_map, - &extra_port_map, - &suffix, - None, - &config.prefix, - slot, - )?; - std::fs::write(&overlay_path, &yaml).context("failed to write overlay file")?; - { - let overlay = overlay_path.clone(); - rollback.push(move || { - let _ = std::fs::remove_file(&overlay); - }); - } - - let compose_str = compose_path.to_string_lossy().to_string(); - let overlay_str = overlay_path.to_string_lossy().to_string(); - - docker::compose_up( - &project, - &compose_str, - Some(&overlay_str), - watch, - &compose_env, - )?; - { - let (p, c, o) = (project.clone(), compose_str.clone(), overlay_str.clone()); - rollback.push(move || { - let _ = docker::compose_down(&p, &c, Some(&o), rollback_volumes); - }); - } - - compose_overlays.push(crate::state::ComposeOverlay { - compose: compose_str, - overlay: overlay_str.clone(), - }); - written_overlays.push(overlay_str); - } - } // end if !docker_svcs_to_start.is_empty() + let docker_svcs_config = super::filtered_docker_services(config, req); + let docker = if docker_svcs_config.is_empty() { + start_whole_compose_file(req, config, root, &mut rollback, log)? 
} else { - let compose_path = compose::find_compose_file(root).ok_or_else(|| { - crate::error::EcluseError::ComposeFileNotFound(root.display().to_string()) - })?; - let compose_data = compose::parse(&compose_path)?; - - let all_svc_names: Vec = compose_data.services.keys().cloned().collect(); - log.step(&format!( - "Starting docker services: {}...", - all_svc_names.join(", ") - )); - - let overlay_path = overlay_dir.join(format!("{}.yml", slug)); - let yaml = compose::generate_overlay( - &compose_data, - slot as u16, - &suffix, - None, - &config.prefix, - slot, - )?; - std::fs::write(&overlay_path, &yaml).context("failed to write overlay file")?; - { - let overlay = overlay_path.clone(); - rollback.push(move || { - let _ = std::fs::remove_file(&overlay); - }); - } - - let compose_str = compose_path.to_string_lossy().to_string(); - let overlay_str = overlay_path.to_string_lossy().to_string(); - - docker::compose_up( - &project, - &compose_str, - Some(&overlay_str), - watch, - &std::collections::HashMap::new(), - )?; - { - let (p, c, o) = (project.clone(), compose_str.clone(), overlay_str.clone()); - rollback.push(move || { - let _ = docker::compose_down(&p, &c, Some(&o), rollback_volumes); - }); - } - - compose_overlays.push(crate::state::ComposeOverlay { - compose: compose_str, - overlay: overlay_str.clone(), - }); - - allocated_ports = compose_data - .services - .iter() - .filter_map(|(name, svc)| { - compose::service_host_port(svc, slot as u16).map(|p| { - log.detail(&format!("{name}: {p}")); - (name.clone(), p) - }) - }) - .collect(); - - written_overlays.push(overlay_str); - } + super::start_docker_services(req, config, root, false, &mut rollback, log)? 
+ }; - if reuse_worktree { - if !worktree_path.exists() { - return Err(anyhow::anyhow!( - "worktree not found at {}; remove --reuse-worktree or run ecluse up without it", - worktree_path.display() - )); - } - log.step("Reusing existing worktree..."); - log.detail(&worktree_path.display().to_string()); - } else { - log.step(&format!("Creating worktree (branch: {branch})...")); - log.detail(&worktree_path.display().to_string()); - wt.create(&worktree_path, branch)?; - { - let root_owned = root.to_owned(); - let wt_path = worktree_path.clone(); - rollback.push(move || { - let _ = WorktreeManager::new(root_owned).remove(&wt_path); - }); - } - } + let worktree_path = super::ensure_worktree(req, config, root, &mut rollback, log)?; - if !no_inherit_env && !config.inherit_env.is_empty() { + if !req.no_inherit_env && !config.inherit_env.is_empty() { log.step("Inheriting env files..."); crate::worktree::inherit_env_files(root, &worktree_path, &config.inherit_env, log)?; } log.step("Writing .env.ecluse..."); - let docker_svcs_ref: Vec<&crate::config::ServiceConfig> = docker_svcs_config.to_vec(); let env_map = env::build_env( - slot, - slug, + req.slot, + config.slot_stride, + req.slug, "container", &indexmap::IndexMap::new(), - &allocated_ports, - &[], - &docker_svcs_ref, + &docker.allocated_ports, + &docker_svcs_config, ); env::write_env_file(&worktree_path, &env_map)?; @@ -318,24 +157,25 @@ impl super::ModeHandler for ContainerMode { rollback.disarm(); - let app_port = allocated_ports.first().map(|(_, p)| *p); + let app_port = docker.allocated_ports.first().map(|(_, p)| *p); let stored_port_overrides: std::collections::HashMap = - allocated_ports.iter().cloned().collect(); + docker.allocated_ports.iter().cloned().collect(); - let primary_overlay = written_overlays.first().cloned(); - let extra_overlays: Vec = written_overlays.into_iter().skip(1).collect(); + let primary_overlay = docker.written_overlays.first().cloned(); + let extra_overlays: Vec = 
docker.written_overlays.into_iter().skip(1).collect(); Ok(Session { - slug: slug.to_string(), + slug: req.slug.to_string(), mode: crate::config::Mode::Container, - slot, - branch: branch.to_string(), + slot: req.slot, + branch: req.branch.to_string(), worktree_path: worktree_path.display().to_string(), status: crate::state::SessionStatus::Active, - compose_project: Some(project), + pending_op: None, + compose_project: Some(super::compose_project_name(config, req.slug)), overlay_file: primary_overlay, overlay_files: extra_overlays, - compose_overlays, + compose_overlays: docker.compose_overlays, app_port, started_at: Utc::now().to_rfc3339(), port_overrides: stored_port_overrides, @@ -343,7 +183,7 @@ impl super::ModeHandler for ContainerMode { tmux_session: None, pid_files: vec![], log_dir: None, - services_subset: service_filter.map(|f| { + services_subset: req.service_filter.map(|f| { let mut v: Vec = f.iter().cloned().collect(); v.sort(); v @@ -370,11 +210,11 @@ impl super::ModeHandler for ContainerMode { let docker_svcs_ref: Vec<&crate::config::ServiceConfig> = docker_svcs.to_vec(); let env_map = env::build_env( session.slot, + config.slot_stride, &session.slug, "container", &indexmap::IndexMap::new(), &allocated_ports, - &[], &docker_svcs_ref, ); @@ -478,6 +318,7 @@ mod tests { branch: "feat-worker".into(), worktree_path: dir.path().join("wt").display().to_string(), status: crate::state::SessionStatus::Active, + pending_op: None, compose_project: Some("ecluse_feat-worker".into()), overlay_file: Some(root_overlay.display().to_string()), overlay_files: vec![], diff --git a/src/modes/host.rs b/src/modes/host.rs index 6ae6ec9..63b3fb6 100644 --- a/src/modes/host.rs +++ b/src/modes/host.rs @@ -1,6 +1,5 @@ use anyhow::Result; use chrono::Utc; -use indexmap::IndexMap; use std::path::Path; use crate::config::Config; @@ -10,35 +9,24 @@ use crate::log::StepLogger; use crate::process; use crate::rollback::Rollback; use crate::state::Session; -use crate::validate; use 
crate::worktree::WorktreeManager; +use super::BringUpRequest; + pub struct HostMode; impl super::ModeHandler for HostMode { fn bring_up( &self, - slug: &str, - slot: u8, - branch: &str, + req: &BringUpRequest, config: &Config, root: &Path, - _watch: bool, - reuse_worktree: bool, - no_inherit_env: bool, - worktree_override: Option, - port_overrides: &std::collections::HashMap, - service_filter: Option<&std::collections::HashSet>, - skip_services: &std::collections::HashSet, - existing_port_overrides: &std::collections::HashMap, log: &StepLogger, ) -> Result { - let wt = WorktreeManager::new(root.to_owned()); - let worktree_path = worktree_override.unwrap_or_else(|| wt.worktree_path(config, slug)); let native_svcs: Vec<_> = config .native_services() .into_iter() - .filter(|s| service_filter.is_none_or(|f| f.contains(&s.name))) + .filter(|s| req.service_filter.is_none_or(|f| f.contains(&s.name))) .collect(); // pre_up: before anything exists — runs from repo root, no env vars yet @@ -48,51 +36,42 @@ impl super::ModeHandler for HostMode { hooks::run(cmd, root, &std::collections::HashMap::new())?; } + // Every step below registers its undo; any early return tears down + // exactly what was created so far, in reverse order. + let mut rollback = Rollback::new(); + log.step("Allocating ports..."); - let native_ports = native_ports_for_slot( + // Historical quirk kept on purpose: host mode allocates ports for all + // native services even when --services filters which ones spawn. + let native_ports = super::native_ports_for_slot( config, - slot, - port_overrides, - skip_services, - existing_port_overrides, + req.slot, + req.port_overrides, + req.skip_services, + req.existing_port_overrides, + None, )?; for (name, port) in &native_ports { log.detail(&format!("{name}: {port}")); } - // Every step below registers its undo; any early return tears down - // exactly what was created so far, in reverse order. 
- let mut rollback = Rollback::new(); - - if reuse_worktree { - if !worktree_path.exists() { - return Err(anyhow::anyhow!( - "worktree not found at {}; remove --reuse-worktree or run ecluse up without it", - worktree_path.display() - )); - } - log.step("Reusing existing worktree..."); - log.detail(&worktree_path.display().to_string()); - } else { - log.step(&format!("Creating worktree (branch: {branch})...")); - log.detail(&worktree_path.display().to_string()); - wt.create(&worktree_path, branch)?; - { - let root_owned = root.to_owned(); - let wt_path = worktree_path.clone(); - rollback.push(move || { - let _ = WorktreeManager::new(root_owned).remove(&wt_path); - }); - } - } + let worktree_path = super::ensure_worktree(req, config, root, &mut rollback, log)?; - if !no_inherit_env && !config.inherit_env.is_empty() { + if !req.no_inherit_env && !config.inherit_env.is_empty() { log.step("Inheriting env files..."); crate::worktree::inherit_env_files(root, &worktree_path, &config.inherit_env, log)?; } log.step("Writing .env.ecluse..."); - let env_map = env::build_env(slot, slug, "host", &native_ports, &[], &native_svcs, &[]); + let env_map = env::build_env( + req.slot, + config.slot_stride, + req.slug, + "host", + &native_ports, + &[], + &native_svcs, + ); env::write_env_file(&worktree_path, &env_map)?; // pre_spawn: env is written, services not yet started — use for derived env (URLs etc.) 
@@ -102,38 +81,17 @@ impl super::ModeHandler for HostMode { hooks::run(cmd, &worktree_path, &env_map)?; } - let global = process::load_global_config()?; - - let svcs_to_spawn: Vec<_> = native_svcs - .iter() - .filter(|s| !skip_services.contains(&s.name)) - .copied() - .collect(); + super::check_extra_ports(config, &native_svcs, req.skip_services, req.slot, log)?; - if svcs_to_spawn.iter().any(|s| s.command.is_some()) { - log.step(&format!( - "Spawning native services ({})...", - global.process_manager - )); - for svc in &svcs_to_spawn { - if let Some(cmd) = &svc.command { - let port = native_ports.get(&svc.name).copied().unwrap_or(0); - log.detail(&format!("{} on port {} — {}", svc.name, port, cmd)); - } - } - } - let spawn = process::spawn_services( - &global.process_manager, - slug, - &svcs_to_spawn, + let (spawn, used_pm) = super::spawn_native_services( + req, + &native_svcs, + &native_ports, &worktree_path, &env_map, + &mut rollback, + log, )?; - if spawn.tmux_session.is_some() || !spawn.pid_files.is_empty() { - let manager = global.process_manager.clone(); - let spawned = spawn.clone(); - rollback.push(move || process::kill_services(&manager, &spawned)); - } // post_up: all services spawned, full env available if let Some(cmd) = &config.hooks.post_up { @@ -145,7 +103,7 @@ impl super::ModeHandler for HostMode { rollback.disarm(); let pm = if spawn.tmux_session.is_some() || !spawn.pid_files.is_empty() { - Some(global.process_manager) + Some(used_pm) } else { None }; @@ -155,12 +113,13 @@ impl super::ModeHandler for HostMode { native_ports.iter().map(|(k, v)| (k.clone(), *v)).collect(); Ok(Session { - slug: slug.to_string(), + slug: req.slug.to_string(), mode: crate::config::Mode::Host, - slot, - branch: branch.to_string(), + slot: req.slot, + branch: req.branch.to_string(), worktree_path: worktree_path.display().to_string(), status: crate::state::SessionStatus::Active, + pending_op: None, compose_project: None, overlay_file: None, overlay_files: vec![], @@ 
-172,7 +131,7 @@ impl super::ModeHandler for HostMode { tmux_session: spawn.tmux_session, pid_files: spawn.pid_files, log_dir: spawn.log_dir, - services_subset: service_filter.map(|f| { + services_subset: req.service_filter.map(|f| { let mut v: Vec = f.iter().cloned().collect(); v.sort(); v @@ -189,22 +148,23 @@ impl super::ModeHandler for HostMode { keep_worktree: bool, log: &StepLogger, ) -> Result<()> { - let native_ports = native_ports_for_slot( + let native_ports = super::native_ports_for_slot( config, session.slot, &session.port_overrides, &std::collections::HashSet::new(), &std::collections::HashMap::new(), + None, )?; let native_svcs = config.native_services(); let env_map = env::build_env( session.slot, + config.slot_stride, &session.slug, "host", &native_ports, &[], &native_svcs, - &[], ); // pre_down: before services are killed — app can drain/flush. @@ -246,59 +206,6 @@ impl super::ModeHandler for HostMode { } } -/// Build the native port map for a slot, falling back to "app" on 3000+slot -/// when no [[services]] are defined. Skipped services copy their port from -/// `existing` instead of calling find_free_port. -fn native_ports_for_slot( - config: &Config, - slot: u8, - overrides: &std::collections::HashMap, - skip: &std::collections::HashSet, - existing: &std::collections::HashMap, -) -> Result> { - let native = config.native_services(); - if native.is_empty() { - let port = if let Some(&p) = overrides.get("app").or_else(|| existing.get("app")) { - p - } else { - let fallback = crate::config::ServiceConfig { - name: "app".into(), - base_port: 3000, - run: crate::config::ServiceRun::Native, - compose: None, - command: None, - port_env: vec![], - debug_port: None, - extra_ports: vec![], - host_port: None, - }; - validate::find_free_port(config, &fallback, slot)? 
- }; - let mut m = IndexMap::new(); - m.insert("app".to_string(), port); - Ok(m) - } else { - native - .iter() - .map(|s| { - let port = if let Some(&p) = overrides.get(&s.name) { - p - } else if skip.contains(&s.name) { - existing.get(&s.name).copied().ok_or_else(|| { - anyhow::anyhow!( - "service '{}' is skipped but has no recorded port; run ecluse up without --skip or provide --port {}=", - s.name, s.name - ) - })? - } else { - validate::find_free_port(config, s, slot)? - }; - Ok((s.name.clone(), port)) - }) - .collect() - } -} - #[cfg(test)] mod tests { use super::*; @@ -343,22 +250,23 @@ mod tests { fn bring_up(config: &Config, root: &Path, slug: &str, reuse: bool) -> Result { let log = crate::log::StepLogger::new(true); - HostMode.bring_up( + let port_overrides = std::collections::HashMap::new(); + let skip = std::collections::HashSet::new(); + let existing = std::collections::HashMap::new(); + let req = BringUpRequest { slug, - 1, - slug, - config, - root, - false, - reuse, - true, - None, - &std::collections::HashMap::new(), - None, - &std::collections::HashSet::new(), - &std::collections::HashMap::new(), - &log, - ) + slot: 1, + branch: slug, + watch: false, + reuse_worktree: reuse, + no_inherit_env: true, + worktree_override: None, + port_overrides: &port_overrides, + service_filter: None, + skip_services: &skip, + existing_port_overrides: &existing, + }; + HostMode.bring_up(&req, config, root, &log) } #[test] diff --git a/src/modes/hybrid.rs b/src/modes/hybrid.rs index 6c62c75..42f2cc0 100644 --- a/src/modes/hybrid.rs +++ b/src/modes/hybrid.rs @@ -12,39 +12,111 @@ use crate::log::StepLogger; use crate::process; use crate::rollback::Rollback; use crate::state::Session; -use crate::validate; use crate::worktree::WorktreeManager; -use super::{group_by_compose, overlay_name_for_compose, tear_down_all_overlays}; +use super::{tear_down_all_overlays, BringUpRequest, DockerStartup}; pub struct HybridMode; +/// Fallback when no docker `[[services]]` are declared: 
start every service in +/// the root compose file that is NOT labeled as the app (label-based split). +fn start_labeled_data_services( + req: &BringUpRequest, + config: &Config, + root: &Path, + rollback: &mut Rollback, + log: &StepLogger, +) -> Result { + let project = super::compose_project_name(config, req.slug); + let overlay_dir = root.join(".ecluse").join("overlays"); + std::fs::create_dir_all(&overlay_dir).context("failed to create overlays directory")?; + let rollback_volumes = !req.reuse_worktree; + + let compose_path = compose::find_compose_file(root).ok_or_else(|| { + crate::error::EcluseError::ComposeFileNotFound(root.display().to_string()) + })?; + let compose_data = compose::parse(&compose_path)?; + + let app_svcs = compose::app_services(&compose_data, &config.app_label, &config.app_label_value); + let data_svcs = + compose::data_services(&compose_data, &config.app_label, &config.app_label_value); + if app_svcs.is_empty() { + tracing::warn!( + "No service labeled {}={} found; treating all services as data.", + config.app_label, + config.app_label_value + ); + } + + log.step(&format!( + "Starting docker services: {}...", + data_svcs.join(", ") + )); + + let overlay_path = overlay_dir.join(format!("{}.yml", req.slug)); + let yaml = compose::generate_overlay( + &compose_data, + req.slot as u16, + &project, + Some(&data_svcs), + &config.prefix, + req.slot, + )?; + std::fs::write(&overlay_path, &yaml).context("failed to write overlay file")?; + { + let overlay = overlay_path.clone(); + rollback.push(move || { + let _ = std::fs::remove_file(&overlay); + }); + } + + let compose_str = compose_path.to_string_lossy().to_string(); + let overlay_str = overlay_path.to_string_lossy().to_string(); + let data_refs: Vec<&str> = data_svcs.iter().map(|s| s.as_str()).collect(); + + docker::compose_up_services( + &project, + &compose_str, + Some(&overlay_str), + &data_refs, + req.watch, + &std::collections::HashMap::new(), + )?; + { + let (p, c, o) = (project.clone(), 
compose_str.clone(), overlay_str.clone()); + rollback.push(move || { + let _ = docker::compose_down(&p, &c, Some(&o), rollback_volumes); + }); + } + + let mut allocated_ports = vec![]; + for (name, svc) in &compose_data.services { + if data_svcs.contains(name) { + if let Some(p) = compose::service_host_port(svc, req.slot as u16) { + log.detail(&format!("{name}: {p}")); + allocated_ports.push((name.clone(), p)); + } + } + } + + Ok(DockerStartup { + allocated_ports, + compose_overlays: vec![crate::state::ComposeOverlay { + compose: compose_str, + overlay: overlay_str.clone(), + }], + written_overlays: vec![overlay_str], + }) +} + impl super::ModeHandler for HybridMode { fn bring_up( &self, - slug: &str, - slot: u8, - branch: &str, + req: &BringUpRequest, config: &Config, root: &Path, - watch: bool, - reuse_worktree: bool, - no_inherit_env: bool, - worktree_override: Option, - port_overrides: &std::collections::HashMap, - service_filter: Option<&std::collections::HashSet>, - skip_services: &std::collections::HashSet, - existing_port_overrides: &std::collections::HashMap, log: &StepLogger, ) -> Result { - let wt = WorktreeManager::new(root.to_owned()); - let worktree_path = worktree_override.unwrap_or_else(|| wt.worktree_path(config, slug)); - - let suffix = format!("{}_{}", config.prefix, slug); - let project = format!("{}_{}", config.prefix, slug); - let overlay_dir = root.join(".ecluse").join("overlays"); - std::fs::create_dir_all(&overlay_dir).context("failed to create overlays directory")?; - // pre_up: before anything exists — runs from repo root, no env vars yet if let Some(cmd) = &config.hooks.pre_up { log.step("Running pre_up hook..."); @@ -55,320 +127,53 @@ impl super::ModeHandler for HybridMode { // Every step below registers its undo; any early return tears down // exactly what was created so far, in reverse order. 
let mut rollback = Rollback::new(); - // Only delete volumes the rollback created: on resume the session's - // existing data volumes must survive a failed re-up. - let rollback_volumes = !reuse_worktree; - - let docker_svcs_config: Vec<_> = config - .docker_services() - .into_iter() - .filter(|s| service_filter.is_none_or(|f| f.contains(&s.name))) - .collect(); - - let mut allocated_docker_ports: Vec<(String, u16)> = vec![]; - let mut written_overlays: Vec = vec![]; - let mut compose_overlays: Vec = vec![]; - // Copy ports for skipped docker services from existing session. - for svc in &docker_svcs_config { - if skip_services.contains(&svc.name) { - if let Some(&p) = existing_port_overrides.get(&svc.name) { - log.detail(&format!("{}: already running — skipped", svc.name)); - allocated_docker_ports.push((svc.name.clone(), p)); - } - } - } - - let docker_svcs_to_start: Vec<_> = docker_svcs_config - .iter() - .filter(|s| !skip_services.contains(&s.name)) - .copied() - .collect(); - - if !docker_svcs_config.is_empty() { - if !docker_svcs_to_start.is_empty() { - let groups = group_by_compose(root, &docker_svcs_to_start)?; - - for (compose_path, svcs) in &groups { - let svc_names: Vec = svcs.iter().map(|s| s.name.clone()).collect(); - log.step(&format!( - "Starting docker services: {}...", - svc_names.join(", ") - )); - - let compose_data = compose::parse(compose_path)?; - - // Build port_map: services that publish their primary base_port to the host. - // Services with suppress_primary_publish (any extra_port has container_port set) - // are excluded — their only host-side publish is via extra_port_map. - let mut port_map: std::collections::HashMap = - std::collections::HashMap::new(); - for s in svcs { - if s.suppress_primary_publish() { - // Track the first extra_port host port as the "primary" for state/ls. 
- if let Some(ep) = s.extra_ports.first() { - let hp = ep.base_port.saturating_add(slot as u16); - allocated_docker_ports.push((s.name.clone(), hp)); - log.detail(&format!("{}: {hp} (via extra_ports)", s.name)); - } - } else { - let host_port = if let Some(&p) = port_overrides.get(&s.name) { - p - } else { - validate::find_free_port(config, s, slot)? - }; - allocated_docker_ports.push((s.name.clone(), host_port)); - log.detail(&format!("{}: {host_port}", s.name)); - port_map.insert(s.name.clone(), (host_port, s.base_port)); - } - } - - let overlay_name = overlay_name_for_compose(slug, compose_path, root); - let overlay_path = overlay_dir.join(&overlay_name); - - // Build extra_port_map: service_name → [(host_port, container_port)] - // Uses container_port from ExtraPort when set (e.g. 11532→5432), - // otherwise falls back to base_port (11532→11532). - let extra_port_map: std::collections::HashMap> = svcs - .iter() - .filter_map(|s| { - let extras: Vec<(u16, u16)> = s - .extra_port_mappings() - .into_iter() - .map(|(host_base, cport)| { - (host_base.saturating_add(slot as u16), cport) - }) - .collect(); - if extras.is_empty() { - None - } else { - Some((s.name.clone(), extras)) - } - }) - .collect(); - - // Build env map for compose interpolation: ECLUSE__PORT + extra_ports vars - let mut compose_env: std::collections::HashMap = port_map - .iter() - .map(|(n, (hp, _))| { - (format!("ECLUSE_{}_PORT", n.to_uppercase()), hp.to_string()) - }) - .collect(); - for svc in svcs { - for ep in &svc.extra_ports { - let host_port = ep.base_port.saturating_add(slot as u16); - compose_env.insert(ep.port_env.clone(), host_port.to_string()); - } - } - - let yaml = compose::generate_overlay_with_ports( - &compose_data, - &port_map, - &extra_port_map, - &suffix, - Some(&svc_names), - &config.prefix, - slot, - )?; - std::fs::write(&overlay_path, &yaml).context("failed to write overlay file")?; - { - let overlay = overlay_path.clone(); - rollback.push(move || { - let _ = 
std::fs::remove_file(&overlay); - }); - } - - let compose_str = compose_path.to_string_lossy().to_string(); - let overlay_str = overlay_path.to_string_lossy().to_string(); - let svc_refs: Vec<&str> = svc_names.iter().map(|s| s.as_str()).collect(); - - docker::compose_up_services( - &project, - &compose_str, - Some(&overlay_str), - &svc_refs, - watch, - &compose_env, - )?; - { - let (p, c, o) = (project.clone(), compose_str.clone(), overlay_str.clone()); - rollback.push(move || { - let _ = docker::compose_down(&p, &c, Some(&o), rollback_volumes); - }); - } - - compose_overlays.push(crate::state::ComposeOverlay { - compose: compose_str, - overlay: overlay_str.clone(), - }); - written_overlays.push(overlay_str); - } - } // end if !docker_svcs_to_start.is_empty() + let docker_svcs_config = super::filtered_docker_services(config, req); + let docker = if docker_svcs_config.is_empty() { + start_labeled_data_services(req, config, root, &mut rollback, log)? } else { - let compose_path = compose::find_compose_file(root).ok_or_else(|| { - crate::error::EcluseError::ComposeFileNotFound(root.display().to_string()) - })?; - let compose_data = compose::parse(&compose_path)?; - - let app_svcs = - compose::app_services(&compose_data, &config.app_label, &config.app_label_value); - let data_svcs = - compose::data_services(&compose_data, &config.app_label, &config.app_label_value); - if app_svcs.is_empty() { - tracing::warn!( - "No service labeled {}={} found; treating all services as data.", - config.app_label, - config.app_label_value - ); - } - - log.step(&format!( - "Starting docker services: {}...", - data_svcs.join(", ") - )); - - let overlay_path = overlay_dir.join(format!("{}.yml", slug)); - let yaml = compose::generate_overlay( - &compose_data, - slot as u16, - &suffix, - Some(&data_svcs), - &config.prefix, - slot, - )?; - std::fs::write(&overlay_path, &yaml).context("failed to write overlay file")?; - { - let overlay = overlay_path.clone(); - rollback.push(move || { - let _ 
= std::fs::remove_file(&overlay); - }); - } - - let compose_str = compose_path.to_string_lossy().to_string(); - let overlay_str = overlay_path.to_string_lossy().to_string(); - let data_refs: Vec<&str> = data_svcs.iter().map(|s| s.as_str()).collect(); - - docker::compose_up_services( - &project, - &compose_str, - Some(&overlay_str), - &data_refs, - watch, - &std::collections::HashMap::new(), - )?; - { - let (p, c, o) = (project.clone(), compose_str.clone(), overlay_str.clone()); - rollback.push(move || { - let _ = docker::compose_down(&p, &c, Some(&o), rollback_volumes); - }); - } - - compose_overlays.push(crate::state::ComposeOverlay { - compose: compose_str, - overlay: overlay_str.clone(), - }); - - for (name, svc) in &compose_data.services { - if data_svcs.contains(name) { - if let Some(p) = compose::service_host_port(svc, slot as u16) { - log.detail(&format!("{name}: {p}")); - allocated_docker_ports.push((name.clone(), p)); - } - } - } - - written_overlays.push(overlay_str); - } + super::start_docker_services(req, config, root, true, &mut rollback, log)? 
+ }; - if reuse_worktree { - if !worktree_path.exists() { - return Err(anyhow::anyhow!( - "worktree not found at {}; remove --reuse-worktree or run ecluse up without it", - worktree_path.display() - )); - } - log.step("Reusing existing worktree..."); - log.detail(&worktree_path.display().to_string()); - } else { - log.step(&format!("Creating worktree (branch: {branch})...")); - log.detail(&worktree_path.display().to_string()); - wt.create(&worktree_path, branch)?; - { - let root_owned = root.to_owned(); - let wt_path = worktree_path.clone(); - rollback.push(move || { - let _ = WorktreeManager::new(root_owned).remove(&wt_path); - }); - } - } + let worktree_path = super::ensure_worktree(req, config, root, &mut rollback, log)?; log.step("Allocating native ports..."); let native_svcs: Vec<_> = config .native_services() .into_iter() - .filter(|s| service_filter.is_none_or(|f| f.contains(&s.name))) + .filter(|s| req.service_filter.is_none_or(|f| f.contains(&s.name))) .collect(); - let native_ports: IndexMap = if native_svcs.is_empty() { - let port = if let Some(&p) = port_overrides - .get("app") - .or_else(|| existing_port_overrides.get("app")) - { - p - } else { - let fallback = crate::config::ServiceConfig { - name: "app".into(), - base_port: 3000, - run: crate::config::ServiceRun::Native, - compose: None, - command: None, - port_env: vec![], - debug_port: None, - extra_ports: vec![], - host_port: None, - }; - validate::find_free_port(config, &fallback, slot)? - }; - let mut m = IndexMap::new(); - m.insert("app".to_string(), port); - log.detail(&format!("app: {port}")); - m - } else { - native_svcs - .iter() - .map(|s| { - let port = if let Some(&p) = port_overrides.get(&s.name) { - p - } else if skip_services.contains(&s.name) { - existing_port_overrides.get(&s.name).copied().ok_or_else(|| { - anyhow::anyhow!( - "service '{}' is skipped but has no recorded port; run ecluse up without --skip or provide --port {}=", - s.name, s.name - ) - })? 
- } else { - validate::find_free_port(config, s, slot)? - }; - log.detail(&format!("{}: {port}", s.name)); - Ok((s.name.clone(), port)) - }) - .collect::>>()? - }; + let native_ports: IndexMap = super::native_ports_for_slot( + config, + req.slot, + req.port_overrides, + req.skip_services, + req.existing_port_overrides, + req.service_filter, + )?; + for (name, port) in &native_ports { + log.detail(&format!("{name}: {port}")); + } - if !no_inherit_env && !config.inherit_env.is_empty() { + if !req.no_inherit_env && !config.inherit_env.is_empty() { log.step("Inheriting env files..."); crate::worktree::inherit_env_files(root, &worktree_path, &config.inherit_env, log)?; } log.step("Writing .env.ecluse..."); - let docker_svcs_ref: Vec<&crate::config::ServiceConfig> = docker_svcs_config.to_vec(); + let all_svc_configs: Vec<&crate::config::ServiceConfig> = native_svcs + .iter() + .chain(docker_svcs_config.iter()) + .copied() + .collect(); let env_map = env::build_env( - slot, - slug, + req.slot, + config.slot_stride, + req.slug, "hybrid", &native_ports, - &allocated_docker_ports, - &native_svcs, - &docker_svcs_ref, + &docker.allocated_ports, + &all_svc_configs, ); env::write_env_file(&worktree_path, &env_map)?; @@ -379,38 +184,17 @@ impl super::ModeHandler for HybridMode { hooks::run(cmd, &worktree_path, &env_map)?; } - let global = process::load_global_config()?; - - let native_svcs_to_spawn: Vec<_> = native_svcs - .iter() - .filter(|s| !skip_services.contains(&s.name)) - .copied() - .collect(); + super::check_extra_ports(config, &native_svcs, req.skip_services, req.slot, log)?; - if native_svcs_to_spawn.iter().any(|s| s.command.is_some()) { - log.step(&format!( - "Spawning native services ({})...", - global.process_manager - )); - for svc in &native_svcs_to_spawn { - if let Some(cmd) = &svc.command { - let port = native_ports.get(&svc.name).copied().unwrap_or(0); - log.detail(&format!("{} on port {} — {}", svc.name, port, cmd)); - } - } - } - let spawn = 
process::spawn_services( - &global.process_manager, - slug, - &native_svcs_to_spawn, + let (spawn, used_pm) = super::spawn_native_services( + req, + &native_svcs, + &native_ports, &worktree_path, &env_map, + &mut rollback, + log, )?; - if spawn.tmux_session.is_some() || !spawn.pid_files.is_empty() { - let manager = global.process_manager.clone(); - let spawned = spawn.clone(); - rollback.push(move || process::kill_services(&manager, &spawned)); - } // post_up: all services up and spawned, full env available if let Some(cmd) = &config.hooks.post_up { @@ -422,7 +206,7 @@ impl super::ModeHandler for HybridMode { rollback.disarm(); let pm = if spawn.tmux_session.is_some() || !spawn.pid_files.is_empty() { - Some(global.process_manager) + Some(used_pm) } else { None }; @@ -431,24 +215,25 @@ impl super::ModeHandler for HybridMode { let mut all_ports: std::collections::HashMap = native_ports.iter().map(|(k, v)| (k.clone(), *v)).collect(); - for (name, port) in &allocated_docker_ports { + for (name, port) in &docker.allocated_ports { all_ports.insert(name.clone(), *port); } - let primary_overlay = written_overlays.first().cloned(); - let extra_overlays: Vec = written_overlays.into_iter().skip(1).collect(); + let primary_overlay = docker.written_overlays.first().cloned(); + let extra_overlays: Vec = docker.written_overlays.into_iter().skip(1).collect(); Ok(Session { - slug: slug.to_string(), + slug: req.slug.to_string(), mode: crate::config::Mode::Hybrid, - slot, - branch: branch.to_string(), + slot: req.slot, + branch: req.branch.to_string(), worktree_path: worktree_path.display().to_string(), status: crate::state::SessionStatus::Active, - compose_project: Some(project), + pending_op: None, + compose_project: Some(super::compose_project_name(config, req.slug)), overlay_file: primary_overlay, overlay_files: extra_overlays, - compose_overlays, + compose_overlays: docker.compose_overlays, app_port, started_at: Utc::now().to_rfc3339(), port_overrides: all_ports, @@ -456,7 
+241,7 @@ impl super::ModeHandler for HybridMode { tmux_session: spawn.tmux_session, pid_files: spawn.pid_files, log_dir: spawn.log_dir, - services_subset: service_filter.map(|f| { + services_subset: req.service_filter.map(|f| { let mut v: Vec = f.iter().cloned().collect(); v.sort(); v @@ -500,16 +285,19 @@ impl super::ModeHandler for HybridMode { .filter(|(k, _)| !native_names.contains(k.as_str()) && k.as_str() != "app") .map(|(k, v)| (k.clone(), *v)) .collect(); - let docker_svcs = config.docker_services(); - let docker_svcs_ref: Vec<&crate::config::ServiceConfig> = docker_svcs.to_vec(); + let all_svc_configs: Vec<&crate::config::ServiceConfig> = native + .iter() + .chain(config.docker_services().iter()) + .copied() + .collect(); let env_map = env::build_env( session.slot, + config.slot_stride, &session.slug, "hybrid", &native_ports, &allocated_docker_ports, - &native, - &docker_svcs_ref, + &all_svc_configs, ); // pre_down: before services are killed — app can drain/flush. diff --git a/src/modes/mod.rs b/src/modes/mod.rs index 1a829ff..83cec78 100644 --- a/src/modes/mod.rs +++ b/src/modes/mod.rs @@ -2,33 +2,47 @@ pub mod container; pub mod host; pub mod hybrid; -use anyhow::Result; +use anyhow::{Context, Result}; use indexmap::IndexMap; -use std::path::Path; +use std::collections::{HashMap, HashSet}; +use std::path::{Path, PathBuf}; use crate::compose; use crate::config::{Config, Mode, ServiceConfig}; use crate::docker; +use crate::log::StepLogger; +use crate::process::{ProcessManager, SpawnResult}; +use crate::rollback::Rollback; use crate::state::Session; +use crate::worktree::WorktreeManager; + +/// Everything `bring_up` needs beyond config/root/log, bundled so the trait +/// signature stays stable as options are added. +pub struct BringUpRequest<'a> { + pub slug: &'a str, + pub slot: u8, + pub branch: &'a str, + pub watch: bool, + pub reuse_worktree: bool, + pub no_inherit_env: bool, + pub worktree_override: Option, + /// Explicit --port name=value pins. 
+ pub port_overrides: &'a HashMap, + /// --services subset; None means all services. + pub service_filter: Option<&'a HashSet>, + /// Services to leave untouched (already running, or --skip). + pub skip_services: &'a HashSet, + /// Ports recorded for the existing session when resuming. + pub existing_port_overrides: &'a HashMap, +} pub trait ModeHandler { - #[allow(clippy::too_many_arguments)] fn bring_up( &self, - slug: &str, - slot: u8, - branch: &str, + req: &BringUpRequest, config: &Config, root: &Path, - watch: bool, - reuse_worktree: bool, - no_inherit_env: bool, - worktree_override: Option, - port_overrides: &std::collections::HashMap, - service_filter: Option<&std::collections::HashSet>, - skip_services: &std::collections::HashSet, - existing_port_overrides: &std::collections::HashMap, - log: &crate::log::StepLogger, + log: &StepLogger, ) -> Result; fn bring_down( @@ -38,7 +52,7 @@ pub trait ModeHandler { root: &Path, keep_volumes: bool, keep_worktree: bool, - log: &crate::log::StepLogger, + log: &StepLogger, ) -> Result<()>; } @@ -57,6 +71,399 @@ pub fn get_handler_for_mode(mode: &Mode) -> Box { } } +// ── Shared bring_up building blocks ─────────────────────────────────────────── + +/// The docker `[[services]]` selected by the request's --services filter. +pub(crate) fn filtered_docker_services<'c>( + config: &'c Config, + req: &BringUpRequest, +) -> Vec<&'c ServiceConfig> { + config + .docker_services() + .into_iter() + .filter(|s| req.service_filter.is_none_or(|f| f.contains(&s.name))) + .collect() +} + +/// Probe the extra_ports of non-skipped services. Primary ports auto-bump on +/// collision, but extra ports are deterministic — an occupied one would +/// surface as a raw bind failure from the service or container. strict_port +/// makes it a hard error; otherwise it is warned about up front. 
+pub(crate) fn check_extra_ports( + config: &Config, + svcs: &[&ServiceConfig], + skip: &HashSet, + slot: u8, + log: &StepLogger, +) -> Result<()> { + for svc in svcs { + if skip.contains(&svc.name) { + continue; + } + for (base, env_key) in svc.all_extra_ports() { + let port = ServiceConfig::extra_port_for_slot(base, slot, config.slot_stride); + if crate::validate::port_occupied(port) { + if config.strict_port { + return Err(crate::error::EcluseError::PortInUse { + port, + pid: crate::validate::port_listener(port).unwrap_or(0), + } + .into()); + } + log.warn(&format!( + "extra port {} ({}) for service '{}' is already in use; the service may fail to bind", + port, env_key, svc.name + )); + } + } + } + Ok(()) +} + +/// What `start_docker_services` brought up (or copied from the existing session). +#[derive(Default)] +pub(crate) struct DockerStartup { + pub allocated_ports: Vec<(String, u16)>, + pub written_overlays: Vec, + pub compose_overlays: Vec, +} + +/// Bring up the `[[services]]`-declared docker services, one compose group at a +/// time, registering an undo with `rollback` after each successful step. +/// `limit_to_listed` scopes the overlay and `compose up` to the listed services +/// (hybrid); container mode brings up whole compose files. +pub(crate) fn start_docker_services( + req: &BringUpRequest, + config: &Config, + root: &Path, + limit_to_listed: bool, + rollback: &mut Rollback, + log: &StepLogger, +) -> Result { + let project = compose_project_name(config, req.slug); + let overlay_dir = root.join(".ecluse").join("overlays"); + std::fs::create_dir_all(&overlay_dir).context("failed to create overlays directory")?; + // Only delete volumes the rollback created: on resume the session's + // existing data volumes must survive a failed re-up. 
+ let rollback_volumes = !req.reuse_worktree; + + let docker_svcs_config = filtered_docker_services(config, req); + let mut out = DockerStartup::default(); + + // Copy ports for skipped docker services from the existing session. + for svc in &docker_svcs_config { + if req.skip_services.contains(&svc.name) { + if let Some(&p) = req.existing_port_overrides.get(&svc.name) { + log.detail(&format!("{}: already running — skipped", svc.name)); + out.allocated_ports.push((svc.name.clone(), p)); + } + } + } + + let docker_svcs_to_start: Vec<_> = docker_svcs_config + .iter() + .filter(|s| !req.skip_services.contains(&s.name)) + .copied() + .collect(); + + if docker_svcs_to_start.is_empty() { + return Ok(out); + } + + let groups = group_by_compose(root, &docker_svcs_to_start)?; + for (compose_path, svcs) in &groups { + check_extra_ports(config, svcs, &HashSet::new(), req.slot, log)?; + let svc_names: Vec = svcs.iter().map(|s| s.name.clone()).collect(); + log.step(&format!( + "Starting docker services: {}...", + svc_names.join(", ") + )); + + let compose_data = compose::parse(compose_path)?; + + // Build port_map: services that publish their primary base_port to the host. + // Services with suppress_primary_publish are excluded — their only host-side + // publish is via extra_port_map. + let mut port_map: HashMap = HashMap::new(); + for s in svcs { + if s.suppress_primary_publish() { + // Track the first extra_port host port as the "primary" for state/ls. + if let Some(ep) = s.extra_ports.first() { + let hp = ServiceConfig::extra_port_for_slot( + ep.base_port, + req.slot, + config.slot_stride, + ); + out.allocated_ports.push((s.name.clone(), hp)); + log.detail(&format!("{}: {hp} (via extra_ports)", s.name)); + } + } else { + let host_port = if let Some(&p) = req.port_overrides.get(&s.name) { + p + } else { + crate::validate::find_free_port(config, s, req.slot)? 
+ }; + out.allocated_ports.push((s.name.clone(), host_port)); + log.detail(&format!("{}: {host_port}", s.name)); + port_map.insert(s.name.clone(), (host_port, s.base_port)); + } + } + + let overlay_name = overlay_name_for_compose(req.slug, compose_path, root); + let overlay_path = overlay_dir.join(&overlay_name); + + // Build extra_port_map using container_port from ExtraPort when set. + let extra_port_map: HashMap> = svcs + .iter() + .filter_map(|s| { + let extras: Vec<(u16, u16)> = s + .extra_port_mappings() + .into_iter() + .map(|(host_base, cport)| { + ( + ServiceConfig::extra_port_for_slot( + host_base, + req.slot, + config.slot_stride, + ), + cport, + ) + }) + .collect(); + if extras.is_empty() { + None + } else { + Some((s.name.clone(), extras)) + } + }) + .collect(); + + // Env map for compose interpolation: ECLUSE__PORT + extra_ports vars. + let mut compose_env: HashMap = port_map + .iter() + .map(|(n, (hp, _))| (format!("ECLUSE_{}_PORT", n.to_uppercase()), hp.to_string())) + .collect(); + for svc in svcs { + for ep in &svc.extra_ports { + let host_port = + ServiceConfig::extra_port_for_slot(ep.base_port, req.slot, config.slot_stride); + compose_env.insert(ep.port_env.clone(), host_port.to_string()); + } + } + + let scope: Option<&[String]> = if limit_to_listed { + Some(&svc_names) + } else { + None + }; + let yaml = compose::generate_overlay_with_ports( + &compose_data, + &port_map, + &extra_port_map, + &project, + scope, + &config.prefix, + req.slot, + )?; + std::fs::write(&overlay_path, &yaml).context("failed to write overlay file")?; + { + let overlay = overlay_path.clone(); + rollback.push(move || { + let _ = std::fs::remove_file(&overlay); + }); + } + + let compose_str = compose_path.to_string_lossy().to_string(); + let overlay_str = overlay_path.to_string_lossy().to_string(); + + if limit_to_listed { + let svc_refs: Vec<&str> = svc_names.iter().map(|s| s.as_str()).collect(); + docker::compose_up_services( + &project, + &compose_str, + 
Some(&overlay_str), + &svc_refs, + req.watch, + &compose_env, + )?; + } else { + docker::compose_up( + &project, + &compose_str, + Some(&overlay_str), + req.watch, + &compose_env, + )?; + } + { + let (p, c, o) = (project.clone(), compose_str.clone(), overlay_str.clone()); + rollback.push(move || { + let _ = docker::compose_down(&p, &c, Some(&o), rollback_volumes); + }); + } + + out.compose_overlays.push(crate::state::ComposeOverlay { + compose: compose_str, + overlay: overlay_str.clone(), + }); + out.written_overlays.push(overlay_str); + } + + Ok(out) +} + +pub(crate) fn compose_project_name(config: &Config, slug: &str) -> String { + format!("{}_{}", config.prefix, slug) +} + +/// Resolve and (unless reusing) create the session worktree, registering its +/// removal with `rollback`. +pub(crate) fn ensure_worktree( + req: &BringUpRequest, + config: &Config, + root: &Path, + rollback: &mut Rollback, + log: &StepLogger, +) -> Result { + let wt = WorktreeManager::new(root.to_owned()); + let worktree_path = req + .worktree_override + .clone() + .unwrap_or_else(|| wt.worktree_path(config, req.slug)); + + if req.reuse_worktree { + if !worktree_path.exists() { + return Err(anyhow::anyhow!( + "worktree not found at {}; remove --reuse-worktree or run ecluse up without it", + worktree_path.display() + )); + } + log.step("Reusing existing worktree..."); + log.detail(&worktree_path.display().to_string()); + } else { + log.step(&format!("Creating worktree (branch: {})...", req.branch)); + log.detail(&worktree_path.display().to_string()); + wt.create(&worktree_path, req.branch)?; + { + let root_owned = root.to_owned(); + let wt_path = worktree_path.clone(); + rollback.push(move || { + let _ = WorktreeManager::new(root_owned).remove(&wt_path); + }); + } + } + Ok(worktree_path) +} + +/// Build the native port map for a slot, falling back to "app" on 3000+slot +/// when no native `[[services]]` match. Skipped services copy their port from +/// `existing` instead of probing. 
+/// +/// `filter` limits which services get ports (hybrid honors --services here; +/// host historically allocates all native ports regardless — pass None there). +pub(crate) fn native_ports_for_slot( + config: &Config, + slot: u8, + overrides: &HashMap, + skip: &HashSet, + existing: &HashMap, + filter: Option<&HashSet>, +) -> Result> { + let native: Vec<&ServiceConfig> = config + .native_services() + .into_iter() + .filter(|s| filter.is_none_or(|f| f.contains(&s.name))) + .collect(); + if native.is_empty() { + let port = if let Some(&p) = overrides.get("app").or_else(|| existing.get("app")) { + p + } else { + let fallback = ServiceConfig { + name: "app".into(), + base_port: 3000, + run: crate::config::ServiceRun::Native, + compose: None, + command: None, + port_env: vec![], + debug_port: None, + extra_ports: vec![], + publish_primary: None, + host_port: None, + }; + crate::validate::find_free_port(config, &fallback, slot)? + }; + let mut m = IndexMap::new(); + m.insert("app".to_string(), port); + Ok(m) + } else { + native + .iter() + .map(|s| { + let port = if let Some(&p) = overrides.get(&s.name) { + p + } else if skip.contains(&s.name) { + existing.get(&s.name).copied().ok_or_else(|| { + anyhow::anyhow!( + "service '{}' is skipped but has no recorded port; run ecluse up without --skip or provide --port {}=", + s.name, s.name + ) + })? + } else { + crate::validate::find_free_port(config, s, slot)? + }; + Ok((s.name.clone(), port)) + }) + .collect() + } +} + +/// Spawn the non-skipped native services with the configured process manager, +/// registering a kill with `rollback`. Returns the spawn result and the +/// manager that was used. 
+pub(crate) fn spawn_native_services( + req: &BringUpRequest, + native_svcs: &[&ServiceConfig], + native_ports: &IndexMap, + worktree_path: &Path, + env_map: &HashMap, + rollback: &mut Rollback, + log: &StepLogger, +) -> Result<(SpawnResult, ProcessManager)> { + let global = crate::process::load_global_config()?; + + let svcs_to_spawn: Vec<&ServiceConfig> = native_svcs + .iter() + .filter(|s| !req.skip_services.contains(&s.name)) + .copied() + .collect(); + + if svcs_to_spawn.iter().any(|s| s.command.is_some()) { + log.step(&format!( + "Spawning native services ({})...", + global.process_manager + )); + for svc in &svcs_to_spawn { + if let Some(cmd) = &svc.command { + let port = native_ports.get(&svc.name).copied().unwrap_or(0); + log.detail(&format!("{} on port {} — {}", svc.name, port, cmd)); + } + } + } + let spawn = crate::process::spawn_services( + &global.process_manager, + req.slug, + &svcs_to_spawn, + worktree_path, + env_map, + )?; + if spawn.tmux_session.is_some() || !spawn.pid_files.is_empty() { + let manager = global.process_manager.clone(); + let spawned = spawn.clone(); + rollback.push(move || crate::process::kill_services(&manager, &spawned)); + } + Ok((spawn, global.process_manager)) +} + // ── Shared helpers for multi-compose-file support ───────────────────────────── /// Group docker services by the compose file they belong to. 
@@ -170,6 +577,7 @@ mod tests { port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, } } @@ -337,3 +745,86 @@ mod tests { assert!(result.unwrap().ends_with("worker/docker-compose.yml")); } } + +#[cfg(test)] +mod extra_port_tests { + use super::*; + use crate::config::{ExtraPort, HookConfig, ServiceRun}; + + fn config_with(strict: bool, stride: u8, svc: ServiceConfig) -> Config { + Config { + mode: Mode::Host, + max_slots: 8, + prefix: "ecluse".into(), + worktree_dir: ".ecluse/worktrees".into(), + app_label: "ecluse.role".into(), + app_label_value: "app".into(), + strict_port: strict, + port_search_range: 10, + slot_stride: stride, + services: vec![svc], + hooks: HookConfig::default(), + inherit_env: vec![], + } + } + + fn svc_with_extra(base: u16) -> ServiceConfig { + ServiceConfig { + name: "api".into(), + base_port: 3000, + run: ServiceRun::Native, + compose: None, + command: Some("sleep 1".into()), + port_env: vec![], + debug_port: None, + extra_ports: vec![ExtraPort { + base_port: base, + port_env: "DBG".into(), + container_port: None, + }], + publish_primary: None, + host_port: None, + } + } + + // Occupy a real port, point an extra_port at it: strict errors, lax warns. + #[test] + fn occupied_extra_port_errors_in_strict_mode() { + let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap(); + let port = listener.local_addr().unwrap().port(); + if !crate::validate::port_occupied(port) { + // Port probing is best-effort (lsof); in environments where it + // cannot observe sockets the production check is a no-op too. 
+ return; + } + // slot 1, stride 1 → extra port = base + 1 = port + let svc = svc_with_extra(port - 1); + let config = config_with(true, 1, svc.clone()); + let log = crate::log::StepLogger::new(true); + let err = check_extra_ports(&config, &[&svc], &HashSet::new(), 1, &log).unwrap_err(); + assert!(err.to_string().contains(&port.to_string()), "got: {}", err); + } + + #[test] + fn occupied_extra_port_warns_but_passes_in_lax_mode() { + let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap(); + let port = listener.local_addr().unwrap().port(); + let svc = svc_with_extra(port - 1); + let config = config_with(false, 1, svc.clone()); + let log = crate::log::StepLogger::new(true); + check_extra_ports(&config, &[&svc], &HashSet::new(), 1, &log).unwrap(); + } + + #[test] + fn skipped_services_are_not_probed() { + let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap(); + let port = listener.local_addr().unwrap().port(); + let svc = svc_with_extra(port - 1); + let config = config_with(true, 1, svc.clone()); + let log = crate::log::StepLogger::new(true); + let mut skip = HashSet::new(); + skip.insert("api".to_string()); + // The occupied port belongs to the already-running service itself. + check_extra_ports(&config, &[&svc], &skip, 1, &log).unwrap(); + } +} diff --git a/src/process.rs b/src/process.rs index 9fa5cf7..cc53cfb 100644 --- a/src/process.rs +++ b/src/process.rs @@ -115,14 +115,18 @@ pub fn kill_services(manager: &ProcessManager, result: &SpawnResult) { } } -/// Check whether spawned nohup processes are still alive. +/// Check whether spawned services are still alive via their pid files +/// (written for both nohup- and tmux-managed sessions). /// Returns warning strings for any that have died. 
pub fn check_processes_alive( manager: &Option, result: &SpawnResult, slug: &str, ) -> Vec { - if !matches!(manager, Some(ProcessManager::Nohup)) { + if !matches!( + manager, + Some(ProcessManager::Nohup) | Some(ProcessManager::Tmux) + ) { return vec![]; } let mut warnings = vec![]; @@ -131,17 +135,27 @@ pub fn check_processes_alive( .file_stem() .and_then(|s| s.to_str()) .unwrap_or("unknown"); - if let Ok(content) = std::fs::read_to_string(pid_file) { - if let Ok(pid) = content.trim().parse::() { - if !pid_alive(pid) { - let log_hint = result - .log_dir - .as_ref() - .map(|d| d.join(format!("{}.log", service)).display().to_string()) - .unwrap_or_else(|| format!(".ecluse/logs/{}/{}.log", slug, service)); + let inspect_hint = match &result.tmux_session { + Some(session) => format!("inspect with `tmux attach -t {}`", session), + None => { + let log = result + .log_dir + .as_ref() + .map(|d| d.join(format!("{}.log", service)).display().to_string()) + .unwrap_or_else(|| format!(".ecluse/logs/{}/{}.log", slug, service)); + format!("check {}", log) + } + }; + match read_pid_file(pid_file) { + None => warnings.push(format!( + "service '{}' has no pid file (likely killed); run `ecluse up` to restart it", + service + )), + Some((pid, token)) => { + if !pid_file_alive(pid, &token) { warnings.push(format!( - "service '{}' (PID {}) is not running — check {}", - service, pid, log_hint + "service '{}' (PID {}) is not running — {}", + service, pid, inspect_hint )); } } @@ -159,6 +173,57 @@ pub fn pid_alive(pid: u32) -> bool { .unwrap_or(false) } +/// Opaque start-time token for a PID (`ps -o lstart=`). Two processes that +/// recycle the same PID get different tokens, so a stored (pid, token) pair +/// identifies exactly one process incarnation. 
+pub fn pid_start_token(pid: u32) -> Option { + let out = Command::new("ps") + .args(["-p", &pid.to_string(), "-o", "lstart="]) + .output() + .ok()?; + if !out.status.success() { + return None; + } + let s = String::from_utf8_lossy(&out.stdout).trim().to_string(); + if s.is_empty() { + None + } else { + Some(s) + } +} + +/// Write `` plus its start token. Readers treat a mismatched token as a +/// stale file (the PID was recycled by an unrelated process since the write). +pub fn write_pid_file_with_token(path: &Path, pid: u32) -> std::io::Result<()> { + let token = pid_start_token(pid).unwrap_or_default(); + std::fs::write(path, format!("{pid}\n{token}\n")) +} + +/// Parse a pid file: first line pid, optional second line start token +/// (absent in files written by older versions). +pub fn read_pid_file(path: &Path) -> Option<(u32, Option)> { + let content = std::fs::read_to_string(path).ok()?; + let mut lines = content.lines(); + let pid = lines.next()?.trim().parse().ok()?; + let token = lines + .next() + .map(|l| l.trim().to_string()) + .filter(|t| !t.is_empty()); + Some((pid, token)) +} + +/// True when the PID exists AND — if a token was recorded — it is still the +/// same process incarnation the pid file was written for. +pub fn pid_file_alive(pid: u32, token: &Option) -> bool { + if !pid_alive(pid) { + return false; + } + match token { + None => true, // legacy pid file without token + Some(t) => pid_start_token(pid).as_deref() == Some(t.as_str()), + } +} + fn shell_escape(s: &str) -> String { // Wrap in single quotes, escaping any single quotes in the value format!("'{}'", s.replace('\'', "'\\''")) @@ -210,7 +275,7 @@ fn build_source_preamble(worktree: &Path) -> String { files .iter() .filter(|f| worktree.join(f).exists()) - .map(|f| format!("set -a; source {}; set +a", shell_escape(f))) + .map(|f| format!("set -a; . 
{}; set +a", shell_escape(f))) .collect::>() .join("; ") } @@ -271,18 +336,19 @@ fn spawn_tmux( let session = tmux_session_name(slug); let merged_env = merge_worktree_env(worktree, env); - // Write merged env to a file so tmux windows source it rather than receiving - // a multi-KB export string through send-keys (safe for any env size). + // Window 0 stays a plain shell (handy for `ecluse shell` attaches); each + // service runs as its own window's process — no send-keys typing, so a + // command that fails to start is a detectable pane death, not a silently + // ignored keystroke. let preamble_path = write_env_preamble_file(worktree, slug, &merged_env); - // Build the source preamble: ecluse preamble file first, then the worktree env - // files (.env → .env.local → .env.ecluse) so manual restarts (↑ Enter) also - // have the correct environment. The preamble file is sourced first so the - // worktree files can override individual vars if needed. - let mut source_parts: Vec = Vec::new(); + let mut source_parts: Vec = vec![format!( + "cd {}", + shell_escape(&worktree.display().to_string()) + )]; if let Some(ref p) = preamble_path { source_parts.push(format!( - "set -a; source {}; set +a", + "set -a; . {}; set +a", shell_escape(&p.display().to_string()) )); } @@ -290,10 +356,6 @@ fn spawn_tmux( if !worktree_files.is_empty() { source_parts.push(worktree_files); } - source_parts.push(format!( - "cd {}", - shell_escape(&worktree.display().to_string()) - )); let setup_cmd = source_parts.join("; "); // Kill any stale tmux session with this name (processes exited but shell remains). 
@@ -304,15 +366,24 @@ fn spawn_tmux( .ok(); } - // Create detached session let status = Command::new("tmux") - .args(["new-session", "-d", "-s", &session, "-x", "220", "-y", "50"]) + .args([ + "new-session", + "-d", + "-s", + &session, + "-n", + "shell", + "-x", + "220", + "-y", + "50", + ]) .status() .map_err(|e| crate::error::EcluseError::SpawnFailed { service: "tmux".into(), reason: e.to_string(), })?; - if !status.success() { return Err(crate::error::EcluseError::SpawnFailed { service: "tmux".into(), @@ -321,47 +392,181 @@ fn spawn_tmux( .into()); } - for (i, svc) in services.iter().enumerate() { - let cmd = svc.command.as_deref().unwrap(); - let target = if i == 0 { - format!("{}:0", session) - } else { - format!("{}:{}", session, svc.name) - }; + // Keep dead panes around so a crashed service's output stays inspectable + // (and so the death is observable below instead of closing the window). + // A session-scoped hook applies the window option at creation time — + // setting it after new-window would race an instantly-dying command. + Command::new("tmux") + .args([ + "set-hook", + "-t", + &session, + "after-new-window", + "set-option -w remain-on-exit on", + ]) + .output() + .ok(); + // The shell window should have the session env + worktree cwd too. 
+ Command::new("tmux") + .args([ + "send-keys", + "-t", + &format!("{}:shell", session), + &setup_cmd, + "Enter", + ]) + .output() + .ok(); - if i == 0 { - Command::new("tmux") - .args(["rename-window", "-t", &format!("{}:0", session), &svc.name]) - .status() - .ok(); - } else { - Command::new("tmux") - .args(["new-window", "-t", &session, "-n", &svc.name]) - .status() - .ok(); + let ecluse_dir = ecluse_dir_for(worktree); + let pid_dir = ecluse_dir.join("pids").join(slug); + std::fs::create_dir_all(&pid_dir)?; + + let cleanup = |pid_files: &[PathBuf]| { + let _ = Command::new("tmux") + .args(["kill-session", "-t", &session]) + .output(); + for pf in pid_files { + let _ = std::fs::remove_file(pf); } + }; - // Source env + cd to worktree, then run the service command. - // Two separate send-keys calls keeps each line short; the setup line - // is a handful of file paths, never a large export blob. - Command::new("tmux") - .args(["send-keys", "-t", &target, &setup_cmd, "Enter"]) + let mut pid_files: Vec = vec![]; + for svc in services { + let cmd = svc.command.as_deref().unwrap(); + let full_cmd = format!("{}; exec sh -c {}", setup_cmd, shell_escape(cmd)); + let created = Command::new("tmux") + .args(["new-window", "-t", &session, "-n", &svc.name, &full_cmd]) .status() - .ok(); + .map(|st| st.success()) + .unwrap_or(false); + if !created { + cleanup(&pid_files); + return Err(crate::error::EcluseError::SpawnFailed { + service: svc.name.clone(), + reason: "tmux new-window failed".into(), + } + .into()); + } - Command::new("tmux") - .args(["send-keys", "-t", &target, cmd, "Enter"]) - .status() - .ok(); + if let Some(pane_pid) = tmux_pane_pid_for(&session, &svc.name) { + let pid_path = pid_dir.join(format!("{}.pid", svc.name)); + write_pid_file_with_token(&pid_path, pane_pid)?; + pid_files.push(pid_path); + } + } + + // Catch instant failures: a service command that dies within the grace + // window (typo, missing binary, port conflict) fails the spawn instead of + // 
reporting a "ready" session with dead services. + let deadline = std::time::Instant::now() + std::time::Duration::from_millis(1500); + loop { + if let Some((window, status)) = first_dead_pane(&session) { + let output = tmux_pane_tail(&session, &window, 5); + cleanup(&pid_files); + return Err(crate::error::EcluseError::SpawnFailed { + service: window, + reason: format!( + "command exited immediately (status {}); last output:\n{}", + status, + output.trim_end() + ), + } + .into()); + } + if std::time::Instant::now() >= deadline { + break; + } + std::thread::sleep(std::time::Duration::from_millis(150)); } Ok(SpawnResult { tmux_session: Some(session), - pid_files: vec![], + pid_files, log_dir: None, }) } +/// Pane PID of the named window in `session`. +fn tmux_pane_pid_for(session: &str, window: &str) -> Option { + let out = Command::new("tmux") + .args([ + "list-panes", + "-t", + &format!("{}:{}", session, window), + "-F", + "#{pane_pid}", + ]) + .output() + .ok()?; + if !out.status.success() { + return None; + } + String::from_utf8_lossy(&out.stdout) + .lines() + .next()? + .trim() + .parse() + .ok() +} + +/// First service pane that has died, as (window_name, exit_status). +/// The plain "shell" window is exempt. +fn first_dead_pane(session: &str) -> Option<(String, String)> { + let out = Command::new("tmux") + .args([ + "list-panes", + "-s", + "-t", + session, + "-F", + "#{window_name}|#{pane_dead}|#{pane_dead_status}", + ]) + .output() + .ok()?; + if !out.status.success() { + return None; + } + let stdout = String::from_utf8_lossy(&out.stdout); + for line in stdout.lines() { + let mut parts = line.split('|'); + let window = parts.next()?.to_string(); + let dead = parts.next()? == "1"; + let status = parts.next().unwrap_or("").to_string(); + if dead && window != "shell" { + return Some((window, status)); + } + } + None +} + +/// Last `n` lines of a window's pane output (best effort). 
+fn tmux_pane_tail(session: &str, window: &str, n: usize) -> String { + let out = Command::new("tmux") + .args([ + "capture-pane", + "-p", + "-t", + &format!("{}:{}", session, window), + ]) + .output(); + match out { + Ok(o) if o.status.success() => { + let text = String::from_utf8_lossy(&o.stdout); + let lines: Vec<&str> = text.lines().filter(|l| !l.trim().is_empty()).collect(); + lines + .iter() + .rev() + .take(n) + .rev() + .cloned() + .collect::>() + .join("\n") + } + _ => String::new(), + } +} + fn kill_tmux(result: &SpawnResult) { if let Some(session) = &result.tmux_session { Command::new("tmux") @@ -369,6 +574,9 @@ fn kill_tmux(result: &SpawnResult) { .output() .ok(); } + for pid_file in &result.pid_files { + let _ = std::fs::remove_file(pid_file); + } } fn spawn_nohup( @@ -443,14 +651,26 @@ fn spawn_one_nohup( reason: e.to_string(), })?; - std::fs::write(&pid_path, child.id().to_string())?; + write_pid_file_with_token(&pid_path, child.id())?; Ok(pid_path) } fn kill_nohup(result: &SpawnResult) { for pid_file in &result.pid_files { - if let Ok(content) = std::fs::read_to_string(pid_file) { - if let Ok(pid) = content.trim().parse::() { + if let Some((pid, token)) = read_pid_file(pid_file) { + let leader_alive = pid_alive(pid); + if leader_alive && !pid_file_alive(pid, &token) { + // The PID was recycled by an unrelated process — never signal + // it (or its group, which now belongs to that process). + tracing::warn!( + "PID {} from {} was recycled by another process; not signaling it", + pid, + pid_file.display() + ); + } else if leader_alive || target_alive(&format!("-{}", pid)) { + // Leader verified, or leader gone but its group still has our + // orphaned children (a pgid stays allocated while any member + // lives, so it cannot have been recycled). 
kill_process_group(pid); } } @@ -465,23 +685,33 @@ fn kill_nohup(result: &SpawnResult) { /// orphan the service's children — the `sh -c` wrapper dies while the actual /// server keeps running and holds the port. fn kill_process_group(pgid: u32) { - let group = format!("-{}", pgid); - let _ = Command::new("kill").args(["-TERM", "--", &group]).output(); + signal_with_grace(&format!("-{}", pgid)); +} + +/// TERM a single process, escalating to KILL after the grace period. +pub fn kill_pid_with_grace(pid: u32) { + signal_with_grace(&pid.to_string()); +} + +/// SIGTERM `target` (a pid, or "-pgid" for a whole group), poll for it to +/// disappear, and SIGKILL whatever survives the 2s grace period. +fn signal_with_grace(target: &str) { + let _ = Command::new("kill").args(["-TERM", "--", target]).output(); let deadline = std::time::Instant::now() + std::time::Duration::from_secs(2); - while process_group_alive(pgid) { + while target_alive(target) { if std::time::Instant::now() >= deadline { - let _ = Command::new("kill").args(["-KILL", "--", &group]).output(); + let _ = Command::new("kill").args(["-KILL", "--", target]).output(); return; } std::thread::sleep(std::time::Duration::from_millis(50)); } } -/// True while any process in the group still exists (kill -0 on the group). -fn process_group_alive(pgid: u32) -> bool { +/// True while the target (pid or process group) still exists (kill -0). 
+fn target_alive(target: &str) -> bool { Command::new("kill") - .args(["-0", "--", &format!("-{}", pgid)]) + .args(["-0", "--", target]) .output() .map(|o| o.status.success()) .unwrap_or(false) @@ -556,6 +786,7 @@ mod tests { port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }; let result = spawn_services( @@ -627,6 +858,7 @@ mod tests { port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }; let result = spawn_services( @@ -711,6 +943,7 @@ mod tests { port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, } } @@ -726,6 +959,44 @@ mod tests { cond() } + // ── pid start tokens ────────────────────────────────────────────────────── + + #[test] + fn pid_token_roundtrip_for_live_process() { + let dir = TempDir::new().unwrap(); + let my_pid = std::process::id(); + let path = dir.path().join("self.pid"); + write_pid_file_with_token(&path, my_pid).unwrap(); + let (pid, token) = read_pid_file(&path).unwrap(); + assert_eq!(pid, my_pid); + assert!(token.is_some(), "live process must get a start token"); + assert!(pid_file_alive(pid, &token)); + } + + #[test] + fn pid_file_alive_rejects_recycled_pid() { + // Same PID, forged token from "another incarnation". 
+ let my_pid = std::process::id(); + let forged = Some("Wed Jan 1 00:00:00 1986".to_string()); + assert!(!pid_file_alive(my_pid, &forged)); + } + + #[test] + fn pid_file_alive_accepts_legacy_tokenless_files() { + let my_pid = std::process::id(); + assert!(pid_file_alive(my_pid, &None)); + } + + #[test] + fn read_pid_file_parses_legacy_single_line_format() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("legacy.pid"); + std::fs::write(&path, "4242").unwrap(); + let (pid, token) = read_pid_file(&path).unwrap(); + assert_eq!(pid, 4242); + assert!(token.is_none()); + } + // The service command spawns a child; killing the session must take the // whole process group down, not just the `sh -c` group leader. #[test] @@ -827,3 +1098,90 @@ mod tests { ); } } + +#[cfg(test)] +mod tmux_tests { + use super::*; + use tempfile::TempDir; + + fn tmux_available() -> bool { + binary_available("tmux") + } + + fn native_svc(name: &str, command: &str) -> crate::config::ServiceConfig { + crate::config::ServiceConfig { + name: name.into(), + base_port: 3000, + run: crate::config::ServiceRun::Native, + compose: None, + command: Some(command.into()), + port_env: vec![], + debug_port: None, + extra_ports: vec![], + publish_primary: None, + host_port: None, + } + } + + // A command that dies instantly must fail the spawn (and clean up the + // session) instead of reporting a "ready" session with dead services. 
+ #[test] + fn spawn_tmux_detects_instantly_failing_command() { + if !tmux_available() { + return; + } + let dir = TempDir::new().unwrap(); + std::fs::create_dir_all(dir.path().join(".ecluse")).unwrap(); + let svc = native_svc("broken", "definitely-not-a-binary-xyz"); + let err = spawn_services( + &ProcessManager::Tmux, + "tmux-fail-test", + &[&svc], + dir.path(), + &std::collections::HashMap::new(), + ) + .unwrap_err(); + assert!(err.to_string().contains("broken"), "got: {}", err); + assert!( + !tmux_session_exists("ecluse-tmux-fail-test"), + "failed spawn must not leave the tmux session behind" + ); + } + + // A long-running service gets a recorded, token-verified pane PID, and + // kill_services takes the whole session (and pid files) down. + #[test] + fn spawn_tmux_records_pane_pids_and_kills_cleanly() { + if !tmux_available() { + return; + } + let dir = TempDir::new().unwrap(); + std::fs::create_dir_all(dir.path().join(".ecluse")).unwrap(); + let svc = native_svc("sleeper", "sleep 300"); + let result = spawn_services( + &ProcessManager::Tmux, + "tmux-ok-test", + &[&svc], + dir.path(), + &std::collections::HashMap::new(), + ) + .unwrap(); + + assert_eq!(result.tmux_session.as_deref(), Some("ecluse-tmux-ok-test")); + assert_eq!(result.pid_files.len(), 1, "pane pid must be recorded"); + let (pid, token) = read_pid_file(&result.pid_files[0]).unwrap(); + assert!(pid_file_alive(pid, &token), "pane process must be running"); + + kill_services(&ProcessManager::Tmux, &result); + assert!(!tmux_session_exists("ecluse-tmux-ok-test")); + assert!( + !result.pid_files[0].exists(), + "pid file must be removed on kill" + ); + let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5); + while pid_alive(pid) && std::time::Instant::now() < deadline { + std::thread::sleep(std::time::Duration::from_millis(50)); + } + assert!(!pid_alive(pid), "pane process must die with the session"); + } +} diff --git a/src/slot.rs b/src/slot.rs index c42efb4..1e9b49c 100644 --- 
a/src/slot.rs +++ b/src/slot.rs @@ -38,6 +38,7 @@ mod tests { branch: format!("branch-{}", slot), worktree_path: format!("/tmp/wt-{}", slot), status: crate::state::SessionStatus::Active, + pending_op: None, compose_project: None, overlay_file: None, overlay_files: vec![], diff --git a/src/state.rs b/src/state.rs index 53c2085..dbd592e 100644 --- a/src/state.rs +++ b/src/state.rs @@ -57,6 +57,27 @@ fn is_active(status: &SessionStatus) -> bool { *status == SessionStatus::Active } +/// Identity of the in-flight operation that marked a session Pending. +/// +/// `id` lets the owning command verify nothing took the session over while it +/// worked without holding the lock — a finalize that has lost ownership must +/// not write state (it would resurrect an entry another command deleted). +/// `since` lets `ls` flag entries whose owning operation likely crashed. +#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)] +pub struct PendingOp { + pub id: String, + pub since: String, +} + +/// Fresh operation id: unique enough to distinguish two concurrent commands. +pub fn new_op_id() -> String { + format!( + "{}-{}", + std::process::id(), + chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default() + ) +} + #[derive(Debug, Clone, Deserialize, Serialize)] pub struct Session { pub slug: String, @@ -68,6 +89,10 @@ pub struct Session { /// so state.json files written by older versions load unchanged. #[serde(default, skip_serializing_if = "is_active")] pub status: SessionStatus, + /// Present iff status == Pending: identifies the operation that owns this + /// entry. Maintained by `State::mark_pending` / the finalize paths. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub pending_op: Option, pub compose_project: Option, /// Legacy: primary overlay path. Still written for older binaries; /// teardown prefers `compose_overlays`. 
@@ -135,6 +160,30 @@ impl State { pub fn used_slots(&self) -> Vec { self.sessions.iter().map(|s| s.slot).collect() } + + /// Mark `slug` Pending under a fresh operation id, taking ownership of the + /// entry (including from a previous operation that crashed mid-flight). + /// Returns the session as it was before marking plus the op id the caller + /// must present to `still_owned` when finalizing. + pub fn mark_pending(&mut self, slug: &str) -> Option<(Session, String)> { + let pos = self.sessions.iter().position(|s| s.slug == slug)?; + let original = self.sessions[pos].clone(); + let op_id = new_op_id(); + self.sessions[pos].status = SessionStatus::Pending; + self.sessions[pos].pending_op = Some(PendingOp { + id: op_id.clone(), + since: chrono::Utc::now().to_rfc3339(), + }); + Some((original, op_id)) + } + + /// True while the Pending entry written under `op_id` is still in place — + /// i.e. no other command removed or took over the session in the meantime. + pub fn still_owned(&self, slug: &str, op_id: &str) -> bool { + self.find_session(slug) + .and_then(|s| s.pending_op.as_ref()) + .is_some_and(|op| op.id == op_id) + } } pub struct StateGuard { @@ -269,6 +318,7 @@ mod tests { branch: format!("branch/{}", slug), worktree_path: format!("/tmp/{}", slug), status: SessionStatus::Active, + pending_op: None, compose_project: None, overlay_file: None, overlay_files: vec![], @@ -449,6 +499,7 @@ mod tests { branch: "branch/pm-sess".into(), worktree_path: "/tmp/pm-sess".into(), status: SessionStatus::Active, + pending_op: None, compose_project: None, overlay_file: None, overlay_files: vec![], @@ -485,6 +536,7 @@ mod tests { branch: "branch/nohup-sess".into(), worktree_path: "/tmp/nohup-sess".into(), status: SessionStatus::Active, + pending_op: None, compose_project: None, overlay_file: None, overlay_files: vec![], @@ -518,6 +570,7 @@ mod tests { branch: "branch/compose-sess".into(), worktree_path: "/tmp/wt".into(), status: SessionStatus::Active, + pending_op: None, 
compose_project: Some("ecluse_compose-sess".into()), overlay_file: Some("/tmp/overlay.yml".into()), overlay_files: vec![], @@ -636,6 +689,63 @@ mod tests { assert_eq!(back.status, SessionStatus::Active); } + // ── mark_pending / still_owned ──────────────────────────────────────────── + + #[test] + fn mark_pending_sets_status_and_op() { + let mut state = State::default(); + state.add_session(make_session("busy", 1)); + let (original, op_id) = state.mark_pending("busy").unwrap(); + assert_eq!(original.status, SessionStatus::Active); + let s = state.find_session("busy").unwrap(); + assert_eq!(s.status, SessionStatus::Pending); + assert_eq!(s.pending_op.as_ref().unwrap().id, op_id); + assert!(state.still_owned("busy", &op_id)); + } + + #[test] + fn mark_pending_missing_session_returns_none() { + let mut state = State::default(); + assert!(state.mark_pending("ghost").is_none()); + } + + // A second mark_pending takes the entry over: the first operation's + // finalize must stand down instead of resurrecting a deleted session. 
+ #[test] + fn second_mark_pending_takes_over_ownership() { + let mut state = State::default(); + state.add_session(make_session("busy", 1)); + let (_, first_op) = state.mark_pending("busy").unwrap(); + let (taken_over, second_op) = state.mark_pending("busy").unwrap(); + assert_eq!(taken_over.status, SessionStatus::Pending); + assert!(!state.still_owned("busy", &first_op)); + assert!(state.still_owned("busy", &second_op)); + } + + #[test] + fn still_owned_false_after_removal() { + let mut state = State::default(); + state.add_session(make_session("busy", 1)); + let (_, op_id) = state.mark_pending("busy").unwrap(); + state.remove_session("busy"); + assert!(!state.still_owned("busy", &op_id)); + } + + #[test] + fn new_op_ids_are_unique() { + assert_ne!(new_op_id(), new_op_id()); + } + + #[test] + fn pending_op_roundtrips_in_state_json() { + let mut state = State::default(); + state.add_session(make_session("busy", 1)); + let (_, op_id) = state.mark_pending("busy").unwrap(); + let json = serde_json::to_string(&state).unwrap(); + let back: State = serde_json::from_str(&json).unwrap(); + assert!(back.still_owned("busy", &op_id)); + } + #[test] fn pending_sessions_still_reserve_slots() { let mut state = State::default(); diff --git a/src/sync.rs b/src/sync.rs index 2ee55f6..ab8fe33 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -50,6 +50,7 @@ pub fn match_services( processes: &[DiscoveredProcess], ) -> Vec { let mut matches = Vec::new(); + let mut claimed: std::collections::HashSet = std::collections::HashSet::new(); for svc in services { let command = match &svc.command { @@ -62,9 +63,11 @@ pub fn match_services( continue; } + // Skip processes already claimed by another service — two services + // with overlapping command tokens must not both "own" one process. 
let root = processes .iter() - .find(|p| cmdline_matches(&p.cmdline, &tokens)); + .find(|p| !claimed.contains(&p.pid) && cmdline_matches(&p.cmdline, &tokens)); let (pid, port) = match root { Some(proc) => { @@ -79,6 +82,7 @@ pub fn match_services( None => continue, }; + claimed.insert(pid); matches.push(ServiceMatch { service_name: svc.name.clone(), pid, @@ -89,15 +93,21 @@ pub fn match_services( matches } -/// Detect running docker containers related to `slug` and match them to docker services. +/// Detect the session's running docker services and their published host ports. /// -/// Runs `docker ps` and filters containers whose name contains the slug. -/// For each docker service, returns the first host port bound to the container's -/// `base_port` (or any port the container exposes if base_port doesn't match). +/// Matches by the compose project label docker-compose stamps on every +/// container it starts — exact, never by name substring (slug `feat-a` must +/// not adopt `feat-ab`'s containers). `project` is `_`. /// Best-effort: returns empty if docker is unavailable or nothing matches. 
-pub fn find_docker_services(services: &[&ServiceConfig], slug: &str) -> Vec<(String, u16)> { +pub fn find_docker_services(services: &[&ServiceConfig], project: &str) -> Vec<(String, u16)> { let output = match docker::docker_cmd() - .args(["ps", "--format", "{{.Names}}\t{{.Ports}}"]) + .args([ + "ps", + "--filter", + &format!("label=com.docker.compose.project={}", project), + "--format", + "{{.Label \"com.docker.compose.service\"}}\t{{.Ports}}", + ]) .output() { Ok(o) if o.status.success() => o, @@ -105,23 +115,17 @@ pub fn find_docker_services(services: &[&ServiceConfig], slug: &str) -> Vec<(Str }; let stdout = String::from_utf8_lossy(&output.stdout); - // Parse lines into (container_name, ports_string) + // Parse lines into (compose_service_name, ports_string) let containers: Vec<(&str, &str)> = stdout .lines() .filter_map(|line| line.split_once('\t')) .collect(); let mut result = Vec::new(); - for svc in services { - // Find a container whose name contains the slug and (optionally) the service name. let container = containers .iter() - .find(|(name, _)| { - name.contains(slug) && (name.contains(&svc.name) || containers.len() == 1) - }) - .or_else(|| containers.iter().find(|(name, _)| name.contains(slug))); - + .find(|(service_label, _)| *service_label == svc.name); if let Some((_, ports_str)) = container { if let Some(port) = parse_host_port(ports_str, svc.base_port) { result.push((svc.name.clone(), port)); @@ -132,6 +136,40 @@ pub fn find_docker_services(services: &[&ServiceConfig], slug: &str) -> Vec<(Str result } +/// True when a native service of `session` is demonstrably running: its pid +/// file points at a live, token-verified process (that owns `expected_port` +/// when one is recorded), or — for tmux-managed sessions without a pid file — +/// the service's tmux window owns the port. 
+pub fn native_service_running( + root: &Path, + session: &crate::state::Session, + svc_name: &str, + expected_port: Option, +) -> bool { + let pid_file = root + .join(".ecluse") + .join("pids") + .join(&session.slug) + .join(format!("{}.pid", svc_name)); + if let Some((pid, token)) = crate::process::read_pid_file(&pid_file) { + if !crate::process::pid_file_alive(pid, &token) { + return false; + } + return match expected_port { + Some(p) => subtree_owns_port(pid, p), + None => true, + }; + } + // No pid file — legacy tmux sessions only have window names to go by. + if let Some(ref tmux_session) = session.tmux_session { + return match expected_port { + Some(p) => tmux_window_owns_port(tmux_session, svc_name, p), + None => tmux_window_exists(tmux_session, svc_name), + }; + } + false +} + /// Write a PID file for a discovered process at the standard ecluse path. /// /// Path: `/pids//.pid` @@ -144,7 +182,7 @@ pub fn write_pid_file( let pid_dir = ecluse_dir.join("pids").join(slug); std::fs::create_dir_all(&pid_dir)?; let pid_path = pid_dir.join(format!("{}.pid", service)); - std::fs::write(&pid_path, pid.to_string())?; + crate::process::write_pid_file_with_token(&pid_path, pid)?; Ok(pid_path) } @@ -382,6 +420,7 @@ pub(crate) fn make_native_svc(name: &str, base_port: u16, command: &str) -> Serv port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, } } @@ -397,6 +436,7 @@ pub(crate) fn make_docker_svc(name: &str, base_port: u16) -> ServiceConfig { port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, } } @@ -625,8 +665,10 @@ mod tests { let dir = TempDir::new().unwrap(); let pid_path = write_pid_file(dir.path(), "my-slug", "api", 12345).unwrap(); assert!(pid_path.ends_with("pids/my-slug/api.pid")); - let content = std::fs::read_to_string(&pid_path).unwrap(); - assert_eq!(content, "12345"); + // First line is the pid; second line is the start token (empty for a + // pid that is not 
running). + let (pid, _token) = crate::process::read_pid_file(&pid_path).unwrap(); + assert_eq!(pid, 12345); } #[test] diff --git a/src/validate.rs b/src/validate.rs index d2fda2a..5857bba 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -65,6 +65,16 @@ fn port_in_use_by_docker(port: u16) -> bool { .any(|line| line.contains(&format!(":{port}->"))) } +/// Best-effort: PID of the process listening on `port`, if any (lsof). +pub fn port_listener(port: u16) -> Option { + port_listener_pid(port) +} + +/// Best-effort: whether anything (host process or docker container) holds `port`. +pub fn port_occupied(port: u16) -> bool { + port_listener_pid(port).is_some() || port_in_use_by_docker(port) +} + /// Find a free port for a service on a given slot. /// /// Tries: `nominal`, `nominal + max_slots`, `nominal + 2*max_slots`, … @@ -241,6 +251,15 @@ pub fn validate_config(config: &Config) -> Result> { )); } + if svc.publish_primary.is_none() + && svc.extra_ports.iter().any(|ep| ep.container_port.is_some()) + { + warnings.push(format!( + "service '{}': primary port publication is implicitly suppressed because an extra_port sets container_port; set `publish_primary = false` explicitly (the implicit rule is deprecated)", + svc.name + )); + } + if svc.host_port.is_some() && svc.extra_ports.iter().any(|ep| ep.container_port.is_some()) { warnings.push(format!( "service '{}': host_port and extra_ports[].container_port are both set; \ @@ -377,6 +396,7 @@ mod tests { port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, } } @@ -391,6 +411,7 @@ mod tests { port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, } } @@ -612,6 +633,7 @@ mod tests { port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }], ); @@ -635,6 +657,7 @@ mod tests { port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }], ); @@ -657,6 
+680,7 @@ mod tests { port_env: vec![], debug_port: None, extra_ports: vec![], + publish_primary: None, host_port: None, }], ); diff --git a/src/whose_pid.rs b/src/whose_pid.rs index 774ac78..85af7f2 100644 --- a/src/whose_pid.rs +++ b/src/whose_pid.rs @@ -45,8 +45,16 @@ fn match_pid_files(root: &Path, session: &Session, pid: u32) -> Option if path.extension().and_then(|s| s.to_str()) != Some("pid") { continue; } - let content = std::fs::read_to_string(&path).ok()?; - let tracked_pid: u32 = content.trim().parse().ok()?; + let Some((tracked_pid, token)) = crate::process::read_pid_file(&path) else { + continue; + }; + // A live PID whose start token no longer matches was recycled by an + // unrelated process — it must not be attributed to this session. + if crate::process::pid_alive(tracked_pid) + && !crate::process::pid_file_alive(tracked_pid, &token) + { + continue; + } // Match either the tracked PID directly or any descendant of it. if tracked_pid == pid || is_descendant(tracked_pid, pid) { let service = path.file_stem().and_then(|s| s.to_str()).map(String::from); @@ -150,6 +158,7 @@ mod tests { branch: format!("branch/{}", slug), worktree_path: format!("/tmp/{}", slug), status: crate::state::SessionStatus::Active, + pending_op: None, compose_project: None, overlay_file: None, overlay_files: vec![], diff --git a/tests/docker_e2e.rs b/tests/docker_e2e.rs new file mode 100644 index 0000000..766830f --- /dev/null +++ b/tests/docker_e2e.rs @@ -0,0 +1,343 @@ +// Docker-gated end-to-end tests: real `docker compose` against a local daemon. +// Every test no-ops (with a note) when docker is unavailable — they run on CI's +// ubuntu runners and on any dev machine with a daemon, and skip elsewhere +// (macOS runners have no docker). +// +// The image comes from the ECR public mirror: identical to docker.io alpine, +// but not subject to Docker Hub's unauthenticated pull rate limits, which +// shared CI runner IPs regularly exhaust. 
+ +use std::path::PathBuf; +use std::process::Command; +use std::sync::OnceLock; + +const TEST_IMAGE: &str = "public.ecr.aws/docker/library/alpine:3.20"; + +/// Pull the test image exactly once for the whole suite. Tests run in +/// parallel; four concurrent pulls trip the registry's per-IP rate limit on +/// shared CI runners, so the first caller pulls (with retries) and the rest +/// wait on the OnceLock. An unobtainable image is treated like an absent +/// daemon: skip loudly rather than fail on registry weather. +fn test_image_ready() -> bool { + static IMAGE_READY: OnceLock = OnceLock::new(); + *IMAGE_READY.get_or_init(|| { + let present = Command::new("docker") + .args(["image", "inspect", TEST_IMAGE]) + .output() + .map(|o| o.status.success()) + .unwrap_or(false); + if present { + return true; + } + for attempt in 0..3 { + if attempt > 0 { + std::thread::sleep(std::time::Duration::from_secs(10)); + } + let ok = Command::new("docker") + .args(["pull", TEST_IMAGE]) + .output() + .map(|o| o.status.success()) + .unwrap_or(false); + if ok { + return true; + } + } + eprintln!( + "skipping docker e2e: cannot pull {} (registry rate limit?)", + TEST_IMAGE + ); + false + }) +} + +fn ecluse_bin() -> PathBuf { + env!("CARGO_BIN_EXE_ecluse").into() +} + +fn docker_available() -> bool { + let daemon = Command::new("docker") + .arg("info") + .output() + .map(|o| o.status.success()) + .unwrap_or(false); + let compose = Command::new("docker") + .args(["compose", "version"]) + .output() + .map(|o| o.status.success()) + .unwrap_or(false); + daemon && compose +} + +fn setup_repo(dir: &std::path::Path) { + Command::new("git") + .args(["init"]) + .current_dir(dir) + .output() + .unwrap(); + Command::new("git") + .args(["-c", "commit.gpgsign=false"]) + .args(["commit", "--allow-empty", "-m", "init"]) + .current_dir(dir) + .env("GIT_AUTHOR_NAME", "test") + .env("GIT_AUTHOR_EMAIL", "test@test.com") + .env("GIT_COMMITTER_NAME", "test") + .env("GIT_COMMITTER_EMAIL", "test@test.com") + 
.output() + .unwrap(); +} + +/// HOME is pointed at the repo so the developer's real global config (and its +/// process_manager) never leaks into test behavior. +fn ecluse(dir: &std::path::Path, args: &[&str]) -> std::process::Output { + Command::new(ecluse_bin()) + .args(args) + .current_dir(dir) + .env("HOME", dir) + .output() + .expect("failed to run ecluse") +} + +fn stdout(output: &std::process::Output) -> String { + String::from_utf8_lossy(&output.stdout).to_string() +} + +fn stderr(output: &std::process::Output) -> String { + String::from_utf8_lossy(&output.stderr).to_string() +} + +/// Names of running containers belonging to a compose project. +fn project_containers(project: &str) -> Vec { + let out = Command::new("docker") + .args([ + "ps", + "--filter", + &format!("label=com.docker.compose.project={}", project), + "--format", + "{{.Names}}", + ]) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout) + .lines() + .map(|l| l.trim().to_string()) + .filter(|l| !l.is_empty()) + .collect() +} + +/// Remove anything a previous (crashed) run of this project left behind. 
+fn docker_nuke(project: &str) { + let out = Command::new("docker") + .args([ + "ps", + "-aq", + "--filter", + &format!("label=com.docker.compose.project={}", project), + ]) + .output() + .unwrap(); + for id in String::from_utf8_lossy(&out.stdout).lines() { + let _ = Command::new("docker") + .args(["rm", "-f", id.trim()]) + .output(); + } +} + +fn write_compose(dir: &std::path::Path, rel: &str, services: &[&str]) { + let mut body = String::from("services:\n"); + for svc in services { + body.push_str(&format!( + " {}:\n image: {}\n command: sleep 300\n", + svc, TEST_IMAGE + )); + } + let path = dir.join(rel); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent).unwrap(); + } + std::fs::write(path, body).unwrap(); +} + +#[test] +fn hybrid_lifecycle_starts_and_stops_containers() { + if !docker_available() || !test_image_ready() { + eprintln!("skipping: docker or test image unavailable"); + return; + } + let repo = tempfile::tempdir().unwrap(); + setup_repo(repo.path()); + docker_nuke("ecluse_e2e-hy"); + + write_compose(repo.path(), "docker-compose.yml", &["db"]); + std::fs::write( + repo.path().join(".ecluse.toml"), + r#"mode = "hybrid" +inherit_env = [] + +[[services]] +name = "db" +run = "docker" +base_port = 5480 +"#, + ) + .unwrap(); + + let up = ecluse(repo.path(), &["up", "e2e-hy"]); + assert!(up.status.success(), "up failed: {}", stderr(&up)); + + let containers = project_containers("ecluse_e2e-hy"); + assert_eq!(containers.len(), 1, "got containers: {:?}", containers); + + // The session records the (compose, overlay) pair and the allocated port. + let state = std::fs::read_to_string(repo.path().join(".ecluse/state.json")).unwrap(); + assert!(state.contains("compose_overlays"), "got: {}", state); + assert!(state.contains("\"db\": 5481"), "got: {}", state); + + // status sees the container as up, via the compose project label. 
+ let status = ecluse(repo.path(), &["status", "e2e-hy", "--json"]); + let parsed: serde_json::Value = serde_json::from_str(&stdout(&status)).unwrap(); + let db = parsed["services"] + .as_array() + .unwrap() + .iter() + .find(|s| s["name"] == "db") + .unwrap() + .clone(); + assert_eq!(db["healthy"], true, "got: {}", db); + + let down = ecluse(repo.path(), &["down", "--delete-worktree", "e2e-hy"]); + assert!(down.status.success(), "down failed: {}", stderr(&down)); + assert!( + project_containers("ecluse_e2e-hy").is_empty(), + "containers must be gone after down" + ); + assert!( + !repo.path().join(".ecluse/overlays/e2e-hy.yml").exists(), + "overlay must be removed" + ); +} + +#[test] +fn failed_post_up_rolls_back_containers() { + if !docker_available() || !test_image_ready() { + eprintln!("skipping: docker or test image unavailable"); + return; + } + let repo = tempfile::tempdir().unwrap(); + setup_repo(repo.path()); + docker_nuke("ecluse_e2e-rb"); + + write_compose(repo.path(), "docker-compose.yml", &["db"]); + std::fs::write( + repo.path().join(".ecluse.toml"), + r#"mode = "hybrid" +inherit_env = [] + +[[services]] +name = "db" +run = "docker" +base_port = 5460 + +[hooks] +post_up = "false" +"#, + ) + .unwrap(); + + let up = ecluse(repo.path(), &["up", "e2e-rb"]); + assert!(!up.status.success(), "up must fail on post_up"); + + assert!( + project_containers("ecluse_e2e-rb").is_empty(), + "rollback must stop the containers it started" + ); + assert!( + !repo.path().join(".ecluse/worktrees/e2e-rb").exists(), + "rollback must remove the fresh worktree" + ); + let ls = ecluse(repo.path(), &["ls"]); + assert!( + stdout(&ls).contains("no active sessions"), + "pending reservation must be cleared: {}", + stdout(&ls) + ); +} + +// The #9 regression scenario, end to end: a hyphenated slug whose suffix +// matches a real subdirectory with its own compose file. Teardown must use +// the recorded pairs, not filename parsing. 
+#[test]
+fn hyphenated_slug_multi_compose_teardown() {
+    if !docker_available() || !test_image_ready() {
+        eprintln!("skipping: docker or test image unavailable");
+        return;
+    }
+    let repo = tempfile::tempdir().unwrap();
+    setup_repo(repo.path());
+    docker_nuke("ecluse_feat-worker");
+
+    write_compose(repo.path(), "docker-compose.yml", &["db"]);
+    write_compose(repo.path(), "worker/docker-compose.yml", &["queue"]);
+    std::fs::write(
+        repo.path().join(".ecluse.toml"),
+        r#"mode = "hybrid"
+inherit_env = []
+
+[[services]]
+name = "db"
+run = "docker"
+base_port = 5470
+
+[[services]]
+name = "queue"
+run = "docker"
+base_port = 5700
+compose = "worker/docker-compose.yml"
+"#,
+    )
+    .unwrap();
+
+    let up = ecluse(repo.path(), &["up", "feat-worker"]);
+    assert!(up.status.success(), "up failed: {}", stderr(&up));
+    assert_eq!(
+        project_containers("ecluse_feat-worker").len(),
+        2,
+        "both compose groups must be up"
+    );
+
+    let down = ecluse(repo.path(), &["down", "--delete-worktree", "feat-worker"]);
+    assert!(down.status.success(), "down failed: {}", stderr(&down));
+    assert!(
+        project_containers("ecluse_feat-worker").is_empty(),
+        "both compose groups must be torn down despite the slug/subdir collision"
+    );
+}
+
+#[test]
+fn container_mode_runs_whole_compose_file() {
+    if !docker_available() || !test_image_ready() {
+        eprintln!("skipping: docker or test image unavailable");
+        return;
+    }
+    let repo = tempfile::tempdir().unwrap();
+    setup_repo(repo.path());
+    docker_nuke("ecluse_e2e-ct");
+
+    write_compose(repo.path(), "docker-compose.yml", &["web", "cache"]);
+    std::fs::write(
+        repo.path().join(".ecluse.toml"),
+        "mode = \"container\"\ninherit_env = []\n",
+    )
+    .unwrap();
+
+    let up = ecluse(repo.path(), &["up", "e2e-ct"]);
+    assert!(up.status.success(), "up failed: {}", stderr(&up));
+    assert_eq!(
+        project_containers("ecluse_e2e-ct").len(),
+        2,
+        "container mode must bring up every service in the file"
+    );
+
+    let down = ecluse(repo.path(), &["down", "--delete-worktree", "e2e-ct"]);
+    assert!(down.status.success(), "down failed: {}", stderr(&down));
+    assert!(project_containers("ecluse_e2e-ct").is_empty());
+}
diff --git a/tests/integration.rs b/tests/integration.rs
index 8c216cc..4e1cc15 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -24,10 +24,18 @@ fn setup_repo(dir: &std::path::Path) {
         .unwrap();
 }
 
+/// Run ecluse with HOME pointed at the repo dir: tests must never read or
+/// write the developer's real ~/.config/ecluse/config.toml (whose
+/// process_manager would otherwise leak into spawn behavior).
+fn ecluse_cmd(dir: &std::path::Path) -> Command {
+    let mut cmd = Command::new(ecluse_bin());
+    cmd.current_dir(dir).env("HOME", dir);
+    cmd
+}
+
 fn ecluse(dir: &std::path::Path, args: &[&str]) -> std::process::Output {
-    Command::new(ecluse_bin())
+    ecluse_cmd(dir)
         .args(args)
-        .current_dir(dir)
         .output()
         .expect("failed to run ecluse")
 }
@@ -509,9 +517,8 @@ post_up = "sleep 3"
     )
     .unwrap();
 
-    let mut slow_up = Command::new(ecluse_bin())
+    let mut slow_up = ecluse_cmd(repo.path())
         .args(["up", "slow-sess"])
-        .current_dir(repo.path())
         .stdout(std::process::Stdio::null())
         .stderr(std::process::Stdio::null())
         .spawn()
@@ -579,9 +586,8 @@ post_up = "sleep 3"
     )
     .unwrap();
 
-    let mut slow_up = Command::new(ecluse_bin())
+    let mut slow_up = ecluse_cmd(repo.path())
         .args(["up", "busy-sess"])
-        .current_dir(repo.path())
         .stdout(std::process::Stdio::null())
         .stderr(std::process::Stdio::null())
         .spawn()
@@ -638,3 +644,105 @@ post_up = "false"
         stdout(&out)
     );
 }
+
+// ── ownership tokens: no session resurrection ─────────────────────────────────
+
+/// Races `down --delete-worktree` against an `up` slowed by a `post_up = "sleep 3"`
+/// hook: the `up` must exit non-zero, `ls` must report no active sessions, and
+/// the worktree directory must be gone.
+#[test]
+fn down_during_slow_up_does_not_resurrect_the_session() {
+    let repo = tmp_repo();
+    ecluse(repo.path(), &["init", "--mode", "host", "--yes"]);
+    std::fs::write(
+        repo.path().join(".ecluse.toml"),
+        r#"mode = "host"
+inherit_env = []
+
+[hooks]
+post_up = "sleep 3"
+"#,
+    )
+    .unwrap();
+
+    let mut slow_up = ecluse_cmd(repo.path())
+        .args(["up", "race-sess"])
+        .stdout(std::process::Stdio::null())
+        .stderr(std::process::Stdio::null())
+        .spawn()
+        .unwrap();
+
+    // Wait for the pending reservation, then take the session over with down.
+    let state_path = repo.path().join(".ecluse/state.json");
+    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
+    while !std::fs::read_to_string(&state_path)
+        .unwrap_or_default()
+        .contains("race-sess")
+    {
+        assert!(std::time::Instant::now() < deadline, "pending reservation never appeared");
+        std::thread::sleep(std::time::Duration::from_millis(50));
+    }
+    let down = ecluse(repo.path(), &["down", "--delete-worktree", "race-sess"]);
+    assert!(down.status.success(), "{}", stderr(&down));
+
+    // The up must notice the takeover: non-zero exit, no resurrected entry,
+    // and no leftover worktree.
+    let status = slow_up.wait().unwrap();
+    assert!(
+        !status.success(),
+        "up must fail when its session was taken over mid-provisioning"
+    );
+    let ls = ecluse(repo.path(), &["ls"]);
+    assert!(
+        stdout(&ls).contains("no active sessions"),
+        "no resurrection: got {}",
+        stdout(&ls)
+    );
+    assert!(
+        !repo.path().join(".ecluse/worktrees/race-sess").exists(),
+        "worktree must not survive"
+    );
+}
+
+/// Writes a forged `pending` entry straight into `.ecluse/state.json` and checks
+/// that `ls` still succeeds, lists it as `crashed-sess (pending)`, and warns on
+/// stderr with a hint mentioning `ecluse down`.
+#[test]
+fn ls_warns_about_stale_pending_sessions() {
+    let repo = tmp_repo();
+    ecluse(repo.path(), &["init", "--mode", "host", "--yes"]);
+    // Forge a pending entry whose owning operation "crashed" long ago (fixed past date).
+    std::fs::write(
+        repo.path().join(".ecluse/state.json"),
+        r#"{
+  "version": 1,
+  "sessions": [{
+    "slug": "crashed-sess",
+    "mode": "host",
+    "slot": 1,
+    "branch": "crashed-sess",
+    "worktree_path": "/tmp/nope",
+    "status": "pending",
+    "pending_op": { "id": "1-1", "since": "2000-01-01T00:00:00Z" },
+    "compose_project": null,
+    "overlay_file": null,
+    "app_port": null,
+    "started_at": "2000-01-01T00:00:00Z"
+  }]
+}"#,
+    )
+    .unwrap();
+
+    let out = ecluse(repo.path(), &["ls"]);
+    assert!(out.status.success(), "{}", stderr(&out));
+    assert!(
+        stdout(&out).contains("crashed-sess (pending)"),
+        "got: {}",
+        stdout(&out)
+    );
+    assert!(
+        stderr(&out).contains("has been pending for") && stderr(&out).contains("ecluse down"),
+        "got: {}",
+        stderr(&out)
+    );
+}