From f41a90d3753617c766f05677a604b2746d9cd36a Mon Sep 17 00:00:00 2001
From: Autumn Wyborny
Date: Sun, 26 Apr 2026 17:43:02 -0700
Subject: [PATCH 1/3] Phase B/C/D/E: PR shepherding pipeline + Vibe Check

Ship the auto-PR-comment loop end-to-end:

- RepoWatchTask polls GitHub, hashes PR state, consults an NDJSON
  idempotency ledger, emits PrOpened/PrUpdated/PrClosed on an internal
  EventBus. Per-PR MonitorPrTasks subscribe and re-prompt the agent on
  each update. Idempotent `devdev repo watch/unwatch`.
- `devdev_ask` MCP tool: universal approval seam. Agent calls it with
  kind={post_review,post_comment,request_token,question}; daemon
  routes through ApprovalGate and surfaces a host-derived short-lived
  `gh` token on approval.
- `devdev init` Vibe Check scribe writes `.devdev/*.md` preference
  files; `devdev preferences {list,edit}` for inspection.
- Scenario S07 covers the user-surface plumbing.
- ROADMAP/cap-doc/CHANGELOG refreshed.

Validated Win + WSL: clippy clean, full test suite green except
pre-existing WinFSP env failure.
---
 .devdev/pr-review.md                          |  27 +
 .devdev/rust-style.md                         |  14 +
 .devdev/vibe.md                               |  13 +
 .gitignore                                    |   2 +
 CHANGELOG.md                                  |  22 +
 README.md                                     |  19 +-
 ROADMAP.md                                    |  30 +-
 crates/devdev-cli/src/daemon_cli.rs           | 365 +++++++++++++-
 crates/devdev-cli/src/lib.rs                  |   1 +
 crates/devdev-cli/src/main.rs                 |  33 +-
 crates/devdev-cli/src/preferences.rs          | 264 ++++++++++
 crates/devdev-cli/src/vibe_check_prompt.md    |  34 ++
 .../tests/acceptance_preferences.rs           |  71 +++
 crates/devdev-daemon/src/dispatch.rs          | 252 +++++++++-
 crates/devdev-daemon/src/ledger.rs            | 302 ++++++++++++
 crates/devdev-daemon/src/lib.rs               |   3 +
 crates/devdev-daemon/src/mcp/mod.rs           |   4 +-
 crates/devdev-daemon/src/mcp/provider.rs      | 248 +++++++++-
 crates/devdev-daemon/src/mcp/tools.rs         |  90 ++++
 crates/devdev-daemon/src/runner.rs            |  47 ++
 crates/devdev-daemon/src/secrets.rs           |  98 ++++
 .../devdev-daemon/tests/e2e_pr_shepherding.rs | 464 ++++++++----------
 crates/devdev-integrations/src/github.rs      |  30 ++
 crates/devdev-integrations/src/github_mock.rs |  20 +
 crates/devdev-integrations/src/lib.rs         |  17 +
 .../catalog/S07-repo-watch-task-lifecycle.md  |  57 +++
 crates/devdev-scenarios/tests/scenarios.rs    |  96 ++++
 crates/devdev-tasks/src/agent.rs              |  15 +
 crates/devdev-tasks/src/events.rs             | 145 ++++++
 crates/devdev-tasks/src/ledger.rs             |  50 ++
 crates/devdev-tasks/src/lib.rs                |   8 +
 crates/devdev-tasks/src/monitor_pr.rs         | 274 +++++------
 crates/devdev-tasks/src/repo_watch.rs         | 424 ++++++++++++++++
 .../tests/acceptance_monitor_pr.rs            | 276 +++++------
 .../capabilities/22-monitor-pr-task.md        |   2 +-
 .../capabilities/24-e2e-pr-shepherding.md     |   2 +-
 docs/internals/capabilities/25-vibe-check.md  |  15 +-
 .../capabilities/27-idempotency-ledger.md     |   2 +-
 38 files changed, 3242 insertions(+), 594 deletions(-)
 create mode 100644 .devdev/pr-review.md
 create mode 100644 .devdev/rust-style.md
 create mode 100644 .devdev/vibe.md
 create mode 100644 crates/devdev-cli/src/preferences.rs
 create mode 100644 crates/devdev-cli/src/vibe_check_prompt.md
 create mode 100644 crates/devdev-cli/tests/acceptance_preferences.rs
 create mode 100644 crates/devdev-daemon/src/ledger.rs
 create mode 100644 crates/devdev-daemon/src/runner.rs
 create mode 100644 crates/devdev-daemon/src/secrets.rs
 create mode 100644 crates/devdev-scenarios/catalog/S07-repo-watch-task-lifecycle.md
 create mode 100644 crates/devdev-tasks/src/agent.rs
 create mode 100644 crates/devdev-tasks/src/events.rs
 create mode 100644 crates/devdev-tasks/src/ledger.rs
 create mode 100644 crates/devdev-tasks/src/repo_watch.rs

diff --git a/.devdev/pr-review.md b/.devdev/pr-review.md
new file mode 100644
index 0000000..7c084b5
--- /dev/null
+++ b/.devdev/pr-review.md
@@ -0,0 +1,27 @@
+# PR review
+
+What to flag:
+
+- **Correctness gaps.** The change does the wrong thing, or the
+  right thing only for the happy path. Concrete repro > vibes.
+- **Public API churn.** New pub items, breaking signatures, new
+  required deps. Worth a sentence even when the change is fine.
+- **Unjustified scope creep.** "Drive-by refactors" inside an
+  otherwise tight PR. Ask whether they belong in their own commit.
+- **Test debt.** New behaviour without coverage, or a fix without
+  a regression test.
+
+What to skip:
+
+- Style nits the formatter would catch. `cargo fmt` exists.
+- Renaming preferences. The name in the diff is fine.
+- Speculative "what if someday" objections. Cross that bridge later.
+- Restating what the diff already says.
+
+Tone:
+
+- One thread per concern. Don't pile observations into a single
+  comment.
+- Quote the line you mean. Anchor the comment to the code.
+- If approving, say so plainly. No ceremony.
+- Sign off with the takeaway, not with apologies.
diff --git a/.devdev/rust-style.md b/.devdev/rust-style.md
new file mode 100644
index 0000000..aec2b34
--- /dev/null
+++ b/.devdev/rust-style.md
@@ -0,0 +1,14 @@
+# Rust style
+
+- Edition 2024. Use `let ... else`, `if let ... && ...`, `?` over
+  `match err`.
+- Errors: `thiserror` for libraries, `anyhow` for binaries. No
+  ad-hoc `String` errors at module boundaries.
+- Tests live next to the code they cover — `#[cfg(test)] mod tests`
+  for unit tests, `tests/` dir for integration tests.
+- No `unwrap()` in non-test code unless the invariant is one line
+  away.
+- Small modules over giant ones. If `mod.rs` exceeds ~400 lines,
+  it wants splitting.
+- Public items get a one-line doc comment. Internal items only get
+  comments when the *why* isn't obvious from the *what*.
diff --git a/.devdev/vibe.md b/.devdev/vibe.md
new file mode 100644
index 0000000..85b0af6
--- /dev/null
+++ b/.devdev/vibe.md
@@ -0,0 +1,13 @@
+# Vibe
+
+Empiricism, brevity, wit. In that order.
+
+- **Empirical first.** Don't claim something works until tests pass.
+  Don't claim a fix without reproducing the bug. "It compiles" is
+  not "it works."
+- **Short over clever.** A two-line fix beats a refactor. If a
+  three-paragraph comment is necessary, the code probably isn't.
+- **Wit, not snark.** A good review sounds like a colleague at a
+  whiteboard, not a linter. Find the real point. Make it once.
+
+If you can't decide between "say it" and "shut up," shut up.
diff --git a/.gitignore b/.gitignore
index 83a3f54..535c34c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,8 @@
 # Scratch / tempfile output
 /tmp/
 /target/tmp/
+run_id.txt
+/.devdev-runtime/
 
 # IDE
 .idea/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 58a0203..3e21009 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,24 @@ release is cut.
 ## [Unreleased]
 
 ### Added
+- **PR shepherding pipeline**: `devdev repo watch <owner>/<repo>`
+  polls GitHub, hashes PR state, consults an append-only NDJSON
+  idempotency ledger, and emits `PrOpened`/`PrUpdated`/`PrClosed`
+  events on an internal `EventBus`. Per-PR `MonitorPrTask`s subscribe
+  and re-prompt the agent on each update. Idempotent watch / unwatch
+  via `repo/watch` + `repo/unwatch` IPC. New scenario S07 covers the
+  user-surface plumbing.
+- **`devdev_ask` MCP tool**: universal approval seam exposed to ACP
+  agents. Takes `kind={post_review,post_comment,request_token,question}`
+  and routes through `ApprovalGate`. On approval for the
+  external-action kinds, the response includes a host-derived
+  short-lived `GH_TOKEN` so the agent can run `gh` itself — no typed
+  `post_review` adapter path.
+- **Vibe Check**: `devdev init` runs a scribe session that writes
+  `.devdev/*.md` preference files in the user's voice. `devdev
+  preferences list` discovers preferences across repo, parents, and
+  `~/.devdev/` with repo-wins precedence; `devdev preferences edit
+  <name>` opens `$EDITOR`.
 - `devdev-workspace`: standalone crate README covering the library
   entry points, minimal example, and platform matrix.
 - `ROADMAP.md`: Today / Next / Aspirational breakdown.
@@ -15,6 +33,10 @@ release is cut.
 - `rust-toolchain.toml`: pinned compiler.
 - MIT `LICENSE`.
 
+### Removed
+- `placeholder_review_fn` agent-callback seam — superseded by the
+  event-driven `MonitorPrTask` + `devdev_ask` flow described above.
+
 ### Changed
 - Root `README.md` rewritten for the two-audience split
   (workspace-curious vs DevDev-hosting). Explicit non-claim on
diff --git a/README.md b/README.md
index 4f1bed6..a518a5b 100644
--- a/README.md
+++ b/README.md
@@ -30,9 +30,12 @@ in [ROADMAP.md](ROADMAP.md).
   (`cargo`, `git`, `rg`, language servers) can operate in. Start at
   [`crates/devdev-workspace/README.md`](crates/devdev-workspace/README.md).
 - **DevDev-hosting.** You want to run the full agent product locally
-  (PR monitoring, preferences-as-Markdown, approval gates). Today
-  you're early — several loops are still behind placeholders, tracked
-  in [ROADMAP.md](ROADMAP.md).
+  (PR shepherding, preferences-as-Markdown, approval gates). The
+  end-to-end loop — `devdev init` → `devdev repo watch` → agent
+  reviews PRs as they appear — works against the mock GitHub
+  adapter today; live `gh` posting is gated behind `devdev_ask`
+  approvals. See [ROADMAP.md](ROADMAP.md) for what's shipped vs.
+  in flight.
 
 ## Quickstart: the workspace library
 
@@ -71,13 +74,19 @@ From source:
 
 ```
 cargo install --git https://github.com/goldenwitch/devdev devdev-cli
-devdev up    # starts the daemon
-devdev down  # stops it
+devdev up                      # starts the daemon
+devdev init                    # interview yourself; writes .devdev/*.md
+devdev repo watch owner/name   # poll GitHub for PR events
+devdev preferences list        # show discovered .devdev/*.md
+devdev down                    # stops the daemon
 ```
 
 DevDev expects a logged-in
 [GitHub Copilot CLI](https://github.com/github/copilot-cli)
 (`copilot --acp` must work) and, for GitHub adapters, either a
 `gh auth login` session or a `GH_TOKEN` / `GITHUB_TOKEN` env var.
+When the agent wants to post a review or comment it calls the
+`devdev_ask` MCP tool; the daemon prompts you for approval and, on
+“yes”, hands the agent a short-lived `gh` token to act with.
 
 ## Platform matrix
 
diff --git a/ROADMAP.md b/ROADMAP.md
index 9cd0b7d..758afc0 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -37,6 +37,28 @@ Works end-to-end and is exercised by tests on every push.
 - Scenario harness: user-surface scenarios drive only the `devdev`
   binary + IPC + checkpoints + documented env vars.
 
+**Repo watch → event pipeline (cap 26 / cap 27)**
+
+- `devdev repo watch <owner>/<repo>` polls GitHub, hashes PR state,
+  consults an append-only NDJSON idempotency ledger, and emits
+  `PrOpened` / `PrUpdated` / `PrClosed` events on an internal
+  `EventBus`. Per-PR `MonitorPrTask`s subscribe and re-prompt the
+  agent on update.
+- `devdev_ask` MCP tool: the universal approval seam. Agent calls it
+  with `kind={post_review,post_comment,request_token,question}`;
+  daemon routes through `ApprovalGate` and — on approval, for the
+  external-action kinds — surfaces a host-derived short-lived
+  `GH_TOKEN` so the agent can run `gh` itself. No typed adapter path.
+
+**Vibe Check (cap 25)**
+
+- `devdev init` runs a scribe session that writes `.devdev/*.md`
+  preference files in the user's voice (one topic per file, append
+  `## Revision <n>` on revisits).
+- `devdev preferences list` discovers preferences across repo /
+  parents / `~/.devdev/` with repo-wins precedence; `devdev
+  preferences edit` opens `$EDITOR`.
+
 **Scenario catalog status**
 
 | ID | Status |
@@ -51,16 +73,14 @@ Works end-to-end and is exercised by tests on every push.
 
 What we're actively working on to close the DevDev-hosting loop.
 
-- **Wire `placeholder_review_fn`.** The agent-callback seam in
-  `crates/devdev-cli/src/daemon_cli.rs` is still a placeholder. Real
-  target: `MonitorPrTask` driving the same seam with real PR state.
 - **Scout routing.** Pick the right model/agent per task class
   instead of one-size-fits-all.
-- **Idempotency ledger.** Durable record of work already done so an
-  agent restart doesn't re-do the same thing.
 - **Full ACP session backend (S03/S04).** Enough plumbing that the
   agent's tool calls and mid-session events are observable from the
   scenario surface.
+- **End-to-end PR shepherding scenario (S07).** Drives `devdev init`
+  → `devdev repo watch` → mock GH adapter → asserts the agent gets
+  re-prompted with preference context on each PR update.
 
 ### Explicitly not on this list
 
diff --git a/crates/devdev-cli/src/daemon_cli.rs b/crates/devdev-cli/src/daemon_cli.rs
index 2abd1a2..394f03e 100644
--- a/crates/devdev-cli/src/daemon_cli.rs
+++ b/crates/devdev-cli/src/daemon_cli.rs
@@ -14,11 +14,10 @@
 //! * The [`AcpSessionBackend`](crate::acp_backend::AcpSessionBackend)
 //!   is live — `devdev send` spawns `copilot --acp --allow-all-tools`,
 //!   multiplexes sessions, and surfaces agent replies (proven by the
-//!   gated `live_mcp` tests). The remaining stub is
-//!   [`placeholder_review_fn`]: `MonitorPrTask`'s review callback
-//!   returns an empty string, so `task/add monitor_pr` succeeds but
-//!   posts no review text. Wiring it to a per-task router session is
-//!   cap 22's work.
+//!   gated `live_mcp` tests). MonitorPr tasks now drive the agent
+//!   via `devdev_daemon::runner::RouterRunner`, which holds one
+//!   session per task and forwards prompts produced by `EventBus`
+//!   triggers (PR opened/updated/closed).
 //! * `ApprovalPolicy::AutoApprove` is hard-wired; approval-gate UX
 //!   arrives with the TUI work.
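The approval seam the doc comment above names is easiest to see as a
caller. A minimal sketch of one `devdev_ask` round-trip, assuming a
`DaemonToolProvider` already wired via `with_ask` (the `AskRequest`,
`AskKind`, and `AskResponse` types and the `ask` method are the ones
this patch adds under `crates/devdev-daemon/src/mcp/`; the calling
function and its printing are illustrative, not part of the patch):

```rust
use devdev_daemon::mcp::{
    AskKind, AskRequest, AskResponse, DaemonToolProvider, McpToolProvider,
};

// Hypothetical caller; in the shipped flow the ACP agent invokes the
// MCP tool and the daemon routes the call through the ApprovalGate.
async fn try_post_review(provider: &DaemonToolProvider) {
    let resp = provider
        .ask(AskRequest {
            kind: AskKind::PostReview,
            summary: "post review on PR #42".into(),
            payload: serde_json::json!({ "comment": "looks good" }),
        })
        .await
        .expect("approval channel alive");
    match resp {
        // External-action kinds carry the short-lived gh token on approval.
        AskResponse::Approved { token, .. } => {
            println!("approved; token present: {}", token.is_some());
        }
        AskResponse::Rejected { reason } => println!("rejected: {reason}"),
        _ => println!("no decision (timed out)"),
    }
}
```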
@@ -30,14 +29,15 @@
 use anyhow::{Context, Result, anyhow};
 use clap::Parser;
 use tokio::sync::{Mutex, watch};
 
-use devdev_daemon::dispatch::DispatchContext;
+use devdev_daemon::dispatch::{DispatchContext, spawn_event_coordinator};
 use devdev_daemon::ipc::{IpcClient, IpcServer, read_port};
+use devdev_daemon::ledger::NdjsonLedger;
 use devdev_daemon::mcp::{DaemonToolProvider, McpServer};
 use devdev_daemon::router::SessionRouter;
 use devdev_daemon::{Daemon, DaemonConfig, DaemonError, server};
 use devdev_integrations::{GitHubAdapter, LiveGitHubAdapter, MockGitHubAdapter};
 use devdev_tasks::approval::{ApprovalPolicy, approval_channel};
-use devdev_tasks::monitor_pr::ReviewFn;
+use devdev_tasks::events::EventBus;
 use devdev_tasks::registry::TaskRegistry;
 
 use crate::acp_backend::AcpSessionBackend;
@@ -107,6 +107,69 @@ pub struct StatusArgs {
     pub json: bool,
 }
 
+#[derive(Parser, Debug, Clone)]
+pub struct RepoWatchArgs {
+    #[arg(long)]
+    pub data_dir: Option<PathBuf>,
+
+    /// Override poll interval (default: 60 seconds).
+    #[arg(long)]
+    pub poll_interval_secs: Option<u64>,
+
+    /// Repository in `owner/repo` form.
+    pub repo: String,
+}
+
+#[derive(Parser, Debug, Clone)]
+pub struct RepoUnwatchArgs {
+    #[arg(long)]
+    pub data_dir: Option<PathBuf>,
+
+    /// Repository in `owner/repo` form.
+    pub repo: String,
+}
+
+#[derive(Parser, Debug, Clone)]
+pub struct InitArgs {
+    #[arg(long)]
+    pub data_dir: Option<PathBuf>,
+
+    /// Working directory whose `.devdev/` will receive preference
+    /// files. Defaults to the current directory.
+    #[arg(long)]
+    pub workdir: Option<PathBuf>,
+
+    /// End the conversation after one round-trip (used by tests).
+    #[arg(long, hide = true)]
+    pub one_shot: bool,
+}
+
+#[derive(Parser, Debug, Clone)]
+pub struct PreferencesListArgs {
+    /// Working directory to scan for `.devdev/*.md`. Defaults to CWD.
+    #[arg(long)]
+    pub workdir: Option<PathBuf>,
+
+    /// Skip the home (`~/.devdev/`) layer.
+    #[arg(long)]
+    pub no_home: bool,
+
+    /// Emit JSON (`[{path, title, layer}]`) instead of a human table.
+    #[arg(long)]
+    pub json: bool,
+}
+
+#[derive(Parser, Debug, Clone)]
+pub struct PreferencesEditArgs {
+    /// Title (matched case-insensitively against `# Title` lines) or
+    /// file stem.
+    pub name: String,
+
+    /// Working directory to scan. Defaults to CWD.
+    #[arg(long)]
+    pub workdir: Option<PathBuf>,
+}
+
 // ─── Helpers ─────────────────────────────────────────────────────
 
 fn resolve_data_dir(explicit: Option<PathBuf>) -> PathBuf {
@@ -133,12 +196,9 @@ fn select_github_adapter(flag: Option<&str>) -> Arc<dyn GitHubAdapter> {
     }
 }
 
-/// Placeholder review callback. TODO: swap for a real router-backed
-/// review once `AcpSessionBackend` is wired.
-fn placeholder_review_fn() -> ReviewFn {
-    Arc::new(|_prompt: String| Box::pin(async move { Ok(String::new()) }))
-}
-
+// The placeholder review seam is gone; the agent seam is now
+// [`devdev_daemon::runner::RouterRunner`], built per-task in
+// [`DispatchContext::handle_task_add`].
 async fn connect_ipc(data_dir: &Path) -> Result<IpcClient> {
     let port = read_port(data_dir)
         .with_context(|| format!("reading port file in {}", data_dir.display()))?
@@ -199,7 +259,33 @@ pub async fn run_up(args: UpArgs) -> Result<()> {
     // daemon's `Arc<Mutex<Fs>>` so an agent-driven `devdev_fs_write`
     // and a user-driven `fs/read` IPC call observe the same bytes.
     let fs = Arc::clone(&daemon.fs);
-    let mcp_provider = Arc::new(DaemonToolProvider::new(Arc::clone(&tasks), Arc::clone(&fs)));
+
+    // Build the shared approval channel + secrets slot up-front so
+    // both the MCP provider (sender side, via `devdev_ask`) and the
+    // dispatch IPC (receiver side, via `approval_response`) can hold
+    // halves.
+    let policy = ApprovalPolicy::AutoApprove;
+    let (gate, handle) = approval_channel(policy, APPROVAL_TIMEOUT);
+    let approval_gate = Arc::new(Mutex::new(gate));
+    let approval_handle = Arc::new(Mutex::new(handle));
+    let agent_secrets = Arc::new(Mutex::new(devdev_daemon::secrets::AgentSecrets::default()));
+
+    // Best-effort `gh auth token` sample — non-fatal if absent.
+    match devdev_daemon::secrets::try_read_gh_token().await {
+        Ok(Some(token)) => {
+            agent_secrets.lock().await.set_gh_token(Some(token));
+            eprintln!("DevDev: gh token sampled (devdev_ask will hand it out on approval)");
+        }
+        Ok(None) => {
+            eprintln!("DevDev: no gh token (run `gh auth login` to enable PR posting)");
+        }
+        Err(e) => eprintln!("DevDev: gh auth token failed: {e}"),
+    }
+
+    let mcp_provider = Arc::new(
+        DaemonToolProvider::new(Arc::clone(&tasks), Arc::clone(&fs))
+            .with_ask(Arc::clone(&approval_gate), Arc::clone(&agent_secrets)),
+    );
     let mcp_server = McpServer::start(mcp_provider)
         .await
         .context("starting local MCP server")?;
@@ -213,10 +299,18 @@
     ));
     let router = Arc::new(SessionRouter::new(backend));
     let github = select_github_adapter(args.github.as_deref());
-    let policy = ApprovalPolicy::AutoApprove;
-    let (_gate, handle) = approval_channel(policy, APPROVAL_TIMEOUT);
-    let approval_handle = Arc::new(Mutex::new(handle));
-    let review_fn = placeholder_review_fn();
+    let event_bus = EventBus::new();
+
+    let ledger_path = data_dir.join("ledger.ndjson");
+    let ledger = match NdjsonLedger::open(&ledger_path) {
+        Ok(l) => Arc::new(l) as Arc<dyn devdev_daemon::ledger::IdempotencyLedger>,
+        Err(e) => {
+            return Err(anyhow!(
+                "failed to open ledger at {}: {e}",
+                ledger_path.display()
+            ));
+        }
+    };
 
     let (shutdown_tx, shutdown_rx) = watch::channel(false);
 
@@ -224,13 +318,20 @@
         router,
         tasks,
         github,
+        approval_gate,
         approval_handle,
-        review_fn,
+        event_bus,
+        ledger,
         policy,
+        agent_secrets,
         shutdown_tx.clone(),
         fs,
     ));
 
+    // Background coordinator: any PR event published on the bus
+    // becomes a `MonitorPrTask` (created on first observation).
+    let coordinator = spawn_event_coordinator(Arc::clone(&ctx), shutdown_tx.subscribe());
+
     let server_task = tokio::spawn(server::run(Arc::clone(&ctx), server, shutdown_rx));
 
     eprintln!(
@@ -251,6 +352,7 @@
 
     // Let the accept loop observe the flag and exit.
     let _ = server_task.await;
+    let _ = coordinator.await;
 
     // Stop the MCP server so its port is released before we claim
     // shutdown is done. Shutdown is graceful — no pending requests.
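Before the next hunk, the ledger contract `run_up` just wired is
worth a sketch: the dedup check a poller performs before re-emitting
an event. A minimal sketch — `LedgerKey::new`, `seen`, and `record`
are the real API from `crates/devdev-daemon/src/ledger.rs`, while the
`pr_state` resource type and the `octo/widgets#7` slug are
illustrative assumptions:

```rust
use devdev_daemon::ledger::{IdempotencyLedger, LedgerError, LedgerKey, NdjsonLedger};

/// Returns true when the caller should publish PrOpened/PrUpdated,
/// i.e. this exact (resource, state-hash) pair is new to the ledger.
fn emit_if_new(ledger: &NdjsonLedger, state_hash: &str) -> Result<bool, LedgerError> {
    // One key per (adapter, resource, observed-state) tuple; a changed
    // PR hashes to a new key, so updates are never suppressed.
    let key = LedgerKey::new("github", "pr_state", "octo/widgets#7", state_hash);
    if ledger.seen(&key)? {
        return Ok(false); // already emitted for this exact state
    }
    ledger.record(&key, serde_json::Value::Null)?;
    Ok(true)
}
```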
@@ -343,3 +445,228 @@ pub async fn run_status(args: StatusArgs) -> Result<()> {
     println!("sessions={sessions}");
     Ok(())
 }
+
+// ─── repo watch / unwatch ────────────────────────────────────────
+
+fn split_owner_repo(slug: &str) -> Result<(&str, &str)> {
+    slug.split_once('/')
+        .filter(|(o, r)| !o.is_empty() && !r.is_empty() && !r.contains('/'))
+        .ok_or_else(|| anyhow!("expected `owner/repo`, got `{slug}`"))
+}
+
+pub async fn run_repo_watch(args: RepoWatchArgs) -> Result<()> {
+    let (owner, repo) = split_owner_repo(&args.repo)?;
+    let data_dir = resolve_data_dir(args.data_dir);
+    let mut client = connect_ipc(&data_dir).await?;
+    let mut params = serde_json::json!({ "owner": owner, "repo": repo });
+    if let Some(secs) = args.poll_interval_secs {
+        params["poll_interval_secs"] = secs.into();
+    }
+    let resp = client
+        .request("repo/watch", params)
+        .await
+        .context("sending repo/watch IPC request")?;
+    if let Some(err) = resp.error {
+        return Err(anyhow!("repo/watch failed: {}", err.message));
+    }
+    let result = resp.result.unwrap_or_default();
+    let task_id = result["task_id"].as_str().unwrap_or("?");
+    let already = result["already_watching"].as_bool().unwrap_or(false);
+    if already {
+        println!("already watching {owner}/{repo} as {task_id}");
+    } else {
+        println!("watching {owner}/{repo} as {task_id}");
+    }
+    Ok(())
+}
+
+pub async fn run_repo_unwatch(args: RepoUnwatchArgs) -> Result<()> {
+    let (owner, repo) = split_owner_repo(&args.repo)?;
+    let data_dir = resolve_data_dir(args.data_dir);
+    let mut client = connect_ipc(&data_dir).await?;
+    let resp = client
+        .request(
+            "repo/unwatch",
+            serde_json::json!({ "owner": owner, "repo": repo }),
+        )
+        .await
+        .context("sending repo/unwatch IPC request")?;
+    if let Some(err) = resp.error {
+        return Err(anyhow!("repo/unwatch failed: {}", err.message));
+    }
+    println!("unwatched {owner}/{repo}");
+    Ok(())
+}
+
+// ─── init (Vibe Check scribe) ────────────────────────────────────
+
+const VIBE_CHECK_SYSTEM_PROMPT: &str = include_str!("vibe_check_prompt.md");
+
+/// Cross-platform home-directory lookup. We deliberately avoid the
+/// `dirs` crate to keep `devdev-cli` dependencies lean; the env
+/// variables we read are the same ones `dirs` consults first.
+fn home_dir() -> Option<PathBuf> {
+    if let Ok(h) = std::env::var("HOME")
+        && !h.is_empty()
+    {
+        return Some(PathBuf::from(h));
+    }
+    if let Ok(p) = std::env::var("USERPROFILE")
+        && !p.is_empty()
+    {
+        return Some(PathBuf::from(p));
+    }
+    None
+}
+
+pub async fn run_init(args: InitArgs) -> Result<()> {
+    let data_dir = resolve_data_dir(args.data_dir);
+    let workdir = args
+        .workdir
+        .clone()
+        .unwrap_or(std::env::current_dir().context("resolving workdir")?);
+
+    let mut client = connect_ipc(&data_dir).await?;
+
+    // Seed the session with the scribe persona + current workdir hint.
+    let preamble = format!(
+        "{VIBE_CHECK_SYSTEM_PROMPT}\n\nWorking directory: {}\n",
+        workdir.display()
+    );
+    eprintln!("DevDev Vibe Check — interviewing you for preferences. Type a blank line to finish.");
+    eprintln!(
+        "(Files will be written under {})\n",
+        workdir.join(".devdev").display()
+    );
+
+    let opening = send_one(
+        &mut client,
+        format!("{preamble}\n\nGreet the user and ask the first question."),
+    )
+    .await?;
+    println!("scribe> {opening}\n");
+
+    if args.one_shot {
+        return Ok(());
+    }
+
+    let stdin = std::io::stdin();
+    loop {
+        eprint!("you> ");
+        use std::io::Write;
+        std::io::stderr().flush().ok();
+        let mut line = String::new();
+        if stdin.read_line(&mut line)? == 0 {
+            break;
+        }
+        let line = line.trim();
+        if line.is_empty() {
+            break;
+        }
+        let reply = send_one(&mut client, line.to_string()).await?;
+        println!("scribe> {reply}\n");
+    }
+
+    eprintln!("\nVibe Check complete. Inspect with `devdev preferences list`.");
+    Ok(())
+}
+
+async fn send_one(client: &mut devdev_daemon::ipc::IpcClient, text: String) -> Result<String> {
+    let resp = client
+        .request("send", serde_json::json!({ "text": text }))
+        .await
+        .context("sending IPC send request")?;
+    if let Some(err) = resp.error {
+        return Err(anyhow!("send failed: {}", err.message));
+    }
+    let result = resp
+        .result
+        .ok_or_else(|| anyhow!("daemon returned neither result nor error"))?;
+    Ok(result
+        .get("response")
+        .and_then(|v| v.as_str())
+        .unwrap_or("")
+        .to_string())
+}
+
+// ─── preferences list / edit ─────────────────────────────────────
+
+pub fn run_preferences_list(args: PreferencesListArgs) -> Result<()> {
+    let workdir = args
+        .workdir
+        .clone()
+        .unwrap_or(std::env::current_dir().context("resolving workdir")?);
+    let home = if args.no_home { None } else { home_dir() };
+    let files = crate::preferences::discover(&workdir, home.as_deref())
+        .context("discovering preferences")?;
+
+    if args.json {
+        let json: Vec<_> = files
+            .iter()
+            .map(|f| {
+                serde_json::json!({
+                    "path": f.path,
+                    "title": f.title,
+                    "layer": format!("{:?}", f.layer),
+                })
+            })
+            .collect();
+        println!("{}", serde_json::to_string_pretty(&json)?);
+        return Ok(());
+    }
+
+    if files.is_empty() {
+        println!("(no preferences found — run `devdev init` to create some)");
+        return Ok(());
+    }
+    for f in &files {
+        println!("[{:?}] {} — {}", f.layer, f.title, f.path.display());
+    }
+    Ok(())
+}
+
+pub fn run_preferences_edit(args: PreferencesEditArgs) -> Result<()> {
+    let workdir = args
+        .workdir
+        .clone()
+        .unwrap_or(std::env::current_dir().context("resolving workdir")?);
+    let files = crate::preferences::discover(&workdir, home_dir().as_deref())
+        .context("discovering preferences")?;
+    let needle = args.name.to_lowercase();
+    let hit = files.iter().find(|f| {
+        f.title.to_lowercase() == needle
+            || f.path
+                .file_stem()
+                .and_then(|s| s.to_str())
+                .map(|s| s.to_lowercase() == needle)
+                .unwrap_or(false)
+    });
+    let target = match hit {
+        Some(f) => f.path.clone(),
+        None => {
+            // Default to a new file under repo-local .devdev/.
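+            // Sanitize the requested name first: anything outside
+            // ASCII alphanumerics becomes '-', so the new file name
+            // is safe on every platform the CLI supports.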
+            let safe: String = args
+                .name
+                .chars()
+                .map(|c| if c.is_ascii_alphanumeric() { c } else { '-' })
+                .collect();
+            workdir.join(".devdev").join(format!("{safe}.md"))
+        }
+    };
+    if let Some(p) = target.parent() {
+        std::fs::create_dir_all(p).ok();
+    }
+    let editor = std::env::var("EDITOR").unwrap_or_else(|_| {
+        if cfg!(windows) {
+            "notepad".into()
+        } else {
+            "nano".into()
+        }
+    });
+    let status = std::process::Command::new(&editor)
+        .arg(&target)
+        .status()
+        .with_context(|| format!("launching $EDITOR ({editor})"))?;
+    if !status.success() {
+        return Err(anyhow!("{editor} exited with {}", status));
+    }
+    Ok(())
+}
diff --git a/crates/devdev-cli/src/lib.rs b/crates/devdev-cli/src/lib.rs
index aa64345..db88e76 100644
--- a/crates/devdev-cli/src/lib.rs
+++ b/crates/devdev-cli/src/lib.rs
@@ -2,4 +2,5 @@
 
 pub mod acp_backend;
 pub mod daemon_cli;
+pub mod preferences;
 pub mod realpath_shim;
diff --git a/crates/devdev-cli/src/main.rs b/crates/devdev-cli/src/main.rs
index f211aa7..aa12a16 100644
--- a/crates/devdev-cli/src/main.rs
+++ b/crates/devdev-cli/src/main.rs
@@ -5,7 +5,9 @@ use std::process::ExitCode;
 use clap::{Parser, Subcommand};
 
 use devdev_cli::daemon_cli::{
-    DownArgs, SendArgs, StatusArgs, UpArgs, run_down, run_send, run_status, run_up,
+    DownArgs, InitArgs, PreferencesEditArgs, PreferencesListArgs, RepoUnwatchArgs, RepoWatchArgs,
+    SendArgs, StatusArgs, UpArgs, run_down, run_init, run_preferences_edit, run_preferences_list,
+    run_repo_unwatch, run_repo_watch, run_send, run_status, run_up,
 };
 
 #[derive(Parser, Debug)]
@@ -25,6 +27,30 @@ enum Command {
     Send(SendArgs),
     /// Print daemon status.
     Status(StatusArgs),
+    /// Run the Vibe Check scribe to record `.devdev/*.md` preferences.
+    Init(InitArgs),
+    /// Repository watch operations.
+    #[command(subcommand)]
+    Repo(RepoCommand),
+    /// Inspect or edit `.devdev/*.md` preference files.
+    #[command(subcommand)]
+    Preferences(PreferencesCommand),
+}
+
+#[derive(Subcommand, Debug)]
+enum RepoCommand {
+    /// Start polling a `<owner>/<repo>` for PR events.
+    Watch(RepoWatchArgs),
+    /// Stop polling a `<owner>/<repo>`.
+    Unwatch(RepoUnwatchArgs),
+}
+
+#[derive(Subcommand, Debug)]
+enum PreferencesCommand {
+    /// List discovered preference files.
+    List(PreferencesListArgs),
+    /// Open `$EDITOR` on a preference file (creates one if absent).
+    Edit(PreferencesEditArgs),
 }
 
 fn main() -> ExitCode {
@@ -50,6 +76,11 @@ fn main() -> ExitCode {
         Command::Down(args) => rt.block_on(run_down(args)),
         Command::Send(args) => rt.block_on(run_send(args)),
         Command::Status(args) => rt.block_on(run_status(args)),
+        Command::Init(args) => rt.block_on(run_init(args)),
+        Command::Repo(RepoCommand::Watch(args)) => rt.block_on(run_repo_watch(args)),
+        Command::Repo(RepoCommand::Unwatch(args)) => rt.block_on(run_repo_unwatch(args)),
+        Command::Preferences(PreferencesCommand::List(args)) => run_preferences_list(args),
+        Command::Preferences(PreferencesCommand::Edit(args)) => run_preferences_edit(args),
     };
     match result {
         Ok(()) => ExitCode::SUCCESS,
diff --git a/crates/devdev-cli/src/preferences.rs b/crates/devdev-cli/src/preferences.rs
new file mode 100644
index 0000000..e7fe0b6
--- /dev/null
+++ b/crates/devdev-cli/src/preferences.rs
@@ -0,0 +1,264 @@
+//! `.devdev/` preference file discovery + loader.
+//!
+//! Walks from `start_dir` upward looking for `.devdev/*.md` files,
+//! then falls back to `~/.devdev/*.md`. Repo-local files always win
+//! when titles collide (later layers shadow earlier ones).
+//!
+//! Used by:
+//! - `MonitorPrTask` (Phase B2) — preference *paths* are injected into
+//!   the per-PR session prompt so the agent reads them on demand.
+//! - The forthcoming `devdev_preferences_list` MCP tool — surfaces
+//!   the loaded files so the agent can decide which to read.
+
+use std::path::{Path, PathBuf};
+
+/// One discovered preference file.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct PreferenceFile {
+    /// Absolute path to the `.md` file.
+    pub path: PathBuf,
+    /// First-line `# Title` if present, else the file stem.
+    pub title: String,
+    /// Full UTF-8 body (lossy on bad bytes — preference files are
+    /// human-written prose; we never refuse to load).
+    pub body: String,
+    /// Discovery layer (`Repo` < `Parent` < `Home`). Earlier layers
+    /// win; the loader returns files in priority order.
+    pub layer: PreferenceLayer,
+}
+
+/// Where a preference file came from. Kept as an enum (not a path
+/// depth) so we can adjust precedence without re-walking.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum PreferenceLayer {
+    /// `<repo>/.devdev/`. Highest priority.
+    Repo,
+    /// Any ancestor's `.devdev/` between repo and home.
+    Parent,
+    /// `~/.devdev/`. Lowest priority.
+    Home,
+}
+
+/// Errors the loader surfaces. We deliberately return `Ok(vec![])`
+/// rather than erroring on a missing `.devdev/` directory; the only
+/// failure modes are filesystem I/O on directories that *do* exist.
+#[derive(thiserror::Error, Debug)]
+pub enum PreferenceError {
+    #[error("read_dir {}: {source}", path.display())]
+    ReadDir {
+        path: PathBuf,
+        #[source]
+        source: std::io::Error,
+    },
+    #[error("read_file {}: {source}", path.display())]
+    ReadFile {
+        path: PathBuf,
+        #[source]
+        source: std::io::Error,
+    },
+}
+
+/// Walk from `start_dir` up to (but not including) `home_dir` and
+/// collect every `.devdev/*.md` file in priority order. Adds
+/// `~/.devdev/*.md` last.
+///
+/// `home_dir = None` skips the home layer (useful for tests).
+pub fn discover(
+    start_dir: &Path,
+    home_dir: Option<&Path>,
+) -> Result<Vec<PreferenceFile>, PreferenceError> {
+    let mut out = Vec::new();
+    let mut seen_titles: std::collections::HashSet<String> = Default::default();
+
+    let mut layer = PreferenceLayer::Repo;
+    let mut cursor = Some(start_dir.to_path_buf());
+    while let Some(dir) = cursor {
+        let prefs_dir = dir.join(".devdev");
+        if prefs_dir.is_dir() {
+            for f in load_dir(&prefs_dir, layer)? {
+                if seen_titles.insert(f.title.clone()) {
+                    out.push(f);
+                }
+            }
+        }
+        // After the start dir's own `.devdev/`, every ancestor we
+        // touch is a parent.
+        layer = PreferenceLayer::Parent;
+        cursor = dir.parent().map(Path::to_path_buf);
+        // Stop the moment we'd cross into the home directory; we'll
+        // load it explicitly below so it lands in the `Home` layer.
+        if let (Some(home), Some(next)) = (home_dir, cursor.as_deref())
+            && next == home
+        {
+            break;
+        }
+    }
+
+    if let Some(home) = home_dir {
+        let prefs_dir = home.join(".devdev");
+        if prefs_dir.is_dir() {
+            for f in load_dir(&prefs_dir, PreferenceLayer::Home)? {
+                if seen_titles.insert(f.title.clone()) {
+                    out.push(f);
+                }
+            }
+        }
+    }
+
+    Ok(out)
+}
+
+fn load_dir(dir: &Path, layer: PreferenceLayer) -> Result<Vec<PreferenceFile>, PreferenceError> {
+    let mut entries: Vec<PathBuf> = std::fs::read_dir(dir)
+        .map_err(|source| PreferenceError::ReadDir {
+            path: dir.to_path_buf(),
+            source,
+        })?
+        .filter_map(|r| r.ok().map(|e| e.path()))
+        .filter(|p| p.extension().and_then(|s| s.to_str()) == Some("md"))
+        .collect();
+    // Stable order so the agent sees the same files in the same order
+    // every run. Lexicographic on the file name is fine.
+    entries.sort();
+
+    let mut out = Vec::with_capacity(entries.len());
+    for path in entries {
+        let body = std::fs::read_to_string(&path).map_err(|source| PreferenceError::ReadFile {
+            path: path.clone(),
+            source,
+        })?;
+        let title = extract_title(&body).unwrap_or_else(|| {
+            path.file_stem()
+                .and_then(|s| s.to_str())
+                .unwrap_or("preference")
+                .to_string()
+        });
+        out.push(PreferenceFile {
+            path,
+            title,
+            body,
+            layer,
+        });
+    }
+    Ok(out)
+}
+
+/// Returns the text after the first leading `# ` line, or `None`.
+fn extract_title(body: &str) -> Option<String> {
+    body.lines()
+        .find(|l| l.starts_with("# "))
+        .map(|l| l[2..].trim().to_string())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+    use tempfile::tempdir;
+
+    fn write(path: &Path, contents: &str) {
+        if let Some(p) = path.parent() {
+            fs::create_dir_all(p).unwrap();
+        }
+        fs::write(path, contents).unwrap();
+    }
+
+    #[test]
+    fn missing_dir_is_not_error() {
+        let tmp = tempdir().unwrap();
+        let out = discover(tmp.path(), None).expect("ok");
+        assert!(out.is_empty());
+    }
+
+    #[test]
+    fn extracts_title_from_h1() {
+        let tmp = tempdir().unwrap();
+        write(
+            &tmp.path().join(".devdev").join("style.md"),
+            "# Code Style\n\nUse Rustfmt.\n",
+        );
+        let files = discover(tmp.path(), None).unwrap();
+        assert_eq!(files.len(), 1);
+        assert_eq!(files[0].title, "Code Style");
+        assert_eq!(files[0].layer, PreferenceLayer::Repo);
+    }
+
+    #[test]
+    fn falls_back_to_stem_when_no_h1() {
+        let tmp = tempdir().unwrap();
+        write(
+            &tmp.path().join(".devdev").join("vibes.md"),
+            "Just prose.\n",
+        );
+        let files = discover(tmp.path(), None).unwrap();
+        assert_eq!(files[0].title, "vibes");
+    }
+
+    #[test]
+    fn home_layer_loaded_with_lower_priority() {
+        let tmp = tempdir().unwrap();
+        let home = tempdir().unwrap();
+        write(
+            &tmp.path().join(".devdev").join("a.md"),
+            "# Shared\nrepo wins\n",
+        );
+        write(
+            &home.path().join(".devdev").join("b.md"),
+            "# Home Only\nhome\n",
+        );
+        // Title collision: both define "Shared"; repo must win.
+        write(
+            &home.path().join(".devdev").join("a.md"),
+            "# Shared\nhome version\n",
+        );
+
+        let files = discover(tmp.path(), Some(home.path())).unwrap();
+        // Two unique titles; repo's "Shared" first, home's "Home Only" second.
+        assert_eq!(files.len(), 2);
+        assert_eq!(files[0].title, "Shared");
+        assert_eq!(files[0].layer, PreferenceLayer::Repo);
+        assert!(files[0].body.contains("repo wins"));
+        assert_eq!(files[1].title, "Home Only");
+        assert_eq!(files[1].layer, PreferenceLayer::Home);
+    }
+
+    #[test]
+    fn parent_layer_loaded_between_repo_and_home() {
+        let root = tempdir().unwrap();
+        let parent = root.path().join("workspaces");
+        let repo = parent.join("project");
+        fs::create_dir_all(&repo).unwrap();
+        write(&repo.join(".devdev").join("a.md"), "# Repo Pref\nlocal\n");
+        write(
+            &parent.join(".devdev").join("b.md"),
+            "# Parent Pref\nshared\n",
+        );
+
+        let files = discover(&repo, None).unwrap();
+        assert_eq!(files.len(), 2);
+        assert_eq!(files[0].layer, PreferenceLayer::Repo);
+        assert_eq!(files[1].layer, PreferenceLayer::Parent);
+        assert_eq!(files[1].title, "Parent Pref");
+    }
+
+    #[test]
+    fn non_md_files_are_ignored() {
+        let tmp = tempdir().unwrap();
+        write(&tmp.path().join(".devdev").join("note.txt"), "no");
+        write(&tmp.path().join(".devdev").join("a.md"), "# A\n");
+        let files = discover(tmp.path(), None).unwrap();
+        assert_eq!(files.len(), 1);
+        assert_eq!(files[0].title, "A");
+    }
+
+    #[test]
+    fn sorted_lexicographically_within_layer() {
+        let tmp = tempdir().unwrap();
+        write(&tmp.path().join(".devdev").join("z.md"), "# Z\n");
+        write(&tmp.path().join(".devdev").join("a.md"), "# A\n");
+        write(&tmp.path().join(".devdev").join("m.md"), "# M\n");
+        let files = discover(tmp.path(), None).unwrap();
+        let titles: Vec<_> = files.iter().map(|f| f.title.as_str()).collect();
+        assert_eq!(titles, vec!["A", "M", "Z"]);
+    }
+}
diff --git a/crates/devdev-cli/src/vibe_check_prompt.md b/crates/devdev-cli/src/vibe_check_prompt.md
new file mode 100644
index 0000000..67ba9e6
--- /dev/null
+++ b/crates/devdev-cli/src/vibe_check_prompt.md
@@ -0,0 +1,34 @@
+You are the DevDev Vibe Check scribe.
+
+Your job is to interview the user about their coding and PR-review
+preferences, and turn the conversation into Markdown files under
+`.devdev/`. The user is the source of truth; you are the patient,
+curious notetaker.
+
+Process
+
+1. Ask one focused question at a time. Topics worth covering: coding
+   style, review tone, what they care about (correctness, perf,
+   readability, security), what they ignore, project-specific
+   conventions, and pet peeves.
+2. After each answer, decide whether to write a new file or append a
+   `## Revision <n>` section to an existing one. Use
+   `devdev_fs_write` (MCP tool) with absolute VFS paths under
+   `/.devdev/` (e.g. `/.devdev/style.md`). Keep titles short — one or
+   two words — and prose conversational.
+3. When the user signals they're done (blank line / "done" / "thanks"),
+   summarize what you wrote and stop.
+
+Voice
+
+Brief and warm. The user values empiricism, brevity, and wit — match
+that. Avoid lecturing; ask, don't tell. When you do write a file,
+quote a phrase or two from the user verbatim so the file feels like
+their voice, not yours.
+
+Constraints
+
+- Never overwrite a file silently. Append revision sections when the
+  user revises a topic.
+- Do not invent preferences. If the user is vague, ask a follow-up.
+- One file per topic. Do not bundle unrelated topics together.
diff --git a/crates/devdev-cli/tests/acceptance_preferences.rs b/crates/devdev-cli/tests/acceptance_preferences.rs
new file mode 100644
index 0000000..bba8b84
--- /dev/null
+++ b/crates/devdev-cli/tests/acceptance_preferences.rs
@@ -0,0 +1,71 @@
+//! Integration tests for `.devdev/` preference loader.
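+//!
+//! Covers the user-visible contract: repo-only discovery, strict
+//! repo > parent > home precedence on a title collision, a missing
+//! `.devdev/` yielding an empty set (not an error), and home-layer
+//! fallback when the repo has no preferences of its own.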
+
+use std::fs;
+use std::path::Path;
+
+use devdev_cli::preferences::{PreferenceLayer, discover};
+
+fn write(path: &Path, contents: &str) {
+    if let Some(p) = path.parent() {
+        fs::create_dir_all(p).unwrap();
+    }
+    fs::write(path, contents).unwrap();
+}
+
+#[test]
+fn discovers_repo_only_when_no_home() {
+    let tmp = tempfile::tempdir().unwrap();
+    write(
+        &tmp.path().join(".devdev").join("style.md"),
+        "# Style\nbe terse",
+    );
+    let files = discover(tmp.path(), None).unwrap();
+    assert_eq!(files.len(), 1);
+    assert_eq!(files[0].layer, PreferenceLayer::Repo);
+    assert_eq!(files[0].title, "Style");
+    assert!(files[0].body.contains("be terse"));
+}
+
+#[test]
+fn precedence_repo_over_parent_over_home() {
+    let root = tempfile::tempdir().unwrap();
+    let home = tempfile::tempdir().unwrap();
+    let parent = root.path().join("ws");
+    let repo = parent.join("project");
+    fs::create_dir_all(&repo).unwrap();
+    // Same title in three layers; repo wins.
+    write(&repo.join(".devdev").join("a.md"), "# Vibes\nrepo wins\n");
+    write(
+        &parent.join(".devdev").join("a.md"),
+        "# Vibes\nparent loses\n",
+    );
+    write(
+        &home.path().join(".devdev").join("a.md"),
+        "# Vibes\nhome loses\n",
+    );
+
+    let files = discover(&repo, Some(home.path())).unwrap();
+    assert_eq!(files.len(), 1);
+    assert_eq!(files[0].layer, PreferenceLayer::Repo);
+    assert!(files[0].body.contains("repo wins"));
+}
+
+#[test]
+fn missing_devdev_dir_yields_empty_vec_not_error() {
+    let tmp = tempfile::tempdir().unwrap();
+    let files = discover(tmp.path(), None).unwrap();
+    assert!(files.is_empty());
+}
+
+#[test]
+fn home_only_files_loaded_when_repo_has_none() {
+    let tmp = tempfile::tempdir().unwrap();
+    let home = tempfile::tempdir().unwrap();
+    write(
+        &home.path().join(".devdev").join("global.md"),
+        "# Global\nshared rules\n",
+    );
+    let files = discover(tmp.path(), Some(home.path())).unwrap();
+    assert_eq!(files.len(), 1);
+    assert_eq!(files[0].layer, PreferenceLayer::Home);
+}
diff --git a/crates/devdev-daemon/src/dispatch.rs b/crates/devdev-daemon/src/dispatch.rs
index f26af7a..fdb4218 100644
--- a/crates/devdev-daemon/src/dispatch.rs
+++ b/crates/devdev-daemon/src/dispatch.rs
@@ -1,5 +1,6 @@
 //! IPC method dispatcher — routes incoming requests to subsystems.
 
+use std::collections::HashMap;
 use std::sync::Arc;
 use std::time::Duration;
 
@@ -7,30 +8,50 @@
 use serde_json::{Value, json};
 use tokio::sync::{Mutex, watch};
 
 use devdev_integrations::GitHubAdapter;
-use devdev_tasks::approval::{ApprovalHandle, ApprovalPolicy, ApprovalResponse};
-use devdev_tasks::monitor_pr::{MonitorPrTask, ReviewFn};
+use devdev_tasks::approval::{
+    ApprovalGate, ApprovalHandle, ApprovalPolicy, ApprovalResponse, approval_channel,
+};
+use devdev_tasks::events::EventBus;
+use devdev_tasks::ledger::IdempotencyLedger;
+use devdev_tasks::monitor_pr::MonitorPrTask;
 use devdev_tasks::registry::TaskRegistry;
+use devdev_tasks::repo_watch::RepoWatchTask;
 use devdev_workspace::Fs;
 
 use crate::ipc::{IpcRequest, IpcResponse};
 use crate::router::{SessionHandle, SessionRouter};
+use crate::runner::RouterRunner;
+use crate::secrets::AgentSecrets;
 
 /// Shared state for the dispatch layer.
 pub struct DispatchContext {
     pub router: Arc<SessionRouter>,
     pub tasks: Arc<Mutex<TaskRegistry>>,
     pub github: Arc<dyn GitHubAdapter>,
+    /// Sender side of the approval channel, used by `devdev_ask` to
+    /// request user approval before the agent takes external action.
+    pub approval_gate: Arc<Mutex<ApprovalGate>>,
+    /// Receiver side, surfaced through the `approval_response` IPC so
+    /// a TUI / CLI can pump approvals.
     pub approval_handle: Arc<Mutex<ApprovalHandle>>,
-    pub review_fn: ReviewFn,
+    pub event_bus: EventBus,
+    pub ledger: Arc<dyn IdempotencyLedger>,
     pub approval_policy: ApprovalPolicy,
     pub approval_timeout: Duration,
+    /// Host-derived secrets (e.g. `gh auth token`) handed out only on
+    /// approved `devdev_ask` calls.
+    pub agent_secrets: Arc<Mutex<AgentSecrets>>,
     pub shutdown_tx: watch::Sender<bool>,
     /// Workspace filesystem, shared with the MCP provider so `fs/read`
     /// IPC calls observe the same bytes the agent wrote via MCP tools.
     pub fs: Arc<Mutex<Fs>>,
     interactive: Mutex<Option<SessionHandle>>,
     /// Log entries per task (task_id → messages).
-    task_logs: Mutex<std::collections::HashMap<String, Vec<String>>>,
+    task_logs: Mutex<HashMap<String, Vec<String>>>,
+    /// Active `RepoWatchTask`s keyed by `(owner, repo)`.
+    repo_watch_ids: Mutex<HashMap<(String, String), String>>,
+    /// Active `MonitorPrTask`s keyed by `(owner, repo, number)`.
+    monitor_pr_ids: Mutex<HashMap<(String, String, u64), String>>,
 }
 
 impl DispatchContext {
@@ -39,9 +60,12 @@ impl DispatchContext {
         router: Arc<SessionRouter>,
         tasks: Arc<Mutex<TaskRegistry>>,
         github: Arc<dyn GitHubAdapter>,
+        approval_gate: Arc<Mutex<ApprovalGate>>,
         approval_handle: Arc<Mutex<ApprovalHandle>>,
-        review_fn: ReviewFn,
+        event_bus: EventBus,
+        ledger: Arc<dyn IdempotencyLedger>,
         approval_policy: ApprovalPolicy,
+        agent_secrets: Arc<Mutex<AgentSecrets>>,
         shutdown_tx: watch::Sender<bool>,
         fs: Arc<Mutex<Fs>>,
     ) -> Self {
@@ -49,20 +73,29 @@ impl DispatchContext {
             router,
             tasks,
             github,
+            approval_gate,
             approval_handle,
-            review_fn,
+            event_bus,
+            ledger,
             approval_policy,
             approval_timeout: Duration::from_secs(300),
+            agent_secrets,
             shutdown_tx,
             fs,
             interactive: Mutex::new(None),
-            task_logs: Mutex::new(std::collections::HashMap::new()),
+            task_logs: Mutex::new(HashMap::new()),
+            repo_watch_ids: Mutex::new(HashMap::new()),
+            monitor_pr_ids: Mutex::new(HashMap::new()),
         }
     }
 
-    /// Set a custom approval timeout (useful for tests).
+    /// Set a custom approval timeout (useful for tests). Rebuilds the
+    /// underlying channel so the new timeout is honored.
     pub fn with_approval_timeout(mut self, timeout: Duration) -> Self {
         self.approval_timeout = timeout;
+        let (gate, handle) = approval_channel(self.approval_policy, timeout);
+        self.approval_gate = Arc::new(Mutex::new(gate));
+        self.approval_handle = Arc::new(Mutex::new(handle));
         self
     }
 
@@ -72,6 +105,8 @@ impl DispatchContext {
             "send" => self.handle_send(req).await,
             "task/add" => self.handle_task_add(req).await,
             "task/log" => self.handle_task_log(req).await,
+            "repo/watch" => self.handle_repo_watch(req).await,
+            "repo/unwatch" => self.handle_repo_unwatch(req).await,
             "status" => self.handle_status(req).await,
             "shutdown" => self.handle_shutdown(req).await,
             "approval_response" => self.handle_approval(req).await,
@@ -170,28 +205,35 @@ impl DispatchContext {
             self.approval_policy
         };
 
-        let (gate, handle) =
-            devdev_tasks::approval::approval_channel(policy, self.approval_timeout);
-
-        // Store the new approval handle (replace previous one if any).
-        {
-            let mut ah = self.approval_handle.lock().await;
-            *ah = handle;
+        // If the per-task `auto_approve` overrides the daemon's
+        // policy, swap the active approval channel so `devdev_ask`
+        // bypasses prompts for this task.
+        if policy != self.approval_policy {
+            let (gate, handle) = approval_channel(policy, self.approval_timeout);
+            *self.approval_gate.lock().await = gate;
+            *self.approval_handle.lock().await = handle;
         }
-        let gate = Arc::new(Mutex::new(gate)));
 
         let mut registry = self.tasks.lock().await;
         let task_id = registry.next_id();
+        let runner = Arc::new(RouterRunner::new(Arc::clone(&self.router), task_id.clone()))
+            as Arc<dyn devdev_tasks::agent::AgentRunner>;
         match MonitorPrTask::new(
             task_id.clone(),
             &pr_ref_str,
             Arc::clone(&self.github),
-            gate,
-            Arc::clone(&self.review_fn),
+            runner,
+            &self.event_bus,
         ) {
             Ok(task) => {
+                let pr = task.pr_ref().clone();
                 registry.add(Box::new(task));
+                drop(registry);
+                self.monitor_pr_ids.lock().await.insert(
+                    (pr.owner.clone(), pr.repo.clone(), pr.number),
+                    task_id.clone(),
+                );
                 IpcResponse::ok(
                     req.id,
                     json!({
@@ -203,6 +245,133 @@ impl DispatchContext {
         }
     }
 
+    /// "repo/watch" — start a `RepoWatchTask` for `(owner, repo)`.
+    ///
+    /// Idempotent: subsequent calls for the same repo return the
+    /// existing task id without spawning a duplicate watcher.
+    async fn handle_repo_watch(&self, req: IpcRequest) -> IpcResponse {
+        let owner = match req.params["owner"].as_str() {
+            Some(s) => s.to_string(),
+            None => return IpcResponse::err(req.id, -32602, "missing params.owner"),
+        };
+        let repo = match req.params["repo"].as_str() {
+            Some(s) => s.to_string(),
+            None => return IpcResponse::err(req.id, -32602, "missing params.repo"),
+        };
+        let interval_secs = req
+            .params
+            .get("poll_interval_secs")
+            .and_then(|v| v.as_u64())
+            .unwrap_or(60);
+
+        let key = (owner.clone(), repo.clone());
+        {
+            let watches = self.repo_watch_ids.lock().await;
+            if let Some(id) = watches.get(&key) {
+                return IpcResponse::ok(req.id, json!({ "task_id": id, "already_watching": true }));
+            }
+        }
+
+        let mut registry = self.tasks.lock().await;
+        let task_id = registry.next_id();
+        let task = RepoWatchTask::new(
+            task_id.clone(),
+            owner.clone(),
+            repo.clone(),
+            Arc::clone(&self.github),
+            Arc::clone(&self.ledger),
+            self.event_bus.clone(),
+        )
+        .with_interval(Duration::from_secs(interval_secs));
+        registry.add(Box::new(task));
+        drop(registry);
+
+        self.repo_watch_ids
+            .lock()
+            .await
+            .insert(key, task_id.clone());
+
+        IpcResponse::ok(
+            req.id,
+            json!({ "task_id": task_id, "already_watching": false }),
+        )
+    }
+
+    /// "repo/unwatch" — cancel an active `RepoWatchTask`.
+    async fn handle_repo_unwatch(&self, req: IpcRequest) -> IpcResponse {
+        let owner = match req.params["owner"].as_str() {
+            Some(s) => s.to_string(),
+            None => return IpcResponse::err(req.id, -32602, "missing params.owner"),
+        };
+        let repo = match req.params["repo"].as_str() {
+            Some(s) => s.to_string(),
+            None => return IpcResponse::err(req.id, -32602, "missing params.repo"),
+        };
+
+        let task_id = {
+            let mut watches = self.repo_watch_ids.lock().await;
+            match watches.remove(&(owner.clone(), repo.clone())) {
+                Some(id) => id,
+                None => {
+                    return IpcResponse::err(
+                        req.id,
+                        -32602,
+                        format!("not watching {owner}/{repo}"),
+                    );
+                }
+            }
+        };
+
+        let mut registry = self.tasks.lock().await;
+        match registry.cancel(&task_id) {
+            Ok(()) => IpcResponse::ok(req.id, json!({ "task_id": task_id })),
+            Err(e) => IpcResponse::err(req.id, -1, e.to_string()),
+        }
+    }
+
+    /// Ensure a `MonitorPrTask` exists for `(owner, repo, number)`.
+    /// Used by the event coordinator on first observation of a PR.
+    /// Returns `(task_id, newly_created)`. When `newly_created` is
+    /// true the caller should replay the triggering event onto the
+    /// bus so the freshly-subscribed task observes it.
+    pub async fn ensure_monitor_pr_task(
+        &self,
+        owner: &str,
+        repo: &str,
+        number: u64,
+    ) -> Result<(String, bool), String> {
+        let key = (owner.to_string(), repo.to_string(), number);
+        {
+            let map = self.monitor_pr_ids.lock().await;
+            if let Some(id) = map.get(&key) {
+                return Ok((id.clone(), false));
+            }
+        }
+
+        let pr_ref_str = format!("{owner}/{repo}#{number}");
+        let mut registry = self.tasks.lock().await;
+        let task_id = registry.next_id();
+        let runner = Arc::new(RouterRunner::new(Arc::clone(&self.router), task_id.clone()))
+            as Arc<dyn devdev_tasks::agent::AgentRunner>;
+
+        let task = MonitorPrTask::new(
+            task_id.clone(),
+            &pr_ref_str,
+            Arc::clone(&self.github),
+            runner,
+            &self.event_bus,
+        )
+        .map_err(|e| e.to_string())?;
+        registry.add(Box::new(task));
+        drop(registry);
+
+        self.monitor_pr_ids
+            .lock()
+            .await
+            .insert(key, task_id.clone());
+        Ok((task_id, true))
+    }
+
     /// "task/log" — return logged messages for a task.
     async fn handle_task_log(&self, req: IpcRequest) -> IpcResponse {
         let task_id = match req.params["task_id"].as_str() {
@@ -302,6 +471,53 @@ impl DispatchContext {
     }
 }
 
+/// Spawn a background coordinator that subscribes to the daemon
+/// [`EventBus`] and ensures a [`MonitorPrTask`] exists for every PR
+/// it observes. Runs until the watch flag flips.
+pub fn spawn_event_coordinator(
+    ctx: Arc<DispatchContext>,
+    mut shutdown: watch::Receiver<bool>,
+) -> tokio::task::JoinHandle<()> {
+    let mut rx = ctx.event_bus.subscribe();
+    tokio::spawn(async move {
+        loop {
+            tokio::select! {
+                changed = shutdown.changed() => {
+                    if changed.is_err() || *shutdown.borrow() {
+                        break;
+                    }
+                }
+                ev = rx.recv() => {
+                    let ev = match ev {
+                        Ok(e) => e,
+                        Err(_) => break,
+                    };
+                    if let Some((owner, repo, number)) = ev.pr_target() {
+                        let owner = owner.to_string();
+                        let repo = repo.to_string();
+                        match ctx
+                            .ensure_monitor_pr_task(&owner, &repo, number)
+                            .await
+                        {
+                            Ok((_, true)) => {
+                                // Newly-created task subscribed *after* this
+                                // event was published; replay so it sees it.
+                                ctx.event_bus.publish(ev.clone());
+                            }
+                            Ok((_, false)) => {}
+                            Err(e) => {
+                                tracing::warn!(
+                                    "event coordinator: ensure_monitor_pr_task failed for {owner}/{repo}#{number}: {e}"
+                                );
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    })
+}
+
 /// Extract a PR reference string from free-text description.
 fn extract_pr_ref(desc: &str) -> Option<String> {
     // Try "owner/repo#N" pattern.
diff --git a/crates/devdev-daemon/src/ledger.rs b/crates/devdev-daemon/src/ledger.rs
new file mode 100644
index 0000000..87e299c
--- /dev/null
+++ b/crates/devdev-daemon/src/ledger.rs
@@ -0,0 +1,302 @@
+//! NDJSON file backend for [`devdev_tasks::IdempotencyLedger`].
+//!
+//! ## Wire format (v1)
+//!
+//! `<data_dir>/ledger.ndjson` — one JSON object per line:
+//!
+//! ```json
+//! {"adapter":"github","resource_type":"pr_review","resource_id":"o/r#1","state_hash":"sha:abc","metadata":{},"recorded_at":1714003200,"tombstone":false}
+//! ```
+//!
+//! Pruning rewrites the file with surviving entries (no in-place edits).
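+//!
+//! Replay honors tombstones: a later line with the same key and
+//! `"tombstone":true` drops the entry from the in-memory index on
+//! `open`. Nothing in this patch writes tombstones yet; the field is
+//! reserved so a future revocation can be appended rather than
+//! edited in place.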
+
+use std::collections::HashMap;
+use std::io::{BufRead, BufReader, Write};
+use std::path::{Path, PathBuf};
+use std::sync::Mutex;
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
+
+use serde::{Deserialize, Serialize};
+
+pub use devdev_tasks::{IdempotencyLedger, LedgerError, LedgerKey};
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct LedgerEntry {
+    adapter: String,
+    resource_type: String,
+    resource_id: String,
+    state_hash: String,
+    #[serde(default)]
+    metadata: serde_json::Value,
+    recorded_at: u64,
+    #[serde(default)]
+    tombstone: bool,
+}
+
+impl LedgerEntry {
+    fn key(&self) -> LedgerKey {
+        LedgerKey::new(
+            &self.adapter,
+            &self.resource_type,
+            &self.resource_id,
+            &self.state_hash,
+        )
+    }
+}
+
+/// Append-only NDJSON file backend.
+#[derive(Debug)]
+pub struct NdjsonLedger {
+    path: PathBuf,
+    inner: Mutex<NdjsonInner>,
+}
+
+#[derive(Debug)]
+struct NdjsonInner {
+    index: HashMap<LedgerKey, u64>,
+}
+
+impl NdjsonLedger {
+    /// Open or create the ledger file at `path`. The parent directory
+    /// must already exist.
+    pub fn open(path: impl Into<PathBuf>) -> Result<Self, LedgerError> {
+        let path = path.into();
+        let mut index = HashMap::new();
+        if path.exists() {
+            let f = std::fs::File::open(&path)?;
+            for (lineno, line) in BufReader::new(f).lines().enumerate() {
+                let line = line?;
+                if line.trim().is_empty() {
+                    continue;
+                }
+                let entry: LedgerEntry = serde_json::from_str(&line)
+                    .map_err(|e| LedgerError::Format(format!("line {}: {e}", lineno + 1)))?;
+                if entry.tombstone {
+                    index.remove(&entry.key());
+                } else {
+                    index.insert(entry.key(), entry.recorded_at);
+                }
+            }
+        }
+        Ok(Self {
+            path,
+            inner: Mutex::new(NdjsonInner { index }),
+        })
+    }
+
+    pub fn path(&self) -> &Path {
+        &self.path
+    }
+
+    fn append_entry(&self, entry: &LedgerEntry) -> Result<(), LedgerError> {
+        let mut f = std::fs::OpenOptions::new()
+            .create(true)
+            .append(true)
+            .open(&self.path)?;
+        let mut line =
+            serde_json::to_string(entry).map_err(|e| LedgerError::Format(e.to_string()))?;
+        line.push('\n');
+        f.write_all(line.as_bytes())?;
+        f.sync_data()?;
+        Ok(())
+    }
+}
+
+fn now_unix() -> u64 {
+    SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .map(|d| d.as_secs())
+        .unwrap_or(0)
+}
+
+impl IdempotencyLedger for NdjsonLedger {
+    fn seen(&self, key: &LedgerKey) -> Result<bool, LedgerError> {
+        let inner = self.inner.lock().expect("ledger mutex poisoned");
+        Ok(inner.index.contains_key(key))
+    }
+
+    fn record(&self, key: &LedgerKey, metadata: serde_json::Value) -> Result<(), LedgerError> {
+        let recorded_at = now_unix();
+        let entry = LedgerEntry {
+            adapter: key.adapter.clone(),
+            resource_type: key.resource_type.clone(),
+            resource_id: key.resource_id.clone(),
+            state_hash: key.state_hash.clone(),
+            metadata,
+            recorded_at,
+            tombstone: false,
+        };
+        self.append_entry(&entry)?;
+        let mut inner = self.inner.lock().expect("ledger mutex poisoned");
+        inner.index.insert(key.clone(), recorded_at);
+        Ok(())
+    }
+
+    fn prune(&self, older_than: Duration) -> Result<usize, LedgerError> {
+        let cutoff = now_unix().saturating_sub(older_than.as_secs());
+        let mut inner = self.inner.lock().expect("ledger mutex poisoned");
+
+        let survivors: Vec<(LedgerKey, u64)> = inner
+            .index
+            .iter()
+            .filter(|(_, ts)| **ts >= cutoff)
+            .map(|(k, ts)| (k.clone(), *ts))
+            .collect();
+
+        let removed = inner.index.len() - survivors.len();
+
+        let tmp = self.path.with_extension("ndjson.tmp");
+        {
+            let mut f = std::fs::File::create(&tmp)?;
+            for (k, ts) in &survivors {
+                let entry = LedgerEntry {
+                    adapter: k.adapter.clone(),
+                    resource_type: k.resource_type.clone(),
+                    resource_id: k.resource_id.clone(),
+                    state_hash: k.state_hash.clone(),
+                    metadata: serde_json::Value::Null,
+                    recorded_at: *ts,
+                    tombstone: false,
+                };
+                let mut line = serde_json::to_string(&entry)
+                    .map_err(|e| LedgerError::Format(e.to_string()))?;
+                line.push('\n');
+                f.write_all(line.as_bytes())?;
+            }
+            f.sync_data()?;
+        }
+        std::fs::rename(&tmp, &self.path)?;
+
+        inner.index = survivors.into_iter().collect();
+        Ok(removed)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+
+    fn key(id: &str, sha: &str) -> LedgerKey {
+        LedgerKey::new("github", "pr_review", id, sha)
+    }
+
+    #[test]
+    fn record_then_seen() {
+        let dir = tempdir().unwrap();
+        let l = NdjsonLedger::open(dir.path().join("ledger.ndjson")).unwrap();
+        let k = key("o/r#1", "sha:a");
+        assert!(!l.seen(&k).unwrap());
+        l.record(&k, serde_json::json!({"note":"first"})).unwrap();
+        assert!(l.seen(&k).unwrap());
+    }
+
+    #[test]
+    fn survives_reopen() {
+        let dir = tempdir().unwrap();
+        let path = dir.path().join("ledger.ndjson");
+        let k = key("o/r#1", "sha:a");
+        {
+            let l = NdjsonLedger::open(&path).unwrap();
+            l.record(&k, serde_json::Value::Null).unwrap();
+        }
+        let l2 = NdjsonLedger::open(&path).unwrap();
+        assert!(l2.seen(&k).unwrap());
+    }
+
+    #[test]
+    fn distinct_keys_are_distinct() {
+        let dir = tempdir().unwrap();
+        let l = NdjsonLedger::open(dir.path().join("l.ndjson")).unwrap();
+        let k1 = key("o/r#1", "sha:a");
+        let k2 = key("o/r#1", "sha:b");
+        l.record(&k1, serde_json::Value::Null).unwrap();
+        assert!(l.seen(&k1).unwrap());
+        assert!(!l.seen(&k2).unwrap());
+    }
+
+    #[test]
+    fn record_is_idempotent() {
+        let dir = tempdir().unwrap();
+        let l = NdjsonLedger::open(dir.path().join("l.ndjson")).unwrap();
+        let k = key("o/r#1", "sha:a");
+        l.record(&k, serde_json::Value::Null).unwrap();
+        l.record(&k, serde_json::Value::Null).unwrap();
+        assert!(l.seen(&k).unwrap());
+    }
+
+    #[test]
+    fn prune_removes_old_only() {
+        let dir = tempdir().unwrap();
+        let path = dir.path().join("l.ndjson");
+        let l = NdjsonLedger::open(&path).unwrap();
+        let k_old = key("o/r#1", "sha:old");
+        let k_new = key("o/r#2", "sha:new");
+        l.append_entry(&LedgerEntry {
+            adapter: k_old.adapter.clone(),
+            resource_type: k_old.resource_type.clone(),
+            resource_id: k_old.resource_id.clone(),
+            state_hash: k_old.state_hash.clone(),
+            metadata: serde_json::Value::Null,
+            recorded_at: 0,
+            tombstone: false,
+        })
+        .unwrap();
+        drop(l);
+        let l = NdjsonLedger::open(&path).unwrap();
+        l.record(&k_new, serde_json::Value::Null).unwrap();
+        assert!(l.seen(&k_old).unwrap());
+        assert!(l.seen(&k_new).unwrap());
+
+        let removed = l.prune(Duration::from_secs(60)).unwrap();
+        assert_eq!(removed, 1);
+        assert!(!l.seen(&k_old).unwrap());
+        assert!(l.seen(&k_new).unwrap());
+
+        drop(l);
+        let l = NdjsonLedger::open(&path).unwrap();
+        assert!(!l.seen(&k_old).unwrap());
+        assert!(l.seen(&k_new).unwrap());
+    }
+
+    #[test]
+    fn empty_file_loads_clean() {
+        let dir = tempdir().unwrap();
+        let path = dir.path().join("l.ndjson");
+        std::fs::File::create(&path).unwrap();
+        let l = NdjsonLedger::open(&path).unwrap();
+        assert!(!l.seen(&key("o/r#1", "sha:a")).unwrap());
+    }
+
+    #[test]
+    fn malformed_line_errors() {
+        let dir = tempdir().unwrap();
+        let path = dir.path().join("l.ndjson");
+        std::fs::write(&path, "not json\n").unwrap();
+        let err = NdjsonLedger::open(&path).unwrap_err();
+        assert!(matches!(err, LedgerError::Format(_)));
+    }
+
+    #[test]
+    fn concurrent_records_dedup() {
+        use std::sync::Arc;
+        use std::thread;
+
+        let dir = tempdir().unwrap();
+        let l = Arc::new(NdjsonLedger::open(dir.path().join("l.ndjson")).unwrap());
+        let mut handles = vec![];
+        for i in 0..16 {
+            let l = Arc::clone(&l);
+            handles.push(thread::spawn(move || {
+                let k = key(&format!("o/r#{i}"), "sha:x");
+                l.record(&k, serde_json::Value::Null).unwrap();
+            }));
+        }
+        for h in handles {
+            h.join().unwrap();
+        }
+        for i in 0..16 {
+            assert!(l.seen(&key(&format!("o/r#{i}"), "sha:x")).unwrap());
+        }
+    }
+}
diff --git a/crates/devdev-daemon/src/lib.rs b/crates/devdev-daemon/src/lib.rs
index e13ba7e..12c802c 100644
--- a/crates/devdev-daemon/src/lib.rs
+++ b/crates/devdev-daemon/src/lib.rs
@@ -7,9 +7,12 @@
 pub mod checkpoint;
 pub mod dispatch;
 pub mod ipc;
+pub mod ledger;
 pub mod mcp;
 pub mod pid;
 pub mod router;
+pub mod runner;
+pub mod secrets;
 pub mod server;
 
 use std::path::PathBuf;
diff --git a/crates/devdev-daemon/src/mcp/mod.rs b/crates/devdev-daemon/src/mcp/mod.rs
index a06703b..8e70a6d 100644
--- a/crates/devdev-daemon/src/mcp/mod.rs
+++ b/crates/devdev-daemon/src/mcp/mod.rs
@@ -15,4 +15,6 @@ mod tools;
 
 pub use http::{McpEndpoint, McpServer, McpServerError};
 pub use provider::DaemonToolProvider;
-pub use tools::{McpProviderError, McpToolProvider, StaticProvider, TaskInfo};
+pub use tools::{
+    AskKind, AskRequest, AskResponse, McpProviderError, McpToolProvider, StaticProvider, TaskInfo,
+};
diff --git a/crates/devdev-daemon/src/mcp/provider.rs b/crates/devdev-daemon/src/mcp/provider.rs
index e420be9..76b7205 100644
--- a/crates/devdev-daemon/src/mcp/provider.rs
+++ b/crates/devdev-daemon/src/mcp/provider.rs
@@ -8,11 +8,13 @@
 use std::sync::Arc;
 
 use async_trait::async_trait;
+use devdev_tasks::approval::{ApprovalError, ApprovalGate};
 use devdev_tasks::registry::TaskRegistry;
 use devdev_workspace::Fs;
 use tokio::sync::Mutex;
 
-use crate::mcp::{McpProviderError, McpToolProvider, TaskInfo};
+use crate::mcp::{AskKind, AskRequest, AskResponse, McpProviderError, McpToolProvider, TaskInfo};
+use crate::secrets::AgentSecrets;
 
 /// Wraps the daemon's shared `Arc<Mutex<TaskRegistry>>` and
 /// `Arc<Mutex<Fs>>` so the MCP server can both surface task state and
@@ -25,11 +27,31 @@
 pub struct DaemonToolProvider {
     tasks: Arc<Mutex<TaskRegistry>>,
     fs: Arc<Mutex<Fs>>,
+    approval_gate: Option<Arc<Mutex<ApprovalGate>>>,
+    agent_secrets: Option<Arc<Mutex<AgentSecrets>>>,
 }
 
 impl DaemonToolProvider {
     pub fn new(tasks: Arc<Mutex<TaskRegistry>>, fs: Arc<Mutex<Fs>>) -> Self {
-        Self { tasks, fs }
+        Self {
+            tasks,
+            fs,
+            approval_gate: None,
+            agent_secrets: None,
+        }
+    }
+
+    /// Wire the approval gate + secrets slot so `devdev_ask` is live.
+    /// Constructed without these, the tool returns a `not configured`
+    /// error — keeping pre-Phase-C tests unaffected.
+    pub fn with_ask(
+        mut self,
+        gate: Arc<Mutex<ApprovalGate>>,
+        secrets: Arc<Mutex<AgentSecrets>>,
+    ) -> Self {
+        self.approval_gate = Some(gate);
+        self.agent_secrets = Some(secrets);
+        self
     }
 }
 
@@ -69,6 +91,63 @@ impl McpToolProvider for DaemonToolProvider {
             .map_err(|e| McpProviderError::Other(format!("write_path {path}: {e:?}")))?;
         Ok(())
     }
+
+    async fn ask(&self, req: AskRequest) -> Result<AskResponse, McpProviderError> {
+        let gate = self
+            .approval_gate
+            .as_ref()
+            .ok_or_else(|| McpProviderError::Other("ask: approval gate not configured".into()))?;
+        let secrets = self
+            .agent_secrets
+            .as_ref()
+            .ok_or_else(|| McpProviderError::Other("ask: secrets slot not configured".into()))?;
+
+        let action = match req.kind {
+            AskKind::PostReview => "post_review",
+            AskKind::PostComment => "post_comment",
+            AskKind::RequestToken => "request_token",
+            AskKind::Question => "question",
+        };
+        let details = serde_json::json!({
+            "summary": req.summary,
+            "payload": req.payload,
+        });
+
+        let outcome = {
+            let mut g = gate.lock().await;
+            g.request_approval(action, details).await
+        };
+
+        match outcome {
+            Ok(()) => {
+                let needs_token = matches!(
+                    req.kind,
+                    AskKind::PostReview | AskKind::PostComment | AskKind::RequestToken
+                );
+                let (token, expires_at) = if needs_token {
+                    let s = secrets.lock().await;
+                    (s.gh_token.clone(), s.token_expires_at_hint())
+                } else {
+                    (None, None)
+                };
+                Ok(AskResponse::Approved {
+                    token,
+                    expires_at,
+                    payload: req.payload,
+                })
+            }
+            Err(ApprovalError::Rejected) => Ok(AskResponse::Rejected {
+                reason: "user rejected".into(),
+            }),
+            Err(ApprovalError::Timeout) => Ok(AskResponse::Timeout),
+            Err(ApprovalError::DryRun { action, details }) => Ok(AskResponse::Rejected {
+                reason: format!("dry-run: {action} {details}"),
+            }),
+            Err(ApprovalError::ChannelClosed) => Err(McpProviderError::Other(
+                "ask: approval channel closed".into(),
+            )),
+        }
+    }
 }
 
 #[cfg(test)]
@@ -155,4 +234,169 @@ mod tests {
         let tasks = provider.tasks_list().await.expect("list");
         assert!(tasks.is_empty());
     }
+
+    // ── ask: Phase C1 coverage ────────────────────────────────────
+
+    use devdev_tasks::approval::{ApprovalPolicy, ApprovalResponse, approval_channel};
+
+    fn build_provider_with_ask(
+        policy: ApprovalPolicy,
+        timeout: Duration,
+        token: Option<&str>,
+    ) -> (
+        DaemonToolProvider,
+        Arc<Mutex<devdev_tasks::approval::ApprovalHandle>>,
+        Arc<Mutex<AgentSecrets>>,
+    ) {
+        let (gate, handle) = approval_channel(policy, timeout);
+        let gate = Arc::new(Mutex::new(gate));
+        let handle = Arc::new(Mutex::new(handle));
+        let mut secrets = AgentSecrets::default();
+        secrets.set_gh_token(token.map(str::to_string));
+        let secrets = Arc::new(Mutex::new(secrets));
+        let provider = DaemonToolProvider::new(
+            Arc::new(Mutex::new(TaskRegistry::new())),
+            Arc::new(Mutex::new(devdev_workspace::Fs::new())),
+        )
+        .with_ask(gate, Arc::clone(&secrets));
+        (provider, handle, secrets)
+    }
+
+    #[tokio::test]
+    async fn ask_post_review_auto_approve_returns_token() {
+        let (provider, _handle, _secrets) = build_provider_with_ask(
+            ApprovalPolicy::AutoApprove,
+            Duration::from_secs(1),
+            Some("ghp_live_token"),
+        );
+        let resp = provider
+            .ask(AskRequest {
+                kind: AskKind::PostReview,
+                summary: "post review on PR #42".into(),
+                payload: serde_json::json!({ "comment": "looks good" }),
+            })
+            .await
+            .expect("ask succeeds");
+        match resp {
+            AskResponse::Approved {
+                token,
+                expires_at,
+                payload,
+            } => {
+                assert_eq!(token.as_deref(), Some("ghp_live_token"));
+                assert!(expires_at.is_some());
+                assert_eq!(payload["comment"], "looks good");
+            }
+            other => panic!("expected approved, got {other:?}"),
+        }
+    }
+
+    #[tokio::test]
+    async fn
ask_question_does_not_surface_token() { + let (provider, _h, _s) = build_provider_with_ask( + ApprovalPolicy::AutoApprove, + Duration::from_secs(1), + Some("ghp_live_token"), + ); + let resp = provider + .ask(AskRequest { + kind: AskKind::Question, + summary: "what color?".into(), + payload: serde_json::json!({}), + }) + .await + .unwrap(); + match resp { + AskResponse::Approved { + token, expires_at, .. + } => { + assert!(token.is_none(), "question should not return token"); + assert!(expires_at.is_none()); + } + other => panic!("expected approved, got {other:?}"), + } + } + + #[tokio::test] + async fn ask_rejected_returns_reason() { + let (provider, handle, _s) = + build_provider_with_ask(ApprovalPolicy::Ask, Duration::from_secs(2), Some("tok")); + let req = AskRequest { + kind: AskKind::PostReview, + summary: "x".into(), + payload: serde_json::json!({}), + }; + let provider_clone = provider.clone(); + let ask_task = tokio::spawn(async move { provider_clone.ask(req).await }); + // Pump the handle: receive the request, then deny it. + let pending = { + let mut h = handle.lock().await; + h.request_rx.recv().await.expect("request arrives") + }; + { + let h = handle.lock().await; + h.response_tx + .send(ApprovalResponse { + id: pending.id.clone(), + approve: false, + }) + .await + .unwrap(); + } + let resp = ask_task.await.unwrap().unwrap(); + assert!(matches!(resp, AskResponse::Rejected { .. })); + } + + #[tokio::test] + async fn ask_timeout_returns_timeout_status() { + let (provider, _handle, _s) = + build_provider_with_ask(ApprovalPolicy::Ask, Duration::from_millis(50), None); + let resp = provider + .ask(AskRequest { + kind: AskKind::Question, + summary: "stalls".into(), + payload: serde_json::json!({}), + }) + .await + .unwrap(); + assert!(matches!(resp, AskResponse::Timeout)); + } + + #[tokio::test] + async fn ask_dry_run_policy_reports_dry_run() { + let (provider, _h, _s) = + build_provider_with_ask(ApprovalPolicy::DryRun, Duration::from_secs(1), None); + let resp = provider + .ask(AskRequest { + kind: AskKind::PostComment, + summary: "drop".into(), + payload: serde_json::json!({"comment": "x"}), + }) + .await + .unwrap(); + match resp { + AskResponse::Rejected { reason } => { + assert!(reason.contains("dry-run"), "reason was {reason}"); + } + other => panic!("expected rejected, got {other:?}"), + } + } + + #[tokio::test] + async fn ask_without_configuration_errors() { + // Provider built without `with_ask`. + let provider = DaemonToolProvider::new( + Arc::new(Mutex::new(TaskRegistry::new())), + Arc::new(Mutex::new(devdev_workspace::Fs::new())), + ); + let err = provider + .ask(AskRequest { + kind: AskKind::Question, + summary: "nope".into(), + payload: serde_json::json!({}), + }) + .await + .expect_err("must error"); + assert!(format!("{err}").contains("not configured")); + } } diff --git a/crates/devdev-daemon/src/mcp/tools.rs b/crates/devdev-daemon/src/mcp/tools.rs index 9c6a4b8..ca3136f 100644 --- a/crates/devdev-daemon/src/mcp/tools.rs +++ b/crates/devdev-daemon/src/mcp/tools.rs @@ -46,6 +46,58 @@ pub struct TaskInfo { pub status: String, } +/// Kinds of asks the agent can make through [`McpToolProvider::ask`]. +/// Drives both the user-facing summary and (for `RequestToken`) +/// whether we surface the host `gh` token in the response. +#[derive(Clone, Debug, Serialize, Deserialize, schemars::JsonSchema, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum AskKind { + /// Agent intends to post a PR review/comment. 
Approval response
+    /// surfaces a short-lived `gh` token the agent uses with `gh pr
+    /// comment` / `gh pr review`.
+    PostReview,
+    /// Agent wants to leave a non-review comment.
+    PostComment,
+    /// Agent wants the `gh` token for an action not enumerated above.
+    RequestToken,
+    /// Agent wants the user to answer an open question. No token.
+    Question,
+}
+
+/// One ask request from the agent.
+#[derive(Clone, Debug, Serialize, Deserialize, schemars::JsonSchema)]
+pub struct AskRequest {
+    pub kind: AskKind,
+    /// Human-readable single-line summary surfaced in the approval prompt.
+    pub summary: String,
+    /// Free-form structured payload (e.g. `{ comment, file?, line? }`
+    /// for `post_review`). Echoed back in the response.
+    #[serde(default)]
+    pub payload: serde_json::Value,
+}
+
+/// Outcome the agent receives.
+#[derive(Clone, Debug, Serialize, Deserialize, schemars::JsonSchema)]
+#[serde(tag = "status", rename_all = "snake_case")]
+pub enum AskResponse {
+    /// Approved. `token` is `Some` for kinds that requested a token
+    /// AND the host had one to give.
+    Approved {
+        #[serde(skip_serializing_if = "Option::is_none")]
+        token: Option<String>,
+        /// Wall-clock seconds since epoch hinting when the token
+        /// should no longer be relied upon.
+        #[serde(skip_serializing_if = "Option::is_none")]
+        expires_at: Option<u64>,
+        /// Echo of the original payload so the agent can correlate.
+        payload: serde_json::Value,
+    },
+    /// User rejected the request.
+    Rejected { reason: String },
+    /// Approval request timed out.
+    Timeout,
+}
+
 /// Data source for DevDev MCP tools.
 ///
 /// Concrete daemon wiring lives in a separate capability
@@ -69,6 +121,15 @@
             "fs_write not supported by this provider".into(),
         ))
     }
+
+    /// Submit an ask to the user. Default impl rejects so read-only
+    /// providers (e.g. `StaticProvider`) fail loudly rather than
+    /// auto-approving silently.
+    async fn ask(&self, _req: AskRequest) -> Result<AskResponse, McpProviderError> {
+        Err(McpProviderError::Other(
+            "ask not supported by this provider".into(),
+        ))
+    }
 }
 
 /// Fixed-data provider used by tests and documentation examples.
@@ -142,6 +203,35 @@ impl DevDevMcpHandler {
             args.path
         ))]))
     }
+
+    #[tool(
+        description = "Ask the user (via DevDev's approval gate) for permission \
+        to take an external action. Use this BEFORE running `gh pr comment`, \
+        posting a review, fetching a token, or any other side-effecting \
+        operation. `kind` is one of: `post_review` (will return a short-lived \
+        GitHub token), `post_comment` (also returns token), `request_token` \
+        (returns token only), `question` (returns approval, no token). \
+        `summary` is a single-line human-readable description shown in the \
+        approval prompt. `payload` is your free-form structured arguments \
+        (e.g. `{ \"comment\": \"...\", \"file\": \"...\", \"line\": 42 }`). \
+        The response is `{status: \"approved\"|\"rejected\"|\"timeout\", ...}`. \
+        On `approved`, use `token` (if present) with `GH_TOKEN=<token> gh ...`."
+    )]
+    async fn devdev_ask(
+        &self,
+        rmcp::handler::server::wrapper::Parameters(args): rmcp::handler::server::wrapper::Parameters<
+            AskRequest,
+        >,
+    ) -> Result<CallToolResult, McpError> {
+        let resp = self
+            .provider
+            .ask(args)
+            .await
+            .map_err(|e| McpError::internal_error(e.to_string(), None))?;
+        // Serialize is infallible for this enum.
+        let text = serde_json::to_string_pretty(&resp).unwrap();
+        Ok(CallToolResult::success(vec![Content::text(text)]))
+    }
 }
 
 /// Arguments for the `devdev_fs_write` MCP tool.
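A minimal sketch of how these shapes compose end to end — assuming a `provider` wired via `DaemonToolProvider::new(..).with_ask(gate, secrets)` as in the tests; the trailing `gh` invocation is illustrative, not part of this patch:

```rust
// Sketch only: exercising the ask seam in-process. Error handling is
// elided with `?`; the payload and summary strings are made up.
let resp = provider
    .ask(AskRequest {
        kind: AskKind::PostComment,
        summary: "comment on test-org/test-repo#1".into(),
        payload: serde_json::json!({ "comment": "nit: missing regression test" }),
    })
    .await?;
match resp {
    // Token is present when the kind requested one and the host had one.
    AskResponse::Approved { token: Some(token), .. } => {
        // Illustrative follow-up: the agent runs `gh` itself, e.g.
        //   GH_TOKEN=<token> gh pr comment 1 --body "..."
        let _ = token;
    }
    AskResponse::Approved { token: None, .. } => { /* approved; no token needed */ }
    AskResponse::Rejected { reason } => eprintln!("rejected: {reason}"),
    AskResponse::Timeout => eprintln!("approval timed out"),
}
```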
diff --git a/crates/devdev-daemon/src/runner.rs b/crates/devdev-daemon/src/runner.rs
new file mode 100644
index 0000000..baef16d
--- /dev/null
+++ b/crates/devdev-daemon/src/runner.rs
@@ -0,0 +1,47 @@
+//! `RouterRunner` — bridge from [`AgentRunner`] to [`SessionRouter`].
+//!
+//! Each PR-level task gets its own `RouterRunner` so it owns its own
+//! agent session keyed by task id. The session is created lazily on
+//! the first `run_prompt` call.
+
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use devdev_tasks::agent::AgentRunner;
+use tokio::sync::Mutex;
+
+use crate::router::{SessionContext, SessionHandle, SessionRouter};
+
+pub struct RouterRunner {
+    router: Arc<SessionRouter>,
+    task_id: String,
+    handle: Mutex<Option<SessionHandle>>,
+}
+
+impl RouterRunner {
+    pub fn new(router: Arc<SessionRouter>, task_id: impl Into<String>) -> Self {
+        Self {
+            router,
+            task_id: task_id.into(),
+            handle: Mutex::new(None),
+        }
+    }
+}
+
+#[async_trait]
+impl AgentRunner for RouterRunner {
+    async fn run_prompt(&self, prompt: String) -> Result<String, String> {
+        let mut slot = self.handle.lock().await;
+        if slot.is_none() {
+            let h = self
+                .router
+                .create_session(&self.task_id, SessionContext::default())
+                .await
+                .map_err(|e| e.to_string())?;
+            *slot = Some(h);
+        }
+        let h = slot.as_ref().expect("just inserted");
+        let resp = h.send_prompt(&prompt).await.map_err(|e| e.to_string())?;
+        Ok(resp.text)
+    }
+}
diff --git a/crates/devdev-daemon/src/secrets.rs b/crates/devdev-daemon/src/secrets.rs
new file mode 100644
index 0000000..63ac4c2
--- /dev/null
+++ b/crates/devdev-daemon/src/secrets.rs
@@ -0,0 +1,98 @@
+//! Host-derived secrets handed to the agent only via approved
+//! `devdev_ask` calls.
+//!
+//! The slot is populated at `devdev up` time (best-effort `gh auth
+//! token`) and may be cleared / rotated at runtime. It is *never*
+//! injected into the agent's process environment — the agent must
+//! call `devdev_ask` and receive the token in the tool response.
+
+use std::process::Stdio;
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
+
+use tokio::process::Command;
+
+/// Slot of secrets that may be surfaced through `devdev_ask`.
+#[derive(Debug, Default, Clone)]
+pub struct AgentSecrets {
+    /// Result of `gh auth token`. `None` if `gh` is missing or unauth.
+    pub gh_token: Option<String>,
+    /// Wall-clock seconds since epoch when `gh_token` was sampled.
+    pub gh_token_sampled_at: Option<u64>,
+}
+
+impl AgentSecrets {
+    /// Set the GitHub token and stamp the sample time. Used both by
+    /// the boot path and by tests (which can inject deterministically).
+    pub fn set_gh_token(&mut self, token: Option<String>) {
+        self.gh_token = token;
+        self.gh_token_sampled_at = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .ok()
+            .map(|d| d.as_secs());
+    }
+
+    /// `expires_at` hint for token consumers. We do not enforce
+    /// revocation; this is informational only.
+    pub fn token_expires_at_hint(&self) -> Option<u64> {
+        // Hand out a one-hour validity window. GitHub user tokens live
+        // far longer, but bounded hints train downstream consumers
+        // not to cache.
+        const ONE_HOUR_SECS: u64 = 3600;
+        self.gh_token_sampled_at.map(|t| t + ONE_HOUR_SECS)
+    }
+}
+
+/// Best-effort `gh auth token` invocation. Returns `Ok(None)` when
+/// `gh` is not on PATH or returns a non-zero status; the caller treats
+/// that as "no token available" rather than a hard failure.
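+///
+/// A sketch of the intended boot-path call (the wiring is assumed
+/// here; the real call site lives in `devdev up`):
+///
+/// ```ignore
+/// let mut secrets = AgentSecrets::default();
+/// secrets.set_gh_token(try_read_gh_token().await.unwrap_or(None));
+/// ```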
+pub async fn try_read_gh_token() -> std::io::Result<Option<String>> {
+    let mut cmd = Command::new("gh");
+    cmd.arg("auth").arg("token");
+    cmd.stdout(Stdio::piped()).stderr(Stdio::null());
+    let output = match tokio::time::timeout(Duration::from_secs(5), cmd.output()).await {
+        Ok(r) => r?,
+        Err(_) => return Ok(None),
+    };
+    if !output.status.success() {
+        return Ok(None);
+    }
+    let token = String::from_utf8_lossy(&output.stdout).trim().to_string();
+    if token.is_empty() {
+        Ok(None)
+    } else {
+        Ok(Some(token))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn set_token_stamps_time() {
+        let mut s = AgentSecrets::default();
+        assert!(s.gh_token.is_none());
+        assert!(s.token_expires_at_hint().is_none());
+        s.set_gh_token(Some("ghp_test".to_string()));
+        assert_eq!(s.gh_token.as_deref(), Some("ghp_test"));
+        let exp = s.token_expires_at_hint().expect("hint set");
+        let now = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap()
+            .as_secs();
+        // Expires roughly an hour from now (bounded slack for slow tests).
+        assert!(exp >= now + 3500 && exp <= now + 3700);
+    }
+
+    #[test]
+    fn clear_removes_token_but_keeps_hint() {
+        let mut s = AgentSecrets::default();
+        s.set_gh_token(Some("x".into()));
+        s.set_gh_token(None);
+        assert!(s.gh_token.is_none());
+        // Hint stays Some because we stamped time on the second call;
+        // hint is a function of "when did we last sample", not whether
+        // we got a value.
+        assert!(s.token_expires_at_hint().is_some());
+    }
+}
diff --git a/crates/devdev-daemon/tests/e2e_pr_shepherding.rs b/crates/devdev-daemon/tests/e2e_pr_shepherding.rs
index 24a0e17..7d9991a 100644
--- a/crates/devdev-daemon/tests/e2e_pr_shepherding.rs
+++ b/crates/devdev-daemon/tests/e2e_pr_shepherding.rs
@@ -1,55 +1,40 @@
-//! E2E PR Shepherding tests — P2-09.
+//! E2E PR Shepherding tests — event-driven (Phase B2).
 //!
-//! Proves the full Phase 2 stack works: daemon ↔ IPC ↔ dispatch ↔ tasks ↔ mock agent ↔ mock GitHub.
+//! Proves the daemon ↔ IPC ↔ dispatch ↔ MonitorPrTask ↔ EventBus
+//! ↔ mock agent ↔ mock GitHub seam works.
 
 use std::sync::Arc;
 use std::time::Duration;
 
-use devdev_daemon::dispatch::DispatchContext;
+use devdev_daemon::dispatch::{DispatchContext, spawn_event_coordinator};
 use devdev_daemon::ipc::IpcServer;
+use devdev_daemon::ledger::NdjsonLedger;
 use devdev_daemon::router::{
     AgentResponse, ResponseChunk, RouterError, SessionBackend, SessionRouter,
 };
 use devdev_daemon::{Daemon, DaemonConfig};
 use devdev_integrations::{MockGitHubAdapter, PrState, PrStatus, PullRequest};
 use devdev_tasks::approval::{self, ApprovalPolicy};
-use devdev_tasks::monitor_pr::ReviewFn;
+use devdev_tasks::events::{DaemonEvent, EventBus};
+use devdev_tasks::ledger::IdempotencyLedger;
 use devdev_tasks::registry::TaskRegistry;
 use serde_json::json;
 use tokio::sync::{Mutex, mpsc, watch};
 
 // ── Fake Agent Backend ─────────────────────────────────────────
 
-/// Scripted fake agent that returns canned review text.
 struct FakeAgentBackend {
-    /// Responses keyed by prompt substring match.
-    responses: Vec<(&'static str, String)>,
-    /// Default response if no match.
-    default_response: String,
     session_counter: std::sync::atomic::AtomicU64,
+    prompts: std::sync::Mutex<Vec<String>>,
 }
 
 impl FakeAgentBackend {
     fn new() -> Self {
         Self {
-            responses: vec![
-                ("review", "Overall looks good.\n[src/config.rs:42] Missing validation.\n[src/lib.rs:10] Unused import.".to_string()),
-                ("new commits", "Updated review: changes look fine.\n[src/config.rs:42] Fixed now.".to_string()),
-            ],
-            default_response: "I'll look into that.".to_string(),
             session_counter: std::sync::atomic::AtomicU64::new(1),
+            prompts: std::sync::Mutex::new(Vec::new()),
         }
     }
-
-    fn find_response(&self, prompt: &str) -> String {
-        let lower = prompt.to_lowercase();
-        for (pattern, response) in &self.responses {
-            if lower.contains(pattern) {
-                return response.clone();
-            }
-        }
-        self.default_response.clone()
-    }
 }
 
 #[async_trait::async_trait]
@@ -66,8 +51,9 @@ impl SessionBackend for FakeAgentBackend {
         _session_id: &str,
         text: &str,
     ) -> Result<AgentResponse, RouterError> {
+        self.prompts.lock().unwrap().push(text.to_string());
         Ok(AgentResponse {
-            text: self.find_response(text),
+            text: format!("Reviewed: {} chars", text.len()),
             stop_reason: "end_turn".to_string(),
         })
     }
@@ -78,8 +64,8 @@
         text: &str,
         tx: mpsc::Sender<ResponseChunk>,
     ) -> Result<(), RouterError> {
-        let response = self.find_response(text);
-        let _ = tx.send(ResponseChunk::Text(response)).await;
+        self.prompts.lock().unwrap().push(text.to_string());
+        let _ = tx.send(ResponseChunk::Text("Reviewed.".into())).await;
         let _ = tx
             .send(ResponseChunk::Done {
                 stop_reason: "end_turn".to_string(),
@@ -131,36 +117,22 @@ fn test_github(sha: &str) -> MockGitHubAdapter {
     )
 }
 
-fn fake_review_fn(router: Arc<SessionRouter>) -> ReviewFn {
-    Arc::new(move |prompt| {
-        let router = Arc::clone(&router);
-        Box::pin(async move {
-            let handle = router
-                .create_interactive_session()
-                .await
-                .map_err(|e| e.to_string())?;
-            let resp = handle
-                .send_prompt(&prompt)
-                .await
-                .map_err(|e| e.to_string())?;
-            Ok(resp.text)
-        })
-    })
-}
-
 // ── E2E Harness ────────────────────────────────────────────────
 
 struct E2EHarness {
     port: u16,
     ctx: Arc<DispatchContext>,
-    github: Arc<MockGitHubAdapter>,
+    bus: EventBus,
+    backend: Arc<FakeAgentBackend>,
     shutdown_tx: watch::Sender<bool>,
     _server_handle: tokio::task::JoinHandle<()>,
+    _coord_handle: tokio::task::JoinHandle<()>,
     _daemon: Daemon,
+    _tmp: tempfile::TempDir,
 }
 
 impl E2EHarness {
-    async fn new_with_policy(policy: ApprovalPolicy) -> Self {
+    async fn new() -> Self {
         let tmp = tempfile::tempdir().unwrap();
         let config = DaemonConfig {
             data_dir: tmp.path().to_path_buf(),
@@ -174,35 +146,43 @@
         let github: Arc<dyn devdev_integrations::GitHubAdapter> =
             Arc::clone(&gh) as Arc<dyn devdev_integrations::GitHubAdapter>;
 
-        let backend: Arc<dyn SessionBackend> = Arc::new(FakeAgentBackend::new());
-        let router = Arc::new(SessionRouter::new(backend));
+        let backend = Arc::new(FakeAgentBackend::new());
+        let backend_dyn: Arc<dyn SessionBackend> = backend.clone();
+        let router = Arc::new(SessionRouter::new(backend_dyn));
         let registry = Arc::new(Mutex::new(TaskRegistry::new()));
 
-        let (approval_gate, approval_handle) =
-            approval::approval_channel(policy, Duration::from_secs(30));
-        // We'll replace this when tasks are created; keep initial handle.
-        let _ = approval_gate; // consumed by channel
-        let approval_handle = Arc::new(Mutex::new(approval_handle));
+        let policy = ApprovalPolicy::AutoApprove;
+        let (gate, handle) = approval::approval_channel(policy, Duration::from_secs(30));
+        let approval_gate = Arc::new(Mutex::new(gate));
+        let approval_handle = Arc::new(Mutex::new(handle));
+        let agent_secrets = Arc::new(Mutex::new(devdev_daemon::secrets::AgentSecrets::default()));
 
-        let (shutdown_tx, shutdown_rx) = watch::channel(false);
+        let bus = EventBus::new();
+        let ledger: Arc<dyn IdempotencyLedger> =
+            Arc::new(NdjsonLedger::open(tmp.path().join("ledger.ndjson")).unwrap());
 
-        let review_fn = fake_review_fn(Arc::clone(&router));
+        let (shutdown_tx, shutdown_rx) = watch::channel(false);
 
         let ctx = Arc::new(
             DispatchContext::new(
                 Arc::clone(&router),
                 Arc::clone(&registry),
                 github,
+                approval_gate,
                 approval_handle,
-                review_fn,
+                bus.clone(),
+                ledger,
                 policy,
+                agent_secrets,
                 shutdown_tx.clone(),
                 Arc::new(Mutex::new(devdev_workspace::Fs::new())),
             )
            .with_approval_timeout(Duration::from_secs(2)),
        );
 
+        let coord_handle = spawn_event_coordinator(Arc::clone(&ctx), shutdown_tx.subscribe());
+
         let server = IpcServer::bind().await.unwrap();
         let port = server.port();
 
@@ -211,314 +191,288 @@
             devdev_daemon::server::run(ctx_clone, server, shutdown_rx).await;
         });
 
-        // Small delay for server to be ready.
         tokio::time::sleep(Duration::from_millis(50)).await;
 
         Self {
             port,
             ctx,
-            github: gh,
+            bus,
+            backend,
             shutdown_tx,
             _server_handle: server_handle,
+            _coord_handle: coord_handle,
             _daemon: daemon,
+            _tmp: tmp,
         }
     }
 
-    async fn new() -> Self {
-        Self::new_with_policy(ApprovalPolicy::Ask).await
-    }
-
-    /// Poll all tasks once (simulates scheduler tick).
     async fn advance_polls(&self, count: usize) {
         for _ in 0..count {
             self.ctx.poll_all_tasks().await;
         }
     }
 
-    async fn stop(self) -> Vec<u8> {
+    async fn stop(self) {
         let _ = self.shutdown_tx.send(true);
         tokio::time::sleep(Duration::from_millis(50)).await;
-        // Return checkpoint data (fs serialized).
-        let fs = self._daemon.fs.lock().await;
-        fs.serialize()
     }
 }
 
-// ── Scenario A: Interactive ────────────────────────────────────
+// ── Scenarios ──────────────────────────────────────────────────
 
 #[tokio::test]
-async fn e2e_interactive_pr_monitoring() {
+async fn task_add_creates_idle_task_until_event_arrives() {
     let harness = E2EHarness::new().await;
 
-    // User sends a message to trigger PR monitoring via interactive session.
-    let resp = raw_ipc(
+    let add_resp = raw_ipc(
         harness.port,
-        "send",
-        json!({"text": "Please review PR test-org/test-repo#1"}),
+        "task/add",
+        json!({"description": "Monitor PR test-org/test-repo#1"}),
     )
     .await;
     assert!(
-        resp.error.is_none(),
-        "send should succeed: {:?}",
-        resp.error
+        add_resp.error.is_none(),
+        "task/add failed: {:?}",
+        add_resp.error
     );
-    let response_text = resp.result.unwrap()["response"]
-        .as_str()
-        .unwrap()
-        .to_string();
-    assert!(!response_text.is_empty());
 
-    // Check status.
-    let status = raw_ipc(harness.port, "status", json!({})).await;
-    assert!(status.error.is_none());
+    // No event yet → poll is quiet, agent untouched.
+    harness.advance_polls(1).await;
+    assert!(harness.backend.prompts.lock().unwrap().is_empty());
 
     harness.stop().await;
 }
 
-// ── Scenario B: Headless auto-approve ──────────────────────────
-
 #[tokio::test]
-async fn e2e_headless_auto_approve() {
-    let harness = E2EHarness::new_with_policy(ApprovalPolicy::AutoApprove).await;
+async fn pr_opened_event_drives_agent_prompt() {
+    let harness = E2EHarness::new().await;
 
-    // Create task via IPC.
- let add_resp = raw_ipc( + raw_ipc( harness.port, "task/add", - json!({ - "description": "Monitor PR test-org/test-repo#1", - "auto_approve": true - }), + json!({"description": "Monitor PR test-org/test-repo#1"}), ) .await; - assert!( - add_resp.error.is_none(), - "task/add failed: {:?}", - add_resp.error - ); - let task_id = add_resp.result.unwrap()["task_id"] - .as_str() - .unwrap() - .to_string(); - // Poll tasks to trigger review. - harness.advance_polls(1).await; + harness.bus.publish(DaemonEvent::PrOpened { + owner: "test-org".into(), + repo: "test-repo".into(), + number: 1, + head_sha: "sha-initial-001".into(), + }); - // Review should be posted (auto-approve). - assert!( - !harness.github.posted_reviews().is_empty(), - "review should be posted with auto-approve" - ); + harness.advance_polls(1).await; - // Check status. - let status = raw_ipc(harness.port, "status", json!({})).await; - let tasks_count = status.result.unwrap()["tasks"].as_u64().unwrap(); - assert!(tasks_count >= 1); + let prompts = harness.backend.prompts.lock().unwrap().clone(); + assert_eq!(prompts.len(), 1, "agent should be prompted exactly once"); + assert!(prompts[0].contains("opened")); + assert!(prompts[0].contains("test-org/test-repo#1")); - // Check task log. - let log = raw_ipc(harness.port, "task/log", json!({"task_id": task_id})).await; - let entries = log.result.unwrap()["entries"].as_array().unwrap().len(); - assert!(entries > 0, "task log should have entries after poll"); + // Task log should reflect the agent reply. + let log = raw_ipc(harness.port, "task/log", json!({"task_id": "t-1"})).await; + let entries = log.result.unwrap()["entries"].as_array().unwrap().clone(); + assert!(!entries.is_empty(), "task log should have entries"); harness.stop().await; } -// ── Scenario C: One-shot ─────────────────────────────────────── - #[tokio::test] -async fn e2e_one_shot_review() { - let harness = E2EHarness::new_with_policy(ApprovalPolicy::AutoApprove).await; +async fn pr_closed_event_completes_task() { + let harness = E2EHarness::new().await; - // Send a one-shot review request. - let resp = raw_ipc( + raw_ipc( harness.port, - "send", - json!({"text": "Review PR test-org/test-repo#1"}), + "task/add", + json!({"description": "Monitor PR test-org/test-repo#1"}), ) .await; - assert!(resp.error.is_none()); - let result = resp.result.unwrap(); - let response_text = result["response"].as_str().unwrap(); - assert!(!response_text.is_empty()); - // Shutdown via IPC. - let resp = raw_ipc(harness.port, "shutdown", json!({})).await; - assert!(resp.error.is_none()); -} + harness.bus.publish(DaemonEvent::PrClosed { + owner: "test-org".into(), + repo: "test-repo".into(), + number: 1, + merged: true, + }); -// ── Checkpoint recovery ──────────────────────────────────────── + harness.advance_polls(1).await; -#[tokio::test] -async fn e2e_checkpoint_recovery() { - let tmp = tempfile::tempdir().unwrap(); - let data_dir = tmp.path().to_path_buf(); + let status = raw_ipc(harness.port, "status", json!({})).await; + let task_count = status.result.unwrap()["tasks"].as_u64().unwrap(); + assert!(task_count >= 1); - // Phase 1: start, write to fs, checkpoint, stop. - { - let config = DaemonConfig { - data_dir: data_dir.clone(), - checkpoint_on_stop: true, - foreground: true, - }; - let daemon = Daemon::start(config, false).await.unwrap(); + // Task should be Completed; agent never invoked. 
+ assert!(harness.backend.prompts.lock().unwrap().is_empty()); - { - let mut fs = daemon.fs.lock().await; - fs.write_path(b"/marker.txt", b"phase1").unwrap(); - } + harness.stop().await; +} - daemon.save_checkpoint().await.unwrap(); - daemon.stop().await.unwrap(); - } +#[tokio::test] +async fn pr_updated_event_reprompts_agent() { + let harness = E2EHarness::new().await; - // Phase 2: restart from checkpoint, verify fs intact. - { - let config = DaemonConfig { - data_dir: data_dir.clone(), - checkpoint_on_stop: false, - foreground: true, - }; - let daemon = Daemon::start(config, true).await.unwrap(); + raw_ipc( + harness.port, + "task/add", + json!({"description": "Monitor PR test-org/test-repo#1"}), + ) + .await; - let fs = daemon.fs.lock().await; - let data = fs.read_path(b"/marker.txt").unwrap(); - assert_eq!(data, b"phase1"); + harness.bus.publish(DaemonEvent::PrOpened { + owner: "test-org".into(), + repo: "test-repo".into(), + number: 1, + head_sha: "sha-initial-001".into(), + }); + harness.advance_polls(1).await; - daemon.stop().await.unwrap(); - } -} + harness.bus.publish(DaemonEvent::PrUpdated { + owner: "test-org".into(), + repo: "test-repo".into(), + number: 1, + head_sha: "sha-second-002".into(), + }); + harness.advance_polls(1).await; -// ── Approval protocol ────────────────────────────────────────── + let prompts = harness.backend.prompts.lock().unwrap().clone(); + assert_eq!(prompts.len(), 2, "second push should re-prompt"); + assert!(prompts[1].contains("updated")); + + harness.stop().await; +} #[tokio::test] -async fn e2e_headless_approval_protocol() { - let harness = E2EHarness::new_with_policy(ApprovalPolicy::Ask).await; +async fn unrelated_pr_event_is_ignored() { + let harness = E2EHarness::new().await; - // Create task that requires approval (short timeout so test doesn't hang). - let add_resp = raw_ipc( + raw_ipc( harness.port, "task/add", - json!({ - "description": "Monitor PR test-org/test-repo#1", - "auto_approve": false - }), + json!({"description": "Monitor PR test-org/test-repo#1"}), ) .await; - assert!(add_resp.error.is_none()); - // Poll tasks — approval will timeout since nobody responds. - harness.advance_polls(1).await; + // Different number. + harness.bus.publish(DaemonEvent::PrOpened { + owner: "test-org".into(), + repo: "test-repo".into(), + number: 2, + head_sha: "x".into(), + }); - // With Ask policy and timeout, review should NOT be posted. - // But the log should still have an entry about the timeout. - let log = raw_ipc(harness.port, "task/log", json!({"task_id": "t-1"})).await; - let entries = log.result.unwrap()["entries"].as_array().unwrap().clone(); - assert!( - !entries.is_empty(), - "should have log entry about approval timeout" - ); + harness.advance_polls(1).await; + assert!(harness.backend.prompts.lock().unwrap().is_empty()); harness.stop().await; } -// ── Dry run ──────────────────────────────────────────────────── - #[tokio::test] -async fn e2e_dry_run_no_side_effects() { - let harness = E2EHarness::new_with_policy(ApprovalPolicy::DryRun).await; +async fn repo_watch_ipc_spawns_repo_watch_task() { + let harness = E2EHarness::new().await; - // Create task with dry-run policy. 
- let add_resp = raw_ipc( + let resp = raw_ipc( harness.port, - "task/add", - json!({ - "description": "Monitor PR test-org/test-repo#1" - }), + "repo/watch", + json!({ "owner": "test-org", "repo": "test-repo", "poll_interval_secs": 1 }), + ) + .await; + assert!(resp.error.is_none(), "repo/watch failed: {:?}", resp.error); + let r = resp.result.unwrap(); + assert!(r["task_id"].as_str().unwrap().starts_with("t-")); + assert_eq!(r["already_watching"], false); + + // Idempotent re-watch. + let resp2 = raw_ipc( + harness.port, + "repo/watch", + json!({ "owner": "test-org", "repo": "test-repo" }), ) .await; - assert!(add_resp.error.is_none()); + let r2 = resp2.result.unwrap(); + assert_eq!(r2["already_watching"], true); - // Poll. + // Polling RepoWatchTask publishes a PrOpened (via the mock + // adapter's existing PR), which the coordinator turns into a + // MonitorPrTask. Since polls + bus delivery + coordinator are + // async, give the system time to settle. + harness.advance_polls(1).await; + tokio::time::sleep(Duration::from_millis(50)).await; harness.advance_polls(1).await; - // No review should be posted. + let prompts = harness.backend.prompts.lock().unwrap().clone(); assert!( - harness.github.posted_reviews().is_empty(), - "dry-run should not post reviews" + !prompts.is_empty(), + "coordinator should have spawned a MonitorPrTask that prompted the agent" ); - // But log should contain the review text. - let log = raw_ipc(harness.port, "task/log", json!({"task_id": "t-1"})).await; - let entries = log.result.unwrap()["entries"].as_array().unwrap().clone(); - assert!(!entries.is_empty(), "dry-run should still log review text"); - - let text = entries[0]["text"].as_str().unwrap(); - assert!(text.contains("dry-run"), "log should mention dry-run"); - harness.stop().await; } -// ── Real GitHub (gated) ──────────────────────────────────────── - #[tokio::test] -#[ignore = "requires DEVDEV_E2E and GH_TOKEN"] -async fn e2e_real_github_pr_review() { - // Placeholder: would use LiveGitHubAdapter with real tokens. -} +async fn repo_unwatch_ipc_cancels_task() { + let harness = E2EHarness::new().await; -// ── New push triggers re-review ──────────────────────────────── + raw_ipc( + harness.port, + "repo/watch", + json!({ "owner": "test-org", "repo": "test-repo" }), + ) + .await; -#[tokio::test] -async fn e2e_new_push_triggers_rereview() { - let harness = E2EHarness::new_with_policy(ApprovalPolicy::AutoApprove).await; + let resp = raw_ipc( + harness.port, + "repo/unwatch", + json!({ "owner": "test-org", "repo": "test-repo" }), + ) + .await; + assert!(resp.error.is_none()); - // Add task. - let add_resp = raw_ipc( + // Re-unwatch should now error. + let resp2 = raw_ipc( harness.port, - "task/add", - json!({ - "description": "Monitor PR test-org/test-repo#1", - "auto_approve": true - }), + "repo/unwatch", + json!({ "owner": "test-org", "repo": "test-repo" }), ) .await; - assert!(add_resp.error.is_none()); + assert!(resp2.error.is_some()); - // First poll — initial review. - harness.advance_polls(1).await; - let reviews_after_first = harness.github.posted_reviews().len(); - assert_eq!(reviews_after_first, 1); + harness.stop().await; +} + +#[tokio::test] +async fn checkpoint_recovery_round_trips_fs() { + let tmp = tempfile::tempdir().unwrap(); + let data_dir = tmp.path().to_path_buf(); - // Set task to Idle so it checks for new SHAs. 
     {
-        let mut reg = harness.ctx.tasks.lock().await;
-        if let Some(task) = reg.get_mut("t-1") {
-            task.set_status(devdev_tasks::TaskStatus::Idle);
+        let config = DaemonConfig {
+            data_dir: data_dir.clone(),
+            checkpoint_on_stop: true,
+            foreground: true,
+        };
+        let daemon = Daemon::start(config, false).await.unwrap();
+        {
+            let mut fs = daemon.fs.lock().await;
+            fs.write_path(b"/marker.txt", b"phase1").unwrap();
         }
+        daemon.save_checkpoint().await.unwrap();
+        daemon.stop().await.unwrap();
     }
 
-    // No change — poll should be quiet.
-    harness.advance_polls(1).await;
-    assert_eq!(harness.github.posted_reviews().len(), reviews_after_first);
-
-    // Simulate new push.
-    harness
-        .github
-        .update_head_sha("test-org", "test-repo", 1, "sha-new-push-002");
-
-    // Poll again — should detect new SHA and re-review.
-    harness.advance_polls(1).await;
-    assert!(
-        harness.github.posted_reviews().len() > reviews_after_first,
-        "new push should trigger re-review"
-    );
-
-    harness.stop().await;
+    {
+        let config = DaemonConfig {
+            data_dir: data_dir.clone(),
+            checkpoint_on_stop: false,
+            foreground: true,
+        };
+        let daemon = Daemon::start(config, true).await.unwrap();
+        let fs = daemon.fs.lock().await;
+        let data = fs.read_path(b"/marker.txt").unwrap();
+        assert_eq!(data, b"phase1");
+    }
 }
 
 // ── Helpers ────────────────────────────────────────────────────
 
-/// Send a raw IPC request to the daemon.
 async fn raw_ipc(
     port: u16,
     method: &str,
diff --git a/crates/devdev-integrations/src/github.rs b/crates/devdev-integrations/src/github.rs
index 0531402..84900db 100644
--- a/crates/devdev-integrations/src/github.rs
+++ b/crates/devdev-integrations/src/github.rs
@@ -326,4 +326,34 @@ impl GitHubAdapter for LiveGitHubAdapter {
         let value: serde_json::Value = self.get_json(&url).await?;
         Ok(value["head"]["sha"].as_str().unwrap_or("").to_string())
     }
+
+    async fn list_open_prs(
+        &self,
+        owner: &str,
+        repo: &str,
+    ) -> Result<Vec<PullRequest>, GitHubError> {
+        let mut all = Vec::new();
+        let mut page = 1u32;
+        let max_pages = 10u32;
+        loop {
+            let url = format!(
+                "{API_BASE}/repos/{owner}/{repo}/pulls?state=open&per_page=100&page={page}"
+            );
+            let value: serde_json::Value = self.get_json(&url).await?;
+            let arr = value
+                .as_array()
+                .ok_or_else(|| GitHubError::Deserialize("expected array".into()))?;
+            if arr.is_empty() {
+                break;
+            }
+            for item in arr {
+                all.push(parse_pr(item.clone())?);
+            }
+            page += 1;
+            if page > max_pages {
+                break;
+            }
+        }
+        Ok(all)
+    }
 }
diff --git a/crates/devdev-integrations/src/github_mock.rs b/crates/devdev-integrations/src/github_mock.rs
index a20c19e..92e96a5 100644
--- a/crates/devdev-integrations/src/github_mock.rs
+++ b/crates/devdev-integrations/src/github_mock.rs
@@ -212,4 +212,24 @@ impl GitHubAdapter for MockGitHubAdapter {
             .map(|pr| pr.head_sha.clone())
             .ok_or_else(|| GitHubError::NotFound(format!("{owner}/{repo}#{number}")))
     }
+
+    async fn list_open_prs(
+        &self,
+        owner: &str,
+        repo: &str,
+    ) -> Result<Vec<PullRequest>, GitHubError> {
+        let overrides = self.sha_overrides.lock().unwrap().clone();
+        let mut out = Vec::new();
+        for ((o, r, n), pr) in &self.prs {
+            if o == owner && r == repo && matches!(pr.state, PrState::Open) {
+                let mut pr = pr.clone();
+                if let Some(sha) = overrides.get(&(o.clone(), r.clone(), *n)) {
+                    pr.head_sha = sha.clone();
+                }
+                out.push(pr);
+            }
+        }
+        out.sort_by_key(|p| p.number);
+        Ok(out)
+    }
 }
diff --git a/crates/devdev-integrations/src/lib.rs b/crates/devdev-integrations/src/lib.rs
index 4cdde6d..793c083 100644
--- a/crates/devdev-integrations/src/lib.rs
+++ b/crates/devdev-integrations/src/lib.rs
@@ -74,4 +74,21 @@ pub trait GitHubAdapter: Send + Sync {
         repo: &str,
         number: u64,
     ) -> Result<String, GitHubError>;
+
+    /// List open PRs in a repo. Adapters paginate internally; callers
+    /// receive the flat union. Used by `RepoWatchTask` to discover
+    /// new PRs without webhooks.
+    async fn list_open_prs(&self, owner: &str, repo: &str)
+    -> Result<Vec<PullRequest>, GitHubError>;
+}
+
+/// Fingerprint of a PR's reviewable state, used as a ledger
+/// `state_hash` to dedup re-reviews. We hash `head_sha + updated_at`
+/// so a force-push *and* a metadata-only edit both bump the key.
+/// Note `DefaultHasher` is deterministic within one binary but
+/// unspecified across Rust releases, so a toolchain bump may orphan
+/// old ledger entries — the cost is one redundant re-review.
+pub fn pr_state_hash(pr: &PullRequest) -> String {
+    use std::hash::{Hash, Hasher};
+    let mut h = std::collections::hash_map::DefaultHasher::new();
+    pr.head_sha.hash(&mut h);
+    pr.updated_at.hash(&mut h);
+    format!("sha:{:x}", h.finish())
+}
diff --git a/crates/devdev-scenarios/catalog/S07-repo-watch-task-lifecycle.md b/crates/devdev-scenarios/catalog/S07-repo-watch-task-lifecycle.md
new file mode 100644
index 0000000..2e0e540
--- /dev/null
+++ b/crates/devdev-scenarios/catalog/S07-repo-watch-task-lifecycle.md
@@ -0,0 +1,57 @@
+---
+id: S07
+title: Repo watch task lifecycle
+status: ready
+blocked-on: []
+---
+
+# S07 — Repo watch task lifecycle
+
+**User story.** A user runs `devdev up`, then `devdev repo watch
+owner/repo` to point DevDev at a GitHub repository. The daemon
+spins up a `RepoWatchTask`. Later they run `devdev repo unwatch
+owner/repo`; the task disappears. Everything stays inside the
+data dir.
+
+This scenario validates the IPC plumbing and task lifecycle for
+the repo-watch feature against the **mock** GitHub adapter — no
+real API calls. Actual PR-event delivery and re-prompting of
+agents is covered by `crates/devdev-daemon/tests/e2e_pr_shepherding.rs`.
+
+## Steps
+
+1. Run `devdev up --data-dir <dir> --github mock`, wait for the
+   daemon.
+2. IPC `repo/watch` with `{owner: "fake", repo: "repo"}`.
+3. IPC `status`. Capture task count.
+4. IPC `repo/watch` again with the same params (should be
+   idempotent).
+5. IPC `status`. Task count must be unchanged.
+6. IPC `repo/unwatch` with the same params.
+7. IPC `status`. Task count back to baseline.
+8. `devdev down`.
+
+## Assertions
+
+* Step 2 returns `{ task_id: <id>, already_watching: false }`.
+* Step 4 returns `already_watching: true` and the **same** `task_id`.
+* Step 5's task count equals step 3's task count (idempotency).
+* Step 6 returns the same `task_id` it cancelled.
+* Step 7's task count is back to baseline (≤ step 3's count − 1).
+* The outer scratch is host-confined: every diff lives inside
+  `data_dir`.
+
+## Guards against
+
+* A regression in `dispatch::handle_repo_watch`/`handle_repo_unwatch`
+  that breaks idempotency.
+* A future change that leaks repo state outside the data dir.
+* An unwatch that silently leaves the task running.
+
+## Notes
+
+This scenario does **not** exercise event delivery, MonitorPrTask
+spawning, or `devdev_ask`. Those paths have unit and e2e coverage
+in the `devdev-daemon` and `devdev-tasks` crates. S07's job is to
+prove the user-surface plumbing is wired end-to-end through the
+real binary.
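For orientation, a minimal sketch of the dedup handshake that `pr_state_hash` above feeds, using the `LedgerKey`/`IdempotencyLedger` types from `devdev-tasks/src/ledger.rs` further down. The `owner`, `repo`, `pr`, `bus`, and `ledger` bindings are assumed to be in scope; the real loop (including the PrOpened-vs-PrUpdated choice) lives in `repo_watch.rs`:

```rust
// Sketch only — RepoWatchTask's actual tick also tracks which PR
// numbers it has seen, to pick PrOpened vs PrUpdated and emit PrClosed.
let key = LedgerKey::new(
    "github",
    "pr_state",
    format!("{owner}/{repo}#{}", pr.number),
    pr_state_hash(&pr),
);
if !ledger.seen(&key)? {
    bus.publish(DaemonEvent::PrUpdated {
        owner: owner.clone(),
        repo: repo.clone(),
        number: pr.number,
        head_sha: pr.head_sha.clone(),
    });
    // Record after publishing so a crash re-publishes rather than drops.
    ledger.record(&key, serde_json::Value::Null)?;
}
```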
diff --git a/crates/devdev-scenarios/tests/scenarios.rs b/crates/devdev-scenarios/tests/scenarios.rs
index d9db39b..efa9389 100644
--- a/crates/devdev-scenarios/tests/scenarios.rs
+++ b/crates/devdev-scenarios/tests/scenarios.rs
@@ -202,3 +202,99 @@ async fn s06_checkpoint_missing_is_fresh_start() {
     let _ = daemon.status().await.expect("status");
     daemon.shutdown().expect("devdev down");
 }
+
+// ── S07 ─────────────────────────────────────────────────────────
+
+/// S07 — Repo watch task lifecycle.
+/// See: crates/devdev-scenarios/catalog/S07-repo-watch-task-lifecycle.md
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn s07_repo_watch_task_lifecycle() {
+    let scratch = Scratch::new();
+    let before = DirSnapshot::capture(scratch.outer()).expect("snapshot before");
+
+    let mut daemon = DaemonProcess::spawn(&scratch.data_dir, false).expect("devdev up");
+
+    // Small helper closure — keeps the assertions close to the
+    // steps they correspond to in the catalog file.
+    let task_count = |v: &serde_json::Value| -> u64 {
+        v.get("tasks")
+            .and_then(|t| t.as_u64())
+            .expect("status.tasks is u64")
+    };
+
+    let baseline = task_count(&daemon.status().await.expect("status baseline"));
+
+    // Step 2: repo/watch (first call → newly watched).
+    let mut client = daemon.connect().await.expect("connect 1");
+    let resp = client
+        .request(
+            "repo/watch",
+            serde_json::json!({ "owner": "fake", "repo": "repo" }),
+        )
+        .await
+        .expect("repo/watch 1");
+    let result1 = resp.result.expect("repo/watch result 1");
+    assert_eq!(result1["already_watching"], serde_json::json!(false));
+    let task_id = result1["task_id"]
+        .as_str()
+        .expect("task_id string")
+        .to_string();
+    assert!(!task_id.is_empty(), "task_id is non-empty");
+
+    let after_watch = task_count(&daemon.status().await.expect("status after watch"));
+    assert_eq!(after_watch, baseline + 1, "watch must add exactly one task");
+
+    // Step 4: idempotent repo/watch (same params → already_watching).
+    let mut client = daemon.connect().await.expect("connect 2");
+    let resp = client
+        .request(
+            "repo/watch",
+            serde_json::json!({ "owner": "fake", "repo": "repo" }),
+        )
+        .await
+        .expect("repo/watch 2");
+    let result2 = resp.result.expect("repo/watch result 2");
+    assert_eq!(result2["already_watching"], serde_json::json!(true));
+    assert_eq!(
+        result2["task_id"].as_str(),
+        Some(task_id.as_str()),
+        "idempotent watch returns same task_id"
+    );
+
+    let after_watch2 = task_count(&daemon.status().await.expect("status after watch 2"));
+    assert_eq!(
+        after_watch2, after_watch,
+        "duplicate watch must not spawn a second task"
+    );
+
+    // Step 6: unwatch.
+    let mut client = daemon.connect().await.expect("connect 3");
+    let resp = client
+        .request(
+            "repo/unwatch",
+            serde_json::json!({ "owner": "fake", "repo": "repo" }),
+        )
+        .await
+        .expect("repo/unwatch");
+    let result3 = resp.result.expect("repo/unwatch result");
+    assert_eq!(
+        result3["task_id"].as_str(),
+        Some(task_id.as_str()),
+        "unwatch returns the cancelled task_id"
+    );
+
+    // Step 7: task gone. The registry may keep the entry around in
+    // a cancelled state depending on its bookkeeping, so we assert
+    // that the count does not increase rather than an exact return
+    // to baseline. The semantic contract is "no longer running"; the
+    // wire shape is internal.
+    let after_unwatch = task_count(&daemon.status().await.expect("status after unwatch"));
+    assert!(
+        after_unwatch <= after_watch,
+        "unwatch must not increase task count: was {after_watch}, now {after_unwatch}"
+    );
+
+    daemon.shutdown().expect("devdev down");
+
+    let after = DirSnapshot::capture(scratch.outer()).expect("snapshot after");
+    assert_confined(scratch.outer(), &scratch.data_dir, &before, &after);
+}
diff --git a/crates/devdev-tasks/src/agent.rs b/crates/devdev-tasks/src/agent.rs
new file mode 100644
index 0000000..55753df
--- /dev/null
+++ b/crates/devdev-tasks/src/agent.rs
@@ -0,0 +1,15 @@
+//! `AgentRunner` — the seam between a task and the agent.
+//!
+//! Tasks need to ask the agent something (review this PR, summarize
+//! this diff) without taking a hard dependency on the daemon's
+//! [`SessionRouter`]. This trait is the seam: implementors run a
+//! prompt to completion and return the assistant's reply text.
+
+use async_trait::async_trait;
+
+#[async_trait]
+pub trait AgentRunner: Send + Sync {
+    /// Run a single prompt to completion. Returns the agent's reply
+    /// text, or a human-readable error string.
+    async fn run_prompt(&self, prompt: String) -> Result<String, String>;
+}
diff --git a/crates/devdev-tasks/src/events.rs b/crates/devdev-tasks/src/events.rs
new file mode 100644
index 0000000..03786a9
--- /dev/null
+++ b/crates/devdev-tasks/src/events.rs
@@ -0,0 +1,145 @@
+//! Internal daemon event bus (formerly `devdev-daemon/src/events.rs`).
+//!
+//! Lives in `devdev-tasks` because tasks need to publish events
+//! without taking a daemon dependency. The bus is a thin
+//! `tokio::sync::broadcast` wrapper — see [`EventBus::publish`].
+
+use serde::{Deserialize, Serialize};
+use tokio::sync::broadcast;
+
+const CHANNEL_CAPACITY: usize = 1024;
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(tag = "kind", rename_all = "snake_case")]
+pub enum DaemonEvent {
+    PrOpened {
+        owner: String,
+        repo: String,
+        number: u64,
+        head_sha: String,
+    },
+    PrUpdated {
+        owner: String,
+        repo: String,
+        number: u64,
+        head_sha: String,
+    },
+    PrClosed {
+        owner: String,
+        repo: String,
+        number: u64,
+        merged: bool,
+    },
+}
+
+impl DaemonEvent {
+    /// `(owner, repo, number)` — subscribers filter the broadcast on
+    /// this tuple to scope to a single PR.
+    pub fn pr_target(&self) -> Option<(&str, &str, u64)> {
+        match self {
+            DaemonEvent::PrOpened {
+                owner,
+                repo,
+                number,
+                ..
+            }
+            | DaemonEvent::PrUpdated {
+                owner,
+                repo,
+                number,
+                ..
+            }
+            | DaemonEvent::PrClosed {
+                owner,
+                repo,
+                number,
+                ..
+            } => Some((owner.as_str(), repo.as_str(), *number)),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct EventBus {
+    tx: broadcast::Sender<DaemonEvent>,
+}
+
+impl EventBus {
+    pub fn new() -> Self {
+        let (tx, _rx) = broadcast::channel(CHANNEL_CAPACITY);
+        Self { tx }
+    }
+
+    /// Publish an event. Returns the number of receivers reached.
+    /// A bus with no subscribers is normal, not a failure.
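+    ///
+    /// ```ignore
+    /// let bus = EventBus::new();
+    /// // No receivers yet: the underlying broadcast send errors,
+    /// // which publish maps to "reached 0 receivers".
+    /// let reached = bus.publish(DaemonEvent::PrClosed {
+    ///     owner: "o".into(), repo: "r".into(), number: 1, merged: false,
+    /// });
+    /// assert_eq!(reached, 0);
+    /// ```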
+    pub fn publish(&self, event: DaemonEvent) -> usize {
+        self.tx.send(event).unwrap_or(0)
+    }
+
+    pub fn subscribe(&self) -> broadcast::Receiver<DaemonEvent> {
+        self.tx.subscribe()
+    }
+
+    pub fn subscriber_count(&self) -> usize {
+        self.tx.receiver_count()
+    }
+}
+
+impl Default for EventBus {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn opened(n: u64) -> DaemonEvent {
+        DaemonEvent::PrOpened {
+            owner: "o".into(),
+            repo: "r".into(),
+            number: n,
+            head_sha: format!("sha{n}"),
+        }
+    }
+
+    #[tokio::test]
+    async fn publish_no_subscribers_is_ok() {
+        let bus = EventBus::new();
+        assert_eq!(bus.publish(opened(1)), 0);
+    }
+
+    #[tokio::test]
+    async fn one_subscriber_receives() {
+        let bus = EventBus::new();
+        let mut rx = bus.subscribe();
+        bus.publish(opened(1));
+        let got = rx.recv().await.unwrap();
+        assert_eq!(got, opened(1));
+    }
+
+    #[tokio::test]
+    async fn two_subscribers_each_receive() {
+        let bus = EventBus::new();
+        let mut a = bus.subscribe();
+        let mut b = bus.subscribe();
+        bus.publish(opened(1));
+        assert_eq!(a.recv().await.unwrap(), opened(1));
+        assert_eq!(b.recv().await.unwrap(), opened(1));
+    }
+
+    #[tokio::test]
+    async fn pr_target_extracts_tuple() {
+        let e = opened(42);
+        assert_eq!(e.pr_target(), Some(("o", "r", 42)));
+        let c = DaemonEvent::PrClosed {
+            owner: "o".into(),
+            repo: "r".into(),
+            number: 42,
+            merged: true,
+        };
+        assert_eq!(c.pr_target(), Some(("o", "r", 42)));
+    }
+}
diff --git a/crates/devdev-tasks/src/ledger.rs b/crates/devdev-tasks/src/ledger.rs
new file mode 100644
index 0000000..c80d717
--- /dev/null
+++ b/crates/devdev-tasks/src/ledger.rs
@@ -0,0 +1,50 @@
+//! Idempotency ledger trait — shared abstraction for "have we seen
+//! this state before?"
+//!
+//! See [`crates/devdev-daemon/src/ledger.rs`](../../../devdev-daemon/src/ledger.rs)
+//! for the file-backed implementation. The trait lives here because
+//! tasks (which can't depend on the daemon crate) consume it.
+
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+
+/// Logical key into the ledger. Equality is by all four fields.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub struct LedgerKey {
+    pub adapter: String,
+    pub resource_type: String,
+    pub resource_id: String,
+    pub state_hash: String,
+}
+
+impl LedgerKey {
+    pub fn new(
+        adapter: impl Into<String>,
+        resource_type: impl Into<String>,
+        resource_id: impl Into<String>,
+        state_hash: impl Into<String>,
+    ) -> Self {
+        Self {
+            adapter: adapter.into(),
+            resource_type: resource_type.into(),
+            resource_id: resource_id.into(),
+            state_hash: state_hash.into(),
+        }
+    }
+}
+
+/// Backend-agnostic ledger errors.
+#[derive(thiserror::Error, Debug)]
+pub enum LedgerError {
+    #[error("ledger I/O error: {0}")]
+    Io(#[from] std::io::Error),
+    #[error("ledger format error: {0}")]
+    Format(String),
+}
+
+/// Abstract durable "have we seen this state?" store.
+pub trait IdempotencyLedger: Send + Sync {
+    fn seen(&self, key: &LedgerKey) -> Result<bool, LedgerError>;
+    fn record(&self, key: &LedgerKey, metadata: serde_json::Value) -> Result<(), LedgerError>;
+    fn prune(&self, older_than: Duration) -> Result<usize, LedgerError>;
+}
diff --git a/crates/devdev-tasks/src/lib.rs b/crates/devdev-tasks/src/lib.rs
index 600af19..1d75c2e 100644
--- a/crates/devdev-tasks/src/lib.rs
+++ b/crates/devdev-tasks/src/lib.rs
@@ -3,20 +3,28 @@
 //! Manages long-lived background work. A task is a unit of ongoing activity
 //! that polls on a schedule, reacts to changes, and produces output.
+pub mod agent;
 pub mod approval;
+pub mod events;
+pub mod ledger;
 pub mod monitor_pr;
 pub mod pr_ref;
 pub mod registry;
+pub mod repo_watch;
 pub mod review;
 pub mod scheduler;
 pub mod task;
 
+pub use agent::AgentRunner;
 pub use approval::{
     ApprovalError, ApprovalGate, ApprovalPolicy, ApprovalRequest, ApprovalResponse,
 };
+pub use events::{DaemonEvent, EventBus};
+pub use ledger::{IdempotencyLedger, LedgerError, LedgerKey};
 pub use monitor_pr::MonitorPrTask;
 pub use pr_ref::PrRef;
 pub use registry::TaskRegistry;
+pub use repo_watch::RepoWatchTask;
 pub use review::{ParsedReview, parse_review};
 pub use scheduler::TaskScheduler;
 pub use task::{Task, TaskError, TaskMessage, TaskStatus};
diff --git a/crates/devdev-tasks/src/monitor_pr.rs b/crates/devdev-tasks/src/monitor_pr.rs
index 02f1cc9..18037ad 100644
--- a/crates/devdev-tasks/src/monitor_pr.rs
+++ b/crates/devdev-tasks/src/monitor_pr.rs
@@ -1,29 +1,29 @@
-//! MonitorPR task: monitors a GitHub PR, reviews it, watches for changes.
-
+//! `MonitorPrTask` — event-driven per-PR shepherd.
+//!
+//! Subscribes to the daemon [`EventBus`] on construction and filters
+//! to its own `(owner, repo, number)` triple. On `PrOpened` /
+//! `PrUpdated` it re-prompts the agent via [`AgentRunner`] with the
+//! current diff plus any caller-supplied preference-file paths. On
+//! `PrClosed` the task completes.
+//!
+//! There is no longer any `post_review`/`ApprovalGate` plumbing here.
+//! Posting is the agent's job — it runs `gh` itself, gated by the
+//! `devdev_ask` MCP tool which carries approval and a short-lived
+//! token.
+
+use std::path::PathBuf;
 use std::sync::Arc;
 use std::time::Duration;
 
-use devdev_integrations::{GitHubAdapter, Review, ReviewComment, ReviewEvent};
-use tokio::sync::Mutex;
+use devdev_integrations::GitHubAdapter;
+use tokio::sync::broadcast::{Receiver, error::TryRecvError};
 
-use crate::approval::{ApprovalError, ApprovalGate};
+use crate::agent::AgentRunner;
+use crate::events::{DaemonEvent, EventBus};
 use crate::pr_ref::PrRef;
-use crate::review::parse_review;
 use crate::task::{Task, TaskError, TaskMessage, TaskStatus};
 
-/// Callback for getting agent reviews. Takes a prompt, returns review text.
-/// This is how MonitorPrTask interacts with the agent without depending on
-/// the daemon crate's SessionHandle directly.
-pub type ReviewFn = Arc<
-    dyn Fn(
-            String,
-        )
-            -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<String, String>> + Send>>
-        + Send
-        + Sync,
->;
-
-/// A task that monitors a single GitHub PR.
+/// A task that watches a single GitHub PR for changes via the bus.
 pub struct MonitorPrTask {
     id: String,
     pr_ref: PrRef,
@@ -32,8 +32,11 @@ pub struct MonitorPrTask {
     observations: Vec<String>,
     poll_interval: Duration,
     github: Arc<dyn GitHubAdapter>,
-    approval: Arc<Mutex<ApprovalGate>>,
-    review_fn: ReviewFn,
+    runner: Arc<dyn AgentRunner>,
+    rx: Receiver<DaemonEvent>,
+    /// Paths to `.devdev/*.md` preference files (Vibe Check, Phase D).
+    /// Injected verbatim into the prompt; agent reads them on demand.
+    preference_paths: Vec<PathBuf>,
 }
 
 impl MonitorPrTask {
@@ -41,8 +44,8 @@ impl MonitorPrTask {
         id: String,
         pr_ref_str: &str,
         github: Arc<dyn GitHubAdapter>,
-        approval: Arc<Mutex<ApprovalGate>>,
-        review_fn: ReviewFn,
+        runner: Arc<dyn AgentRunner>,
+        bus: &EventBus,
     ) -> Result<Self, TaskError> {
         let pr_ref = PrRef::parse(pr_ref_str)?;
         Ok(Self {
@@ -53,8 +56,9 @@
             observations: Vec::new(),
             poll_interval: Duration::from_secs(60),
             github,
-            approval,
-            review_fn,
+            runner,
+            rx: bus.subscribe(),
+            preference_paths: Vec::new(),
         })
     }
 
@@ -63,20 +67,51 @@
         self
     }
 
+    pub fn with_preferences(mut self, paths: Vec<PathBuf>) -> Self {
+        self.preference_paths = paths;
+        self
+    }
+
     pub fn pr_ref(&self) -> &PrRef {
         &self.pr_ref
     }
 
-    /// Build the prompt for the agent.
-    fn build_prompt(&self, pr_title: &str, pr_body: &str, diff: &str) -> String {
+    pub fn observations(&self) -> &[String] {
+        &self.observations
+    }
+
+    /// Whether an event targets this task's PR.
+    fn matches(&self, ev: &DaemonEvent) -> bool {
+        match ev.pr_target() {
+            Some((o, r, n)) => {
+                o == self.pr_ref.owner && r == self.pr_ref.repo && n == self.pr_ref.number
+            }
+            None => false,
+        }
+    }
+
+    fn build_prompt(&self, kind: &str, pr_title: &str, pr_body: &str, diff: &str) -> String {
         let mut prompt = format!(
-            "You are reviewing PR #{} in {}/{}.\n\
+            "PR {} was {} (head_sha={}).\n\
              Title: {}\n\
              Description: {}\n\n\
              Diff:\n```\n{}\n```\n\n",
-            self.pr_ref.number, self.pr_ref.owner, self.pr_ref.repo, pr_title, pr_body, diff,
+            self.pr_ref,
+            kind,
+            self.last_sha.as_deref().unwrap_or("?"),
+            pr_title,
+            pr_body,
+            diff,
         );
 
+        if !self.preference_paths.is_empty() {
+            prompt.push_str("Preference files (read on demand):\n");
+            for p in &self.preference_paths {
+                prompt.push_str(&format!("- {}\n", p.display()));
+            }
+            prompt.push('\n');
+        }
+
         if !self.observations.is_empty() {
             prompt.push_str("Prior observations:\n");
             for obs in &self.observations {
@@ -86,120 +121,81 @@
         }
 
         prompt.push_str(
-            "Review this PR. For inline comments, use the format [file:line] comment.\n\
-             Provide a summary of your findings.",
+            "Review this PR. To post a comment or review, call the \
+             `devdev_ask` tool with `kind=post_review`; on approval \
+             you'll receive a short-lived token to run `gh` yourself.",
         );
         prompt
     }
 
-    async fn do_review(&mut self) -> Result<Vec<TaskMessage>, TaskError> {
+    async fn handle_event(&mut self, ev: DaemonEvent) -> Result<Vec<TaskMessage>, TaskError> {
+        match ev {
+            DaemonEvent::PrClosed { merged, .. } => {
+                self.status = TaskStatus::Completed;
+                Ok(vec![TaskMessage::Text(format!(
+                    "PR {} closed (merged={merged}).",
+                    self.pr_ref
+                ))])
+            }
+            DaemonEvent::PrOpened { head_sha, .. } => {
+                self.last_sha = Some(head_sha);
+                self.do_prompt("opened").await
+            }
+            DaemonEvent::PrUpdated { head_sha, .. } => {
+                self.last_sha = Some(head_sha);
+                self.do_prompt("updated").await
+            }
+        }
+    }
+
+    async fn do_prompt(&mut self, kind: &str) -> Result<Vec<TaskMessage>, TaskError> {
         let o = &self.pr_ref.owner;
         let r = &self.pr_ref.repo;
         let n = self.pr_ref.number;
 
-        // Fetch PR info.
         let pr = self
             .github
             .get_pr(o, r, n)
             .await
-            .map_err(|e| TaskError::PollFailed(format!("failed to fetch PR: {e}")))?;
-
-        // Check if merged/closed.
-        match pr.state {
-            devdev_integrations::PrState::Merged | devdev_integrations::PrState::Closed => {
-                self.status = TaskStatus::Completed;
-                return Ok(vec![TaskMessage::Text(format!(
-                    "PR {} is now {:?}.",
-                    self.pr_ref, pr.state
-                ))]);
-            }
-            _ => {}
+            .map_err(|e| TaskError::PollFailed(format!("get_pr: {e}")))?;
+
+        if matches!(
+            pr.state,
+            devdev_integrations::PrState::Closed | devdev_integrations::PrState::Merged
+        ) {
+            self.status = TaskStatus::Completed;
+            return Ok(vec![TaskMessage::Text(format!(
+                "PR {} is now {:?}.",
+                self.pr_ref, pr.state
+            ))]);
         }

-        // Update SHA.
-        self.last_sha = Some(pr.head_sha.clone());
-
-        // Fetch diff.
         let diff = self
             .github
             .get_pr_diff(o, r, n)
             .await
-            .map_err(|e| TaskError::PollFailed(format!("failed to fetch diff: {e}")))?;
+            .map_err(|e| TaskError::PollFailed(format!("get_pr_diff: {e}")))?;

-        // Build prompt and get review.
-        let prompt = self.build_prompt(&pr.title, pr.body.as_deref().unwrap_or(""), &diff);
-
-        let review_text = (self.review_fn)(prompt)
+        let prompt = self.build_prompt(kind, &pr.title, pr.body.as_deref().unwrap_or(""), &diff);
+        let reply = self
+            .runner
+            .run_prompt(prompt)
             .await
-            .map_err(|e| TaskError::PollFailed(format!("agent review failed: {e}")))?;
-
-        // Parse review.
-        let parsed = parse_review(&review_text);
+            .map_err(|e| TaskError::PollFailed(format!("agent: {e}")))?;

-        // Store observation.
-        let summary = if parsed.body.len() > 200 {
-            format!("{}...", &parsed.body[..200])
+        // Truncate on a char boundary — byte slicing can panic on
+        // multi-byte agent replies.
+        let summary = if reply.chars().count() > 200 {
+            format!("{}…", reply.chars().take(200).collect::<String>())
         } else {
-            parsed.body.clone()
+            reply.clone()
         };
         self.observations.push(summary);
+        self.status = TaskStatus::Idle;

-        // Build GitHub Review.
-        let review = Review {
-            event: ReviewEvent::Comment,
-            body: parsed.body.clone(),
-            comments: parsed
-                .comments
-                .iter()
-                .map(|c| ReviewComment {
-                    path: c.path.clone(),
-                    line: c.line,
-                    body: c.body.clone(),
-                })
-                .collect(),
-        };
-
-        // Request approval to post.
-        let approval_result = {
-            let mut gate = self.approval.lock().await;
-            gate.request_approval(
-                "post_review",
-                serde_json::json!({
-                    "repo": format!("{o}/{r}"),
-                    "pr": n,
-                    "comments": review.comments.len(),
-                }),
-            )
-            .await
-        };
-
-        match approval_result {
-            Ok(()) => {
-                self.github
-                    .post_review(o, r, n, review)
-                    .await
-                    .map_err(|e| TaskError::PollFailed(format!("failed to post review: {e}")))?;
-
-                Ok(vec![TaskMessage::Text(format!(
-                    "Review posted for {}:\n{review_text}",
-                    self.pr_ref
-                ))])
-            }
-            Err(ApprovalError::DryRun { .. }) => Ok(vec![TaskMessage::Text(format!(
-                "[dry-run] Would post review for {}:\n{review_text}",
-                self.pr_ref
-            ))]),
-            Err(ApprovalError::Rejected) => Ok(vec![TaskMessage::Text(format!(
-                "Review rejected by user for {}:\n{review_text}",
-                self.pr_ref
-            ))]),
-            Err(ApprovalError::Timeout) => Ok(vec![TaskMessage::Text(format!(
-                "Approval timed out for {}. Review not posted.",
-                self.pr_ref
-            ))]),
-            Err(e) => Err(TaskError::PollFailed(format!("approval error: {e}"))),
-        }
+        Ok(vec![TaskMessage::Text(format!(
+            "PR {} ({kind}) → agent reply:\n{reply}",
+            self.pr_ref
+        ))])
     }
 }

@@ -226,33 +222,29 @@ impl Task for MonitorPrTask {
             return Ok(vec![]);
         }

-        match &self.status {
-            TaskStatus::Created | TaskStatus::Polling => {
-                // First review.
-                self.do_review().await
-            }
-            TaskStatus::Idle => {
-                // Check for new commits.
-                let o = &self.pr_ref.owner;
-                let r = &self.pr_ref.repo;
-                let n = self.pr_ref.number;
-
-                let current_sha = self
-                    .github
-                    .get_pr_head_sha(o, r, n)
-                    .await
-                    .map_err(|e| TaskError::PollFailed(format!("failed to check SHA: {e}")))?;
-
-                if self.last_sha.as_deref() == Some(&current_sha) {
-                    // No change.
-                    return Ok(vec![]);
+        let mut messages = Vec::new();
+        loop {
+            match self.rx.try_recv() {
+                Ok(ev) => {
+                    if self.matches(&ev) {
+                        let mut m = self.handle_event(ev).await?;
+                        messages.append(&mut m);
+                        if self.status.is_terminal() {
+                            break;
+                        }
+                    }
+                }
+                Err(TryRecvError::Empty) => break,
+                Err(TryRecvError::Closed) => break,
+                Err(TryRecvError::Lagged(n)) => {
+                    messages.push(TaskMessage::Text(format!(
+                        "[warning] event bus lagged {n} messages — possible missed events for {}",
+                        self.pr_ref
+                    )));
+                }
-
-                // New commits — re-review.
-                self.do_review().await
             }
-            _ => Ok(vec![]),
         }
+        Ok(messages)
     }

     fn serialize(&self) -> Result<serde_json::Value, TaskError> {
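The `events.rs` diff isn't reproduced in this excerpt, so for orientation, here is a minimal sketch of the bus surface `MonitorPrTask` codes against. The names (`EventBus`, `DaemonEvent`, `pr_target`, `publish`/`subscribe`) match the patch; the `tokio::sync::broadcast` backing is implied by the `Receiver`/`TryRecvError` imports above, but the channel capacity and exact helper shape are assumptions.

```rust
use tokio::sync::broadcast;

// Sketch only — the real definitions live in devdev-tasks/src/events.rs.
#[derive(Clone, Debug)]
pub enum DaemonEvent {
    PrOpened { owner: String, repo: String, number: u64, head_sha: String },
    PrUpdated { owner: String, repo: String, number: u64, head_sha: String },
    PrClosed { owner: String, repo: String, number: u64, merged: bool },
}

impl DaemonEvent {
    /// The `(owner, repo, number)` triple MonitorPrTask filters on.
    pub fn pr_target(&self) -> Option<(&str, &str, u64)> {
        match self {
            DaemonEvent::PrOpened { owner, repo, number, .. }
            | DaemonEvent::PrUpdated { owner, repo, number, .. }
            | DaemonEvent::PrClosed { owner, repo, number, .. } => {
                Some((owner.as_str(), repo.as_str(), *number))
            }
        }
    }
}

/// Cloneable handle around a broadcast channel; every subscriber gets
/// every event, and slow subscribers observe `Lagged` (handled above).
#[derive(Clone)]
pub struct EventBus {
    tx: broadcast::Sender<DaemonEvent>,
}

impl EventBus {
    pub fn new() -> Self {
        // Capacity is a guess — small enough that lag is observable.
        let (tx, _) = broadcast::channel(256);
        Self { tx }
    }

    pub fn subscribe(&self) -> broadcast::Receiver<DaemonEvent> {
        self.tx.subscribe()
    }

    pub fn publish(&self, ev: DaemonEvent) {
        // A send error only means there are no subscribers yet.
        let _ = self.tx.send(ev);
    }
}
```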
diff --git a/crates/devdev-tasks/src/repo_watch.rs b/crates/devdev-tasks/src/repo_watch.rs
new file mode 100644
index 0000000..185e1c8
--- /dev/null
+++ b/crates/devdev-tasks/src/repo_watch.rs
@@ -0,0 +1,424 @@
+//! `RepoWatchTask` — observes a GitHub repo and emits PR events.
+//!
+//! The polling counterpart to a webhook receiver. Each tick:
+//!
+//! 1. List open PRs via the [`GitHubAdapter`].
+//! 2. For each PR, compute a state hash (head_sha + updated_at) and
+//!    consult the [`IdempotencyLedger`]. If we've seen this exact
+//!    state, skip — we already published the corresponding event.
+//! 3. For new states, publish [`DaemonEvent::PrOpened`] (first time
+//!    we've seen this PR number) or [`DaemonEvent::PrUpdated`] (we
+//!    knew the PR but its hash moved). Then record in the ledger.
+//! 4. PRs that disappeared since the last poll are reported via
+//!    [`DaemonEvent::PrClosed`].
+//!
+//! State carried across polls is intentionally minimal: a map of
+//! `pr_number → state_hash` for open/close detection. Cross-restart
+//! dedup is the ledger's job, not ours.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::time::Duration;
+
+use devdev_integrations::{GitHubAdapter, pr_state_hash};
+
+use crate::events::{DaemonEvent, EventBus};
+use crate::ledger::{IdempotencyLedger, LedgerKey};
+use crate::task::{Task, TaskError, TaskMessage, TaskStatus};
+
+const ADAPTER: &str = "github";
+const RESOURCE_TYPE: &str = "pr_state";
+
+/// A task that polls a single repo's open PRs and emits events.
+pub struct RepoWatchTask {
+    id: String,
+    owner: String,
+    repo: String,
+    /// `pr_number → state_hash` for the most recent poll.
+    last_seen: HashMap<u64, String>,
+    poll_interval: Duration,
+    status: TaskStatus,
+    github: Arc<dyn GitHubAdapter>,
+    ledger: Arc<dyn IdempotencyLedger>,
+    bus: EventBus,
+}
+
+impl RepoWatchTask {
+    pub fn new(
+        id: String,
+        owner: impl Into<String>,
+        repo: impl Into<String>,
+        github: Arc<dyn GitHubAdapter>,
+        ledger: Arc<dyn IdempotencyLedger>,
+        bus: EventBus,
+    ) -> Self {
+        Self {
+            id,
+            owner: owner.into(),
+            repo: repo.into(),
+            last_seen: HashMap::new(),
+            poll_interval: Duration::from_secs(60),
+            status: TaskStatus::Created,
+            github,
+            ledger,
+            bus,
+        }
+    }
+
+    pub fn with_interval(mut self, interval: Duration) -> Self {
+        self.poll_interval = interval;
+        self
+    }
+
+    pub fn owner(&self) -> &str {
+        &self.owner
+    }
+
+    pub fn repo(&self) -> &str {
+        &self.repo
+    }
+
+    fn resource_id(&self, number: u64) -> String {
+        format!("{}/{}#{}", self.owner, self.repo, number)
+    }
+
+    async fn do_poll(&mut self) -> Result<Vec<TaskMessage>, TaskError> {
+        let prs = self
+            .github
+            .list_open_prs(&self.owner, &self.repo)
+            .await
+            .map_err(|e| TaskError::PollFailed(format!("list_open_prs: {e}")))?;
+
+        let mut messages = Vec::new();
+        let mut current: HashMap<u64, String> = HashMap::new();
+
+        for pr in &prs {
+            let hash = pr_state_hash(pr);
+            current.insert(pr.number, hash.clone());
+
+            let key = LedgerKey::new(ADAPTER, RESOURCE_TYPE, self.resource_id(pr.number), &hash);
+
+            // Ledger consult — if we've published this exact state
+            // before (across the lifetime of the daemon), skip.
+            let already = self
+                .ledger
+                .seen(&key)
+                .map_err(|e| TaskError::PollFailed(format!("ledger.seen: {e}")))?;
+            if already {
+                continue;
+            }
+
+            let event = if self.last_seen.contains_key(&pr.number) {
+                DaemonEvent::PrUpdated {
+                    owner: self.owner.clone(),
+                    repo: self.repo.clone(),
+                    number: pr.number,
+                    head_sha: pr.head_sha.clone(),
+                }
+            } else {
+                DaemonEvent::PrOpened {
+                    owner: self.owner.clone(),
+                    repo: self.repo.clone(),
+                    number: pr.number,
+                    head_sha: pr.head_sha.clone(),
+                }
+            };
+            self.bus.publish(event);
+            self.ledger
+                .record(
+                    &key,
+                    serde_json::json!({
+                        "head_sha": pr.head_sha,
+                        "updated_at": pr.updated_at,
+                    }),
+                )
+                .map_err(|e| TaskError::PollFailed(format!("ledger.record: {e}")))?;
+
+            messages.push(TaskMessage::Text(format!(
+                "{} #{} state {}",
+                self.resource_id(pr.number),
+                pr.number,
+                if self.last_seen.contains_key(&pr.number) {
+                    "updated"
+                } else {
+                    "opened"
+                }
+            )));
+        }
+
+        // PrClosed: anything in last_seen but not in current.
+        for (number, _) in self.last_seen.iter() {
+            if !current.contains_key(number) {
+                // We can't know merged-vs-closed cheaply without an
+                // extra `get_pr` call. Best-effort: assume closed
+                // (merged=false) and let MonitorPrTask resolve via
+                // its own get_pr_status if it cares.
+                let event = DaemonEvent::PrClosed {
+                    owner: self.owner.clone(),
+                    repo: self.repo.clone(),
+                    number: *number,
+                    merged: false,
+                };
+                self.bus.publish(event);
+                messages.push(TaskMessage::Text(format!(
+                    "{} closed",
+                    self.resource_id(*number)
+                )));
+            }
+        }
+
+        self.last_seen = current;
+        self.status = TaskStatus::Idle;
+        Ok(messages)
+    }
+}
+
+#[async_trait::async_trait]
+impl Task for RepoWatchTask {
+    fn id(&self) -> &str {
+        &self.id
+    }
+
+    fn describe(&self) -> String {
+        format!("Watching {}/{} for PR events", self.owner, self.repo)
+    }
+
+    fn status(&self) -> &TaskStatus {
+        &self.status
+    }
+
+    fn set_status(&mut self, status: TaskStatus) {
+        self.status = status;
+    }
+
+    async fn poll(&mut self) -> Result<Vec<TaskMessage>, TaskError> {
+        if self.status.is_terminal() {
+            return Ok(vec![]);
+        }
+        self.do_poll().await
+    }
+
+    fn serialize(&self) -> Result<serde_json::Value, TaskError> {
+        Ok(serde_json::json!({
+            "id": self.id,
+            "owner": self.owner,
+            "repo": self.repo,
+            "last_seen": self.last_seen,
+            "poll_interval_secs": self.poll_interval.as_secs(),
+        }))
+    }
+
+    fn task_type(&self) -> &str {
+        "repo_watch"
+    }
+
+    fn poll_interval(&self) -> Duration {
+        self.poll_interval
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use devdev_integrations::{MockGitHubAdapter, PrState, PullRequest};
+
+    /// In-memory test ledger.
+    #[derive(Default)]
+    struct MemLedger {
+        seen: std::sync::Mutex<std::collections::HashSet<LedgerKey>>,
+    }
+    impl IdempotencyLedger for MemLedger {
+        fn seen(&self, key: &LedgerKey) -> Result<bool, crate::ledger::LedgerError> {
+            Ok(self.seen.lock().unwrap().contains(key))
+        }
+        fn record(
+            &self,
+            key: &LedgerKey,
+            _meta: serde_json::Value,
+        ) -> Result<(), crate::ledger::LedgerError> {
+            self.seen.lock().unwrap().insert(key.clone());
+            Ok(())
+        }
+        fn prune(&self, _older_than: Duration) -> Result<usize, crate::ledger::LedgerError> {
+            Ok(0)
+        }
+    }
+
+    fn pr(number: u64, sha: &str, updated: &str) -> PullRequest {
+        PullRequest {
+            number,
+            title: format!("PR {number}"),
+            author: "test".into(),
+            state: PrState::Open,
+            head_sha: sha.into(),
+            base_sha: "base".into(),
+            head_ref: "head".into(),
+            base_ref: "main".into(),
+            body: None,
+            created_at: "2026-01-01".into(),
+            updated_at: updated.into(),
+        }
+    }
+
+    fn task() -> (
+        RepoWatchTask,
+        Arc<MockGitHubAdapter>,
+        Arc<MemLedger>,
+        EventBus,
+    ) {
+        let gh = Arc::new(MockGitHubAdapter::new());
+        let ledger = Arc::new(MemLedger::default());
+        let bus = EventBus::new();
+        let t = RepoWatchTask::new(
+            "t-1".into(),
+            "o",
+            "r",
+            gh.clone() as Arc<dyn GitHubAdapter>,
+            ledger.clone() as Arc<dyn IdempotencyLedger>,
+            bus.clone(),
+        );
+        (t, gh, ledger, bus)
+    }
+
+    #[tokio::test]
+    async fn empty_repo_emits_nothing() {
+        let (mut t, _gh, _l, bus) = task();
+        let mut rx = bus.subscribe();
+        let msgs = t.poll().await.unwrap();
+        assert!(msgs.is_empty());
+        assert!(rx.try_recv().is_err());
+    }
+
+    #[tokio::test]
+    async fn first_pr_emits_opened() {
+        let gh = Arc::new(MockGitHubAdapter::new().with_pr("o", "r", pr(1, "sha1", "t1")));
+        let ledger = Arc::new(MemLedger::default());
+        let bus = EventBus::new();
+        let mut rx = bus.subscribe();
+        let mut t = RepoWatchTask::new(
+            "t-1".into(),
+            "o",
+            "r",
+            gh as Arc<dyn GitHubAdapter>,
+            ledger as Arc<dyn IdempotencyLedger>,
+            bus,
+        );
+        t.poll().await.unwrap();
+        let evt = rx.recv().await.unwrap();
+        assert!(matches!(evt, DaemonEvent::PrOpened { number: 1, .. }));
+    }
+
+    #[tokio::test]
+    async fn second_poll_no_change_emits_nothing() {
+        let gh = Arc::new(MockGitHubAdapter::new().with_pr("o", "r", pr(1, "sha1", "t1")));
+        let ledger = Arc::new(MemLedger::default());
+        let bus = EventBus::new();
+        let mut rx = bus.subscribe();
+        let mut t = RepoWatchTask::new(
+            "t-1".into(),
+            "o",
+            "r",
+            gh as Arc<dyn GitHubAdapter>,
+            ledger as Arc<dyn IdempotencyLedger>,
+            bus,
+        );
+        t.poll().await.unwrap();
+        let _ = rx.recv().await.unwrap();
+        // Second poll: same SHA + updated_at → no event.
+        t.poll().await.unwrap();
+        assert!(rx.try_recv().is_err());
+    }
+
+    #[tokio::test]
+    async fn updated_pr_emits_pr_updated() {
+        let gh = Arc::new(MockGitHubAdapter::new().with_pr("o", "r", pr(1, "sha1", "t1")));
+        let ledger = Arc::new(MemLedger::default());
+        let bus = EventBus::new();
+        let mut rx = bus.subscribe();
+        let mut t = RepoWatchTask::new(
+            "t-1".into(),
+            "o",
+            "r",
+            gh.clone() as Arc<dyn GitHubAdapter>,
+            ledger as Arc<dyn IdempotencyLedger>,
+            bus,
+        );
+        t.poll().await.unwrap();
+        let _ = rx.recv().await.unwrap();
+        // Simulate force-push: head_sha changes via mock override.
+        gh.update_head_sha("o", "r", 1, "sha2");
+        t.poll().await.unwrap();
+        let evt = rx.recv().await.unwrap();
+        assert!(matches!(evt, DaemonEvent::PrUpdated { number: 1, .. }));
+    }
+
+    #[tokio::test]
+    async fn closed_pr_emits_pr_closed() {
+        // Two adapters: one with the PR open, one without.
+        let gh_open = Arc::new(MockGitHubAdapter::new().with_pr("o", "r", pr(1, "sha1", "t1")));
+        let ledger = Arc::new(MemLedger::default());
+        let bus = EventBus::new();
+        let mut rx = bus.subscribe();
+        let mut t = RepoWatchTask::new(
+            "t-1".into(),
+            "o",
+            "r",
+            gh_open as Arc<dyn GitHubAdapter>,
+            ledger.clone() as Arc<dyn IdempotencyLedger>,
+            bus.clone(),
+        );
+        t.poll().await.unwrap();
+        let _ = rx.recv().await.unwrap();
+
+        // Replace the adapter with an empty one (PR disappeared).
+        let gh_empty = Arc::new(MockGitHubAdapter::new());
+        t.github = gh_empty;
+        t.poll().await.unwrap();
+        let evt = rx.recv().await.unwrap();
+        assert!(matches!(evt, DaemonEvent::PrClosed { number: 1, .. }));
+    }
+
+    #[tokio::test]
+    async fn ledger_dedups_across_restart() {
+        let gh = Arc::new(MockGitHubAdapter::new().with_pr("o", "r", pr(1, "sha1", "t1")));
+        let ledger = Arc::new(MemLedger::default());
+        let bus = EventBus::new();
+        let mut rx = bus.subscribe();
+
+        // First task instance: emits PrOpened.
+        let mut t1 = RepoWatchTask::new(
+            "t-1".into(),
+            "o",
+            "r",
+            gh.clone() as Arc<dyn GitHubAdapter>,
+            ledger.clone() as Arc<dyn IdempotencyLedger>,
+            bus.clone(),
+        );
+        t1.poll().await.unwrap();
+        let _ = rx.recv().await.unwrap();
+        drop(t1);
+
+        // Second task instance ("restart"): same ledger, fresh in-memory state.
+        let mut t2 = RepoWatchTask::new(
+            "t-1".into(),
+            "o",
+            "r",
+            gh as Arc<dyn GitHubAdapter>,
+            ledger as Arc<dyn IdempotencyLedger>,
+            bus,
+        );
+        t2.poll().await.unwrap();
+        // Ledger says "seen" — no event.
+        assert!(rx.try_recv().is_err());
+    }
+
+    #[tokio::test]
+    async fn serialize_round_trips() {
+        let (mut t, _gh, _l, _bus) = task();
+        t.last_seen.insert(1, "hash1".into());
+        let v = t.serialize().unwrap();
+        assert_eq!(v["owner"], "o");
+        assert_eq!(v["repo"], "r");
+        assert_eq!(v["last_seen"]["1"], "hash1");
+    }
+}
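`pr_state_hash` comes from devdev-integrations and isn't shown in this patch. Given the module doc above (state = head_sha + updated_at), a plausible sketch follows — the concatenation format is an assumption; anything stable across restarts would do, which is also why a non-cryptographic in-process hasher would be a poor fit for a value persisted in the NDJSON ledger.

```rust
use devdev_integrations::PullRequest;

/// Hypothetical sketch of `pr_state_hash` — only the inputs
/// (head_sha + updated_at) are confirmed by the module doc above.
/// Pairing head_sha with updated_at distinguishes force-pushes from
/// metadata-only updates, and the plain-text form stays debuggable
/// in the ledger file.
pub fn pr_state_hash(pr: &PullRequest) -> String {
    format!("{}@{}", pr.head_sha, pr.updated_at)
}
```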
diff --git a/crates/devdev-tasks/tests/acceptance_monitor_pr.rs b/crates/devdev-tasks/tests/acceptance_monitor_pr.rs
index d6afec1..4acc1be 100644
--- a/crates/devdev-tasks/tests/acceptance_monitor_pr.rs
+++ b/crates/devdev-tasks/tests/acceptance_monitor_pr.rs
@@ -1,15 +1,15 @@
-//! Acceptance tests for P2-07 — MonitorPR Task.
+//! Acceptance tests for `MonitorPrTask` — event-driven shepherd.

 use std::sync::Arc;
-use std::time::Duration;

+use async_trait::async_trait;
 use devdev_integrations::{GitHubAdapter, MockGitHubAdapter, PrState, PrStatus, PullRequest};
-use devdev_tasks::approval::{self, ApprovalPolicy, ApprovalResponse};
-use devdev_tasks::monitor_pr::{MonitorPrTask, ReviewFn};
+use devdev_tasks::agent::AgentRunner;
+use devdev_tasks::events::{DaemonEvent, EventBus};
+use devdev_tasks::monitor_pr::MonitorPrTask;
 use devdev_tasks::pr_ref::PrRef;
 use devdev_tasks::review::parse_review;
 use devdev_tasks::task::{Task, TaskStatus};
-use tokio::sync::Mutex;

 fn mock_pr(number: u64, sha: &str) -> PullRequest {
     PullRequest {
@@ -27,14 +27,6 @@ fn mock_pr(number: u64, sha: &str) -> PullRequest {
     }
 }

-fn fake_review_fn() -> ReviewFn {
-    Arc::new(|_prompt| {
-        Box::pin(async {
-            Ok("Overall looks good.\n[src/config.rs:42] Missing validation for empty strings.\n[src/lib.rs:10] Unused import.".to_string())
-        })
-    })
-}
-
 fn mock_github(sha: &str) -> MockGitHubAdapter {
     MockGitHubAdapter::new()
         .with_pr("org", "repo", mock_pr(247, sha))
@@ -55,6 +47,20 @@ fn mock_github(sha: &str) -> MockGitHubAdapter {
     )
 }

+/// Canned agent that records prompts and replies "looks good".
+#[derive(Default)]
+struct FakeRunner {
+    seen: tokio::sync::Mutex<Vec<String>>,
+}
+
+#[async_trait]
+impl AgentRunner for FakeRunner {
+    async fn run_prompt(&self, prompt: String) -> Result<String, String> {
+        self.seen.lock().await.push(prompt);
+        Ok("Overall looks good.".to_string())
+    }
+}
+
 // ── PR ref parsing ─────────────────────────────────────────────

 #[test]
@@ -90,149 +96,160 @@ fn parse_structured_review() {
     assert_eq!(review.comments.len(), 2);
     assert_eq!(review.comments[0].path, "src/config.rs");
     assert_eq!(review.comments[0].line, 42);
-    assert_eq!(review.comments[1].path, "src/lib.rs");
 }

-#[test]
-fn parse_fallback_body_only() {
-    let text = "The code looks fine overall. No major issues found.";
-    let review = parse_review(text);
-    assert!(review.comments.is_empty());
-    assert!(!review.body.is_empty());
-}
+// ── MonitorPrTask lifecycle (event-driven) ─────────────────────

-#[test]
-fn parse_mixed() {
-    let text = "Summary: looks ok.\n[src/config.rs:42] bad validation\nOther notes.";
-    let review = parse_review(text);
-    assert_eq!(review.comments.len(), 1);
-    assert!(review.body.contains("Summary"));
-    assert!(review.body.contains("Other notes"));
-}
+#[tokio::test]
+async fn idle_with_no_events_is_quiet() {
+    let gh: Arc<dyn GitHubAdapter> = Arc::new(mock_github("abc123"));
+    let bus = EventBus::new();
+    let runner: Arc<dyn AgentRunner> = Arc::new(FakeRunner::default());
+    let mut task = MonitorPrTask::new("t-1".into(), "org/repo#247", gh, runner, &bus).unwrap();

-// ── MonitorPrTask lifecycle ────────────────────────────────────
+    let msgs = task.poll().await.unwrap();
+    assert!(msgs.is_empty());
+}

 #[tokio::test]
-async fn first_poll_loads_and_reviews() {
-    let github: Arc<dyn GitHubAdapter> = Arc::new(mock_github("abc123"));
-    let (gate, _handle) =
-        approval::approval_channel(ApprovalPolicy::AutoApprove, Duration::from_secs(5));
-    let gate = Arc::new(Mutex::new(gate));
-
-    let mut task =
-        MonitorPrTask::new("t-1".into(), "org/repo#247", github, gate, fake_review_fn()).unwrap();
+async fn pr_opened_event_triggers_agent_prompt() {
+    let gh: Arc<dyn GitHubAdapter> = Arc::new(mock_github("abc123"));
+    let bus = EventBus::new();
+    let runner = Arc::new(FakeRunner::default());
+    let runner_dyn: Arc<dyn AgentRunner> = runner.clone();
+    let mut task = MonitorPrTask::new("t-1".into(), "org/repo#247", gh, runner_dyn, &bus).unwrap();
+
+    bus.publish(DaemonEvent::PrOpened {
+        owner: "org".into(),
+        repo: "repo".into(),
+        number: 247,
+        head_sha: "abc123".into(),
+    });

     let msgs = task.poll().await.unwrap();
     assert!(!msgs.is_empty());
+    let seen = runner.seen.lock().await;
+    assert_eq!(seen.len(), 1);
+    assert!(seen[0].contains("opened"));
+    assert!(seen[0].contains("abc123"));
 }

 #[tokio::test]
-async fn first_poll_posts_review_when_approved() {
-    let gh = Arc::new(mock_github("abc123"));
-    let github: Arc<dyn GitHubAdapter> =
-        Arc::clone(&gh) as Arc<dyn GitHubAdapter>;
-    let (gate, _handle) =
-        approval::approval_channel(ApprovalPolicy::AutoApprove, Duration::from_secs(5));
-    let gate = Arc::new(Mutex::new(gate));
-
-    let mut task =
-        MonitorPrTask::new("t-1".into(), "org/repo#247", github, gate, fake_review_fn()).unwrap();
-
-    let _msgs = task.poll().await.unwrap();
-    assert!(!gh.posted_reviews().is_empty());
+async fn pr_updated_event_triggers_agent_prompt() {
+    let gh: Arc<dyn GitHubAdapter> = Arc::new(mock_github("abc123"));
+    let bus = EventBus::new();
+    let runner = Arc::new(FakeRunner::default());
+    let runner_dyn: Arc<dyn AgentRunner> = runner.clone();
+    let mut task = MonitorPrTask::new("t-1".into(), "org/repo#247", gh, runner_dyn, &bus).unwrap();
+
+    bus.publish(DaemonEvent::PrUpdated {
+        owner: "org".into(),
+        repo: "repo".into(),
+        number: 247,
+        head_sha: "def456".into(),
+    });
+
+    task.poll().await.unwrap();
+    let seen = runner.seen.lock().await;
+    assert_eq!(seen.len(), 1);
+    assert!(seen[0].contains("updated"));
 }

 #[tokio::test]
-async fn first_poll_skips_post_when_rejected() {
-    let gh = Arc::new(mock_github("abc123"));
-    let github: Arc<dyn GitHubAdapter> =
-        Arc::clone(&gh) as Arc<dyn GitHubAdapter>;
-    let (gate, mut handle) =
-        approval::approval_channel(ApprovalPolicy::Ask, Duration::from_secs(5));
-    let gate = Arc::new(Mutex::new(gate));
-
-    // Respond with reject in background.
-    tokio::spawn(async move {
-        let req = handle.request_rx.recv().await.unwrap();
-        handle
-            .response_tx
-            .send(ApprovalResponse {
-                id: req.id,
-                approve: false,
-            })
-            .await
-            .unwrap();
+async fn pr_closed_event_completes_task() {
+    let gh: Arc<dyn GitHubAdapter> = Arc::new(mock_github("abc123"));
+    let bus = EventBus::new();
+    let runner: Arc<dyn AgentRunner> = Arc::new(FakeRunner::default());
+    let mut task = MonitorPrTask::new("t-1".into(), "org/repo#247", gh, runner, &bus).unwrap();
+
+    bus.publish(DaemonEvent::PrClosed {
+        owner: "org".into(),
+        repo: "repo".into(),
+        number: 247,
+        merged: true,
     });

-    let mut task =
-        MonitorPrTask::new("t-1".into(), "org/repo#247", github, gate, fake_review_fn()).unwrap();
-
     let msgs = task.poll().await.unwrap();
-    assert!(!msgs.is_empty()); // Should have "rejected" message.
-    assert!(gh.posted_reviews().is_empty()); // No review posted.
+    assert!(!msgs.is_empty());
+    assert_eq!(task.status(), &TaskStatus::Completed);
 }

 #[tokio::test]
-async fn subsequent_poll_no_change_quiet() {
-    let github: Arc<dyn GitHubAdapter> = Arc::new(mock_github("abc123"));
-    let (gate, _handle) =
-        approval::approval_channel(ApprovalPolicy::AutoApprove, Duration::from_secs(5));
-    let gate = Arc::new(Mutex::new(gate));
+async fn non_matching_event_is_ignored() {
+    let gh: Arc<dyn GitHubAdapter> = Arc::new(mock_github("abc123"));
+    let bus = EventBus::new();
+    let runner = Arc::new(FakeRunner::default());
+    let runner_dyn: Arc<dyn AgentRunner> = runner.clone();
+    let mut task = MonitorPrTask::new("t-1".into(), "org/repo#247", gh, runner_dyn, &bus).unwrap();
+
+    // Different PR number.
+    bus.publish(DaemonEvent::PrOpened {
+        owner: "org".into(),
+        repo: "repo".into(),
+        number: 999,
+        head_sha: "x".into(),
+    });

-    let mut task =
-        MonitorPrTask::new("t-1".into(), "org/repo#247", github, gate, fake_review_fn()).unwrap();
+    let msgs = task.poll().await.unwrap();
+    assert!(msgs.is_empty());
+    assert!(runner.seen.lock().await.is_empty());
+}

-    // First poll — does review.
-    let _msgs = task.poll().await.unwrap();
+#[tokio::test]
+async fn observations_accumulate_across_events() {
+    let gh: Arc<dyn GitHubAdapter> = Arc::new(mock_github("abc123"));
+    let bus = EventBus::new();
+    let runner: Arc<dyn AgentRunner> = Arc::new(FakeRunner::default());
+    let mut task = MonitorPrTask::new("t-1".into(), "org/repo#247", gh, runner, &bus).unwrap();
+
+    bus.publish(DaemonEvent::PrOpened {
+        owner: "org".into(),
+        repo: "repo".into(),
+        number: 247,
+        head_sha: "abc123".into(),
+    });
+    task.poll().await.unwrap();

-    // Set to Idle.
-    task.set_status(TaskStatus::Idle);
+    bus.publish(DaemonEvent::PrUpdated {
+        owner: "org".into(),
+        repo: "repo".into(),
+        number: 247,
+        head_sha: "def456".into(),
+    });
+    task.poll().await.unwrap();

-    // Second poll — same SHA, should be quiet.
-    let msgs = task.poll().await.unwrap();
-    assert!(msgs.is_empty());
+    assert_eq!(task.observations().len(), 2);
 }

 #[tokio::test]
-async fn pr_merged_transitions_to_completed() {
+async fn merged_pr_short_circuits_to_completed() {
     let mut pr = mock_pr(247, "abc123");
     pr.state = PrState::Merged;
-
-    let github: Arc<dyn GitHubAdapter> = Arc::new(
+    let gh: Arc<dyn GitHubAdapter> = Arc::new(
         MockGitHubAdapter::new()
             .with_pr("org", "repo", pr)
-            .with_diff("org", "repo", 247, "")
-            .with_status(
-                "org",
-                "repo",
-                247,
-                PrStatus {
-                    mergeable: None,
-                    checks: vec![],
-                },
-            ),
+            .with_diff("org", "repo", 247, ""),
     );
-    let (gate, _handle) =
-        approval::approval_channel(ApprovalPolicy::AutoApprove, Duration::from_secs(5));
-    let gate = Arc::new(Mutex::new(gate));
-
-    let mut task =
-        MonitorPrTask::new("t-1".into(), "org/repo#247", github, gate, fake_review_fn()).unwrap();
-
-    let msgs = task.poll().await.unwrap();
-    assert!(!msgs.is_empty());
+    let bus = EventBus::new();
+    let runner: Arc<dyn AgentRunner> = Arc::new(FakeRunner::default());
+    let mut task = MonitorPrTask::new("t-1".into(), "org/repo#247", gh, runner, &bus).unwrap();
+
+    bus.publish(DaemonEvent::PrUpdated {
+        owner: "org".into(),
+        repo: "repo".into(),
+        number: 247,
+        head_sha: "abc123".into(),
+    });
+    task.poll().await.unwrap();
     assert_eq!(task.status(), &TaskStatus::Completed);
 }

 #[tokio::test]
-async fn serialize_deserialize_roundtrip() {
-    let github: Arc<dyn GitHubAdapter> = Arc::new(mock_github("abc123"));
-    let (gate, _handle) =
-        approval::approval_channel(ApprovalPolicy::AutoApprove, Duration::from_secs(5));
-    let gate = Arc::new(Mutex::new(gate));
-
-    let task =
-        MonitorPrTask::new("t-1".into(), "org/repo#247", github, gate, fake_review_fn()).unwrap();
+async fn serialize_includes_pr_state() {
+    let gh: Arc<dyn GitHubAdapter> = Arc::new(mock_github("abc123"));
+    let bus = EventBus::new();
+    let runner: Arc<dyn AgentRunner> = Arc::new(FakeRunner::default());
+    let task = MonitorPrTask::new("t-1".into(), "org/repo#247", gh, runner, &bus).unwrap();

     let data = task.serialize().unwrap();
     assert_eq!(data["id"], "t-1");
@@ -240,24 +257,3 @@ async fn serialize_deserialize_roundtrip() {
     assert_eq!(data["repo"], "repo");
     assert_eq!(data["number"], 247);
 }
-
-#[tokio::test]
-async fn dry_run_never_posts() {
-    let gh = Arc::new(mock_github("abc123"));
-    let github: Arc<dyn GitHubAdapter> =
-        Arc::clone(&gh) as Arc<dyn GitHubAdapter>;
-    let (gate, _handle) =
-        approval::approval_channel(ApprovalPolicy::DryRun, Duration::from_secs(5));
-    let gate = Arc::new(Mutex::new(gate));
-
-    let mut task =
-        MonitorPrTask::new("t-1".into(), "org/repo#247", github, gate, fake_review_fn()).unwrap();
-
-    let msgs = task.poll().await.unwrap();
-    assert!(!msgs.is_empty());
-    // Verify dry-run message.
-    if let devdev_tasks::TaskMessage::Text(text) = &msgs[0] {
-        assert!(text.contains("dry-run"));
-    }
-    assert!(gh.posted_reviews().is_empty());
-}
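The surviving `parse_structured_review` test still exercises the `[file:line] body` convention. For reference, a parser honoring that format can be sketched in a few lines — this is not the actual `devdev_tasks::review` source; the field names mirror the assertions above (`comments[i].path` / `.line`), and the `InlineComment` name is invented.

```rust
// Sketch, assuming ParsedReview = { body, comments: Vec<{path, line, body}> }.
#[derive(Debug, Default)]
pub struct ParsedReview {
    pub body: String,
    pub comments: Vec<InlineComment>,
}

#[derive(Debug)]
pub struct InlineComment {
    pub path: String,
    pub line: u64,
    pub body: String,
}

pub fn parse_review(text: &str) -> ParsedReview {
    let mut review = ParsedReview::default();
    let mut body_lines = Vec::new();
    for line in text.lines() {
        // `[src/config.rs:42] Missing validation…` → inline comment.
        if let Some(rest) = line.strip_prefix('[')
            && let Some((anchor, comment)) = rest.split_once("] ")
            && let Some((path, line_no)) = anchor.rsplit_once(':')
            && let Ok(line_no) = line_no.parse::<u64>()
        {
            review.comments.push(InlineComment {
                path: path.to_string(),
                line: line_no,
                body: comment.trim().to_string(),
            });
            continue;
        }
        // Everything that isn't an anchored comment becomes summary body.
        body_lines.push(line);
    }
    review.body = body_lines.join("\n").trim().to_string();
    review
}
```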
diff --git a/docs/internals/capabilities/22-monitor-pr-task.md b/docs/internals/capabilities/22-monitor-pr-task.md
index add3389..53a32ac 100644
--- a/docs/internals/capabilities/22-monitor-pr-task.md
+++ b/docs/internals/capabilities/22-monitor-pr-task.md
@@ -1,7 +1,7 @@
 ---
 id: monitor-pr-task
 title: "MonitorPR Task"
-status: not-started
+status: shipped
 type: composition
 phase: 2
 crate: devdev-tasks
diff --git a/docs/internals/capabilities/24-e2e-pr-shepherding.md b/docs/internals/capabilities/24-e2e-pr-shepherding.md
index 4bf50b4..3d6101a 100644
--- a/docs/internals/capabilities/24-e2e-pr-shepherding.md
+++ b/docs/internals/capabilities/24-e2e-pr-shepherding.md
@@ -1,7 +1,7 @@
 ---
 id: e2e-pr-shepherding
 title: "E2E PR Shepherding"
-status: not-started
+status: in-progress
 type: composition
 phase: 2
 crate: tests
diff --git a/docs/internals/capabilities/25-vibe-check.md b/docs/internals/capabilities/25-vibe-check.md
index 435ae69..b8cec3f 100644
--- a/docs/internals/capabilities/25-vibe-check.md
+++ b/docs/internals/capabilities/25-vibe-check.md
@@ -1,7 +1,7 @@
 ---
 id: vibe-check
 title: "Vibe Check — Preference Authoring"
-status: not-started
+status: shipped
 type: composition
 phase: 5
 crate: devdev-cli
@@ -12,8 +12,17 @@ effort: L

 # P5-01 — Vibe Check (Preference Authoring)

+**Status: shipped (Phase D).** Resolved questions are captured below.
+
 The first of the two pillars from `spirit/outline.md` §1 ("The Vibe Check"). DevDev interviews the user in natural language and writes their technical preferences out as plain Markdown files in a `.devdev/` directory at the workspace root. No YAML schemas, no DSLs — file-scoped Markdown documents that the Scout (P5-02) and the Heavy (existing ACP path) can read directly.

+## Resolved
+
+- **Location:** `.devdev/` is searched in the workdir, all ancestors, and `~/.devdev/`. Repo wins over parent, parent over home; files are deduped by title, earliest layer winning. A loader sketch follows this diff; the real implementation is `crates/devdev-cli/src/preferences.rs`.
+- **Scribe prompt:** lives at `crates/devdev-cli/src/vibe_check_prompt.md`, embedded into the binary via `include_str!`. `devdev init` ships it as the session preamble and runs a stdin REPL against the daemon's ACP session.
+- **Revision behaviour:** the prompt instructs the scribe to append `## Revision <date>` sections rather than overwrite — enforced socially via the prompt, not by the file layer.
+- **Surfacing into PR review:** the per-PR `MonitorPrTask` carries a `Vec<PathBuf>` of preference paths (`with_preferences`); the prompt lists them so the agent reads them on demand via the existing MCP `fs/*` tools. Auto-injection from dispatch is deferred (it would require relocating the loader to break a daemon→cli cycle).
+
 ## Scope

 **In:**
@@ -31,9 +40,7 @@ The first of the two pillars from `spirit/outline.md` §1 ("The Vibe Check"). De

 ## Open Questions

-- Should `.devdev/` live in the repo (tracked, shareable) or in the user's home (`~/.devdev/`, private)? Outline says "personal", suggesting home; PR review use cases suggest repo. **Resolution path:** support both — repo-local takes precedence over home, like `.gitconfig`.
-- What's the scribe's prompt? Probably a small canned system prompt that says "you are interviewing the user about their coding preferences; write one Markdown file per distinct topic; keep titles short and prose conversational."
-- What does the scribe do when the user says something contradictory to an existing file? Append a "Revision (date)" section, not silently overwrite.
+*(All previously open questions resolved — see the Resolved section above.)*

 ## Dependencies
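The loader sketch promised under **Location** — the real code is in `crates/devdev-cli/src/preferences.rs`, which this patch excerpt doesn't reproduce. The function name, the HOME lookup, and the use of the file stem as a stand-in for the document title are all illustrative assumptions.

```rust
use std::collections::HashSet;
use std::path::{Path, PathBuf};

/// Layered `.devdev/` lookup: workdir first, then each ancestor,
/// then the user's home. Earliest layer wins on a title collision.
pub fn preference_files(workdir: &Path) -> Vec<PathBuf> {
    // `ancestors()` yields workdir itself first, then its parents.
    let mut layers: Vec<PathBuf> = workdir
        .ancestors()
        .map(|dir| dir.join(".devdev"))
        .collect();
    // Unix-style home lookup; Windows would consult USERPROFILE.
    if let Some(home) = std::env::var_os("HOME") {
        layers.push(PathBuf::from(home).join(".devdev"));
    }

    let mut seen_titles = HashSet::new();
    let mut files = Vec::new();
    for layer in layers {
        let Ok(entries) = std::fs::read_dir(&layer) else { continue };
        for entry in entries.flatten() {
            let path = entry.path();
            // File stem stands in for the Markdown title in this sketch.
            if path.extension().is_some_and(|e| e == "md")
                && let Some(stem) = path.file_stem().map(|s| s.to_os_string())
                && seen_titles.insert(stem)
            {
                files.push(path); // earliest layer wins
            }
        }
    }
    files
}
```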
Append a "Revision (date)" section, not silently overwrite. +*(All previously open questions resolved — see Resolved section above.)* ## Dependencies diff --git a/docs/internals/capabilities/27-idempotency-ledger.md b/docs/internals/capabilities/27-idempotency-ledger.md index 9e3e377..16082c0 100644 --- a/docs/internals/capabilities/27-idempotency-ledger.md +++ b/docs/internals/capabilities/27-idempotency-ledger.md @@ -1,7 +1,7 @@ --- id: idempotency-ledger title: "Idempotency Ledger" -status: not-started +status: shipped type: leaf phase: 2 crate: devdev-daemon From eb878c6909105bc7dcc4da3fcc4f7aa76ffee9fb Mon Sep 17 00:00:00 2001 From: Autumn Wyborny Date: Sun, 26 Apr 2026 18:02:36 -0700 Subject: [PATCH 2/3] dogfood infra: poll scheduler, ACP optional status, idle timeout - scheduler in run_up calling poll_all_tasks every 5s - last_polled self-throttle in RepoWatchTask - ToolCall/ToolCallUpdate.status now Optional (Copilot CLI omits on initial tool_call) - ACP idle_timeout bumped to 300s for live agents --- crates/devdev-acp/src/types.rs | 11 +++++++++-- crates/devdev-acp/tests/acceptance.rs | 4 ++-- crates/devdev-cli/src/acp_backend.rs | 6 ++++++ crates/devdev-cli/src/daemon_cli.rs | 25 +++++++++++++++++++++++++ crates/devdev-tasks/src/repo_watch.rs | 13 ++++++++++++- 5 files changed, 54 insertions(+), 5 deletions(-) diff --git a/crates/devdev-acp/src/types.rs b/crates/devdev-acp/src/types.rs index e4bc743..7a45839 100644 --- a/crates/devdev-acp/src/types.rs +++ b/crates/devdev-acp/src/types.rs @@ -444,7 +444,11 @@ pub struct ToolCall { pub tool_call_id: String, pub title: String, pub kind: ToolCallKind, - pub status: ToolCallStatus, + /// Optional: Copilot CLI omits this on the initial `tool_call` + /// notification (status is implicit `pending`). Subsequent + /// `tool_call_update` notifications carry it explicitly. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub status: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub raw_input: Option, } @@ -453,7 +457,10 @@ pub struct ToolCall { #[serde(rename_all = "camelCase")] pub struct ToolCallUpdate { pub tool_call_id: String, - pub status: ToolCallStatus, + /// Optional for symmetry with `ToolCall`. A status-less update + /// is a metadata-only change (e.g. output appended). 
diff --git a/crates/devdev-acp/src/types.rs b/crates/devdev-acp/src/types.rs
index e4bc743..7a45839 100644
--- a/crates/devdev-acp/src/types.rs
+++ b/crates/devdev-acp/src/types.rs
@@ -444,7 +444,11 @@ pub struct ToolCall {
     pub tool_call_id: String,
     pub title: String,
     pub kind: ToolCallKind,
-    pub status: ToolCallStatus,
+    /// Optional: Copilot CLI omits this on the initial `tool_call`
+    /// notification (status is implicitly `pending`). Subsequent
+    /// `tool_call_update` notifications carry it explicitly.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub status: Option<ToolCallStatus>,
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub raw_input: Option<serde_json::Value>,
 }
@@ -453,7 +457,10 @@ pub struct ToolCall {
 #[serde(rename_all = "camelCase")]
 pub struct ToolCallUpdate {
     pub tool_call_id: String,
-    pub status: ToolCallStatus,
+    /// Optional for symmetry with `ToolCall`. A status-less update
+    /// is a metadata-only change (e.g. output appended).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub status: Option<ToolCallStatus>,
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub output: Option<String>,
 }
diff --git a/crates/devdev-acp/tests/acceptance.rs b/crates/devdev-acp/tests/acceptance.rs
index db5556f..24ec2a3 100644
--- a/crates/devdev-acp/tests/acceptance.rs
+++ b/crates/devdev-acp/tests/acceptance.rs
@@ -72,7 +72,7 @@ fn session_update_tool_call_roundtrip() {
         tool_call_id: "tc-1".into(),
         title: "Read file".into(),
         kind: ToolCallKind::Read,
-        status: ToolCallStatus::Completed,
+        status: Some(ToolCallStatus::Completed),
         raw_input: Some(serde_json::json!({"path": "/foo.rs"})),
     });
     let json = serde_json::to_string(&variant).unwrap();
@@ -85,7 +85,7 @@ fn session_update_tool_call_update_roundtrip() {
     let variant = SessionUpdate::ToolCallUpdate(ToolCallUpdate {
         tool_call_id: "tc-1".into(),
-        status: ToolCallStatus::Failed,
+        status: Some(ToolCallStatus::Failed),
         output: Some("error: not found".into()),
     });
     let json = serde_json::to_string(&variant).unwrap();
diff --git a/crates/devdev-cli/src/acp_backend.rs b/crates/devdev-cli/src/acp_backend.rs
index ed99a4a..1b62865 100644
--- a/crates/devdev-cli/src/acp_backend.rs
+++ b/crates/devdev-cli/src/acp_backend.rs
@@ -76,6 +76,12 @@ impl AcpSessionBackend {
         let argv: Vec<&str> = args.iter().map(String::as_str).collect();
         let client_config = AcpClientConfig {
             env_overrides: crate::realpath_shim::prepare_nodejs_options(),
+            // Real agents (Copilot CLI included) can think for
+            // a long time between session/update notifications,
+            // especially while running multi-step gh/git plans.
+            // Keep the idle window generous to avoid killing a
+            // working agent mid-turn.
+            idle_timeout: std::time::Duration::from_secs(300),
             ..AcpClientConfig::default()
         };
         let client = AcpClient::connect_process(
diff --git a/crates/devdev-cli/src/daemon_cli.rs b/crates/devdev-cli/src/daemon_cli.rs
index 394f03e..edd9f06 100644
--- a/crates/devdev-cli/src/daemon_cli.rs
+++ b/crates/devdev-cli/src/daemon_cli.rs
@@ -332,6 +332,30 @@ pub async fn run_up(args: UpArgs) -> Result<()> {
     // becomes a `MonitorPrTask` (created on first observation).
     let coordinator = spawn_event_coordinator(Arc::clone(&ctx), shutdown_tx.subscribe());

+    // Background task scheduler: tick every 5s and poll every
+    // registered task. Tasks self-throttle against their own
+    // `poll_interval()` (see the `last_polled` check added to
+    // `RepoWatchTask::poll` in this same patch) — adequate for
+    // dogfood, but a per-task timer is overdue; see the sketch below.
+    let scheduler_ctx = Arc::clone(&ctx);
+    let mut scheduler_shutdown = shutdown_tx.subscribe();
+    let scheduler = tokio::spawn(async move {
+        let mut ticker = tokio::time::interval(Duration::from_secs(5));
+        ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
+        loop {
+            tokio::select! {
+                _ = ticker.tick() => {
+                    scheduler_ctx.poll_all_tasks().await;
+                }
+                changed = scheduler_shutdown.changed() => {
+                    if changed.is_err() || *scheduler_shutdown.borrow() {
+                        break;
+                    }
+                }
+            }
+        }
+    });
+
     let server_task = tokio::spawn(server::run(Arc::clone(&ctx), server, shutdown_rx));

     eprintln!(
@@ -353,6 +377,7 @@ pub async fn run_up(args: UpArgs) -> Result<()> {
     // Let the accept loop observe the flag and exit.
     let _ = server_task.await;
     let _ = coordinator.await;
+    let _ = scheduler.await;

     // Stop the MCP server so its port is released before we claim
     // shutdown is done. Shutdown is graceful — no pending requests.
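The per-task timer the scheduler comment defers could look roughly like this — a sketch only, spawned once per registered task. It assumes `devdev_tasks::Task` is object-safe (it is used as `dyn Task` elsewhere in this series) and that `TaskError` implements `Display`; the actual registry/coordinator wiring would be more involved.

```rust
use devdev_tasks::Task;

/// Hypothetical follow-up: one timer per task instead of a global
/// 5s tick, so each task's `poll_interval()` is honored natively.
async fn drive_task(mut task: Box<dyn Task>) {
    let mut ticker = tokio::time::interval(task.poll_interval());
    ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
    loop {
        ticker.tick().await;
        match task.poll().await {
            Ok(_msgs) => { /* forward TaskMessages to the coordinator */ }
            Err(e) => eprintln!("task {}: poll failed: {e}", task.id()),
        }
        if task.status().is_terminal() {
            break; // completed/failed tasks drop their timer with them
        }
    }
}
```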
diff --git a/crates/devdev-tasks/src/repo_watch.rs b/crates/devdev-tasks/src/repo_watch.rs
index 185e1c8..0437ccd 100644
--- a/crates/devdev-tasks/src/repo_watch.rs
+++ b/crates/devdev-tasks/src/repo_watch.rs
@@ -18,7 +18,7 @@

 use std::collections::HashMap;
 use std::sync::Arc;
-use std::time::Duration;
+use std::time::{Duration, Instant};

 use devdev_integrations::{GitHubAdapter, pr_state_hash};

@@ -37,6 +37,8 @@ pub struct RepoWatchTask {
     /// `pr_number → state_hash` for the most recent poll.
     last_seen: HashMap<u64, String>,
     poll_interval: Duration,
+    /// When `poll()` last actually ran. `None` until the first run.
+    last_polled: Option<Instant>,
     status: TaskStatus,
     github: Arc<dyn GitHubAdapter>,
     ledger: Arc<dyn IdempotencyLedger>,
     bus: EventBus,
@@ -58,6 +60,7 @@ impl RepoWatchTask {
             repo: repo.into(),
             last_seen: HashMap::new(),
             poll_interval: Duration::from_secs(60),
+            last_polled: None,
             status: TaskStatus::Created,
             github,
             ledger,
@@ -195,6 +198,14 @@ impl Task for RepoWatchTask {
         if self.status.is_terminal() {
             return Ok(vec![]);
         }
+        // Self-throttle: callers may invoke `poll()` faster than
+        // our `poll_interval`. Skip until we're due.
+        if let Some(prev) = self.last_polled
+            && prev.elapsed() < self.poll_interval
+        {
+            return Ok(vec![]);
+        }
+        self.last_polled = Some(Instant::now());
         self.do_poll().await
     }

From 6126f70ba814822906f4fbb4a6d11eb468dd8e95 Mon Sep 17 00:00:00 2001
From: Autumn Wyborny
Date: Sun, 26 Apr 2026 18:47:28 -0700
Subject: [PATCH 3/3] fix bugs surfaced by dogfood PR review

- dispatch.rs: per-task auto_approve no longer mutates the daemon-wide
  approval gate (it was leaking policy across all tasks). Documented as
  a TODO until per-session approval routing exists.
- repo_watch.rs: a fresh task instance now republishes PrOpened on
  first observation even when the ledger already has the state hash, so
  MonitorPrTasks get respawned after a daemon restart. The ledger still
  suppresses duplicate records on the same instance.
- updated the ledger_dedups_across_restart unit test to assert the new
  (correct) republish-on-restart semantics.
- adjusted the other repo_watch tests to set with_interval(0) so the new
  poll() self-throttle doesn't suppress the second poll in 2-poll tests.
---
 crates/devdev-daemon/src/dispatch.rs  | 17 ++---
 crates/devdev-tasks/src/repo_watch.rs | 91 ++++++++++++++++++---------
 2 files changed, 70 insertions(+), 38 deletions(-)

diff --git a/crates/devdev-daemon/src/dispatch.rs b/crates/devdev-daemon/src/dispatch.rs
index fdb4218..0d43b33 100644
--- a/crates/devdev-daemon/src/dispatch.rs
+++ b/crates/devdev-daemon/src/dispatch.rs
@@ -205,14 +205,15 @@ impl DispatchContext {
             self.approval_policy
         };

-        // If the per-task `auto_approve` overrides the daemon's
-        // policy, swap the active approval channel so `devdev_ask`
-        // bypasses prompts for this task.
-        if policy != self.approval_policy {
-            let (gate, handle) = approval_channel(policy, self.approval_timeout);
-            *self.approval_gate.lock().await = gate;
-            *self.approval_handle.lock().await = handle;
-        }
+        // NOTE: per-task approval policy isolation is not yet
+        // implemented. The MCP `devdev_ask` tool resolves through
+        // the daemon-wide `approval_gate`, so a task-local override
+        // here cannot be applied safely without flipping behaviour
+        // for every other task and tool call. Until a session→task
+        // mapping is plumbed through the MCP provider so each call
+        // can pick its own gate (rough shape sketched below), the
+        // per-task `auto_approve` flag is accepted but is a no-op.
+        let _ = policy;

         let mut registry = self.tasks.lock().await;
         let task_id = registry.next_id();
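Rough shape of the session→gate routing the NOTE defers — entirely hypothetical, nothing below exists in the codebase; it only illustrates why the fix needs provider plumbing rather than the global gate swap that was just removed.

```rust
use std::collections::HashMap;
use std::sync::Arc;

use devdev_tasks::ApprovalGate;
use tokio::sync::Mutex;

/// Hypothetical router: `devdev_ask` would resolve its gate through
/// this instead of a single daemon-wide gate.
struct ApprovalRouter {
    /// Daemon-wide default gate.
    default_gate: Arc<Mutex<ApprovalGate>>,
    /// Task-scoped overrides, keyed by the MCP session id the
    /// provider would thread through each `devdev_ask` call.
    per_session: Mutex<HashMap<String, Arc<Mutex<ApprovalGate>>>>,
}

impl ApprovalRouter {
    async fn gate_for(&self, session_id: &str) -> Arc<Mutex<ApprovalGate>> {
        self.per_session
            .lock()
            .await
            .get(session_id)
            .cloned()
            .unwrap_or_else(|| Arc::clone(&self.default_gate))
    }
}
```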
diff --git a/crates/devdev-tasks/src/repo_watch.rs b/crates/devdev-tasks/src/repo_watch.rs
index 0437ccd..e15bb44 100644
--- a/crates/devdev-tasks/src/repo_watch.rs
+++ b/crates/devdev-tasks/src/repo_watch.rs
@@ -101,25 +101,37 @@ impl RepoWatchTask {

             let key = LedgerKey::new(ADAPTER, RESOURCE_TYPE, self.resource_id(pr.number), &hash);

-            // Ledger consult — if we've published this exact state
-            // before (across the lifetime of the daemon), skip.
+            // Two independent questions:
+            //   1. Have we already published this exact state hash?
+            //      (`already` — persisted across daemon restarts.)
+            //   2. Is this the first time we've seen this PR number
+            //      *in this task instance*? (`first_in_session` —
+            //      true after every restart, until we observe.)
+            //
+            // A fresh task must always emit `PrOpened` on first
+            // observation so the event coordinator can (re)spawn a
+            // `MonitorPrTask`, even if the ledger already has the
+            // state recorded from a previous run. The ledger only
+            // suppresses *duplicate* records / no-op `PrUpdated`s.
             let already = self
                 .ledger
                 .seen(&key)
                 .map_err(|e| TaskError::PollFailed(format!("ledger.seen: {e}")))?;
-            if already {
+            let first_in_session = !self.last_seen.contains_key(&pr.number);
+
+            if already && !first_in_session {
                 continue;
             }

-            let event = if self.last_seen.contains_key(&pr.number) {
-                DaemonEvent::PrUpdated {
+            let event = if first_in_session {
+                DaemonEvent::PrOpened {
                    owner: self.owner.clone(),
                    repo: self.repo.clone(),
                    number: pr.number,
                    head_sha: pr.head_sha.clone(),
                }
             } else {
-                DaemonEvent::PrOpened {
+                DaemonEvent::PrUpdated {
                     owner: self.owner.clone(),
                     repo: self.repo.clone(),
                     number: pr.number,
@@ -127,24 +139,26 @@ impl RepoWatchTask {
                 }
             };
             self.bus.publish(event);
-            self.ledger
-                .record(
-                    &key,
-                    serde_json::json!({
-                        "head_sha": pr.head_sha,
-                        "updated_at": pr.updated_at,
-                    }),
-                )
-                .map_err(|e| TaskError::PollFailed(format!("ledger.record: {e}")))?;
+            if !already {
+                self.ledger
+                    .record(
+                        &key,
+                        serde_json::json!({
+                            "head_sha": pr.head_sha,
+                            "updated_at": pr.updated_at,
+                        }),
+                    )
+                    .map_err(|e| TaskError::PollFailed(format!("ledger.record: {e}")))?;
+            }

             messages.push(TaskMessage::Text(format!(
                 "{} #{} state {}",
                 self.resource_id(pr.number),
                 pr.number,
-                if self.last_seen.contains_key(&pr.number) {
-                    "updated"
-                } else {
+                if first_in_session {
                     "opened"
+                } else {
+                    "updated"
                 }
             )));
@@ -287,7 +301,10 @@ mod tests {
             gh.clone() as Arc<dyn GitHubAdapter>,
             ledger.clone() as Arc<dyn IdempotencyLedger>,
             bus.clone(),
-        );
+        )
+        // Disable the self-throttle for unit tests; we want every
+        // explicit `poll()` call to actually run `do_poll()`.
+        .with_interval(Duration::from_secs(0));
         (t, gh, ledger, bus)
     }

@@ -313,7 +330,8 @@ mod tests {
             gh as Arc<dyn GitHubAdapter>,
             ledger as Arc<dyn IdempotencyLedger>,
             bus,
-        );
+        )
+        .with_interval(Duration::from_secs(0));
         t.poll().await.unwrap();
         let evt = rx.recv().await.unwrap();
         assert!(matches!(evt, DaemonEvent::PrOpened { number: 1, .. }));
@@ -332,7 +350,8 @@ mod tests {
             gh as Arc<dyn GitHubAdapter>,
             ledger as Arc<dyn IdempotencyLedger>,
             bus,
-        );
+        )
+        .with_interval(Duration::from_secs(0));
         t.poll().await.unwrap();
         let _ = rx.recv().await.unwrap();
         // Second poll: same SHA + updated_at → no event.
         t.poll().await.unwrap();
         assert!(rx.try_recv().is_err());
     }
@@ -353,7 +372,8 @@ mod tests {
             gh.clone() as Arc<dyn GitHubAdapter>,
             ledger as Arc<dyn IdempotencyLedger>,
             bus,
-        );
+        )
+        .with_interval(Duration::from_secs(0));
         t.poll().await.unwrap();
         let _ = rx.recv().await.unwrap();
         // Simulate force-push: head_sha changes via mock override.
@@ -377,7 +397,8 @@ mod tests {
             gh_open as Arc<dyn GitHubAdapter>,
             ledger.clone() as Arc<dyn IdempotencyLedger>,
             bus.clone(),
-        );
+        )
+        .with_interval(Duration::from_secs(0));
         t.poll().await.unwrap();
         let _ = rx.recv().await.unwrap();

@@ -396,7 +417,7 @@ mod tests {
         let bus = EventBus::new();
         let mut rx = bus.subscribe();

-        // First task instance: emits PrOpened.
+        // First task instance: emits PrOpened, records the ledger entry.
         let mut t1 = RepoWatchTask::new(
             "t-1".into(),
             "o",
             "r",
             gh.clone() as Arc<dyn GitHubAdapter>,
             ledger.clone() as Arc<dyn IdempotencyLedger>,
             bus.clone(),
-        );
+        )
+        .with_interval(Duration::from_secs(0));
         t1.poll().await.unwrap();
         let _ = rx.recv().await.unwrap();
         drop(t1);

-        // Second task instance ("restart"): same ledger, fresh in-memory state.
+        // Second task instance ("restart"): same ledger, fresh
+        // in-memory state. We MUST republish `PrOpened` so the
+        // event coordinator can respawn the per-PR `MonitorPrTask`,
+        // even though the ledger already has the state hash.
         let mut t2 = RepoWatchTask::new(
             "t-1".into(),
             "o",
             "r",
-            gh as Arc<dyn GitHubAdapter>,
-            ledger as Arc<dyn IdempotencyLedger>,
+            gh.clone() as Arc<dyn GitHubAdapter>,
+            ledger.clone() as Arc<dyn IdempotencyLedger>,
             bus,
-        );
+        )
+        .with_interval(Duration::from_secs(0));
+        t2.poll().await.unwrap();
+        let evt = rx.recv().await.unwrap();
+        assert!(matches!(evt, DaemonEvent::PrOpened { number: 1, .. }));
+
+        // Third poll on the same instance: the ledger still has it AND
+        // last_seen has it — no duplicate event.
         t2.poll().await.unwrap();
-        // Ledger says "seen" — no event.
         assert!(rx.try_recv().is_err());
     }
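Taken together, the republish rules in this patch reduce to a three-way decision over two booleans. Restated as a standalone function for clarity (a hypothetical helper — the real logic stays inline in `do_poll`):

```rust
/// What to publish for one PR on one poll tick, per PATCH 3/3.
enum Emit {
    Opened,  // first sighting this instance → (re)spawn MonitorPrTask
    Updated, // known PR, state hash moved
    Nothing, // known PR, state already published — true duplicate
}

fn decide(already_in_ledger: bool, first_in_session: bool) -> Emit {
    match (first_in_session, already_in_ledger) {
        // Covers the restart case: republish even if the ledger
        // already holds this state hash.
        (true, _) => Emit::Opened,
        (false, false) => Emit::Updated,
        (false, true) => Emit::Nothing,
    }
}
```

The remaining subtlety is the record step: the ledger is written only when `already_in_ledger` is false, so a restart republish never appends a duplicate NDJSON entry.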