diff --git a/Cargo.lock b/Cargo.lock index fe5a969..f6be579 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2377,6 +2377,7 @@ version = "0.1.2" dependencies = [ "clap", "ignore", + "libc", "mockito", "regex", "regorus", diff --git a/Cargo.toml b/Cargo.toml index 15190c5..a5fd6c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,9 @@ regorus = { version = "0.9", default-features = false, features = ["arc"] } reqwest = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] } url = "2" +[target.'cfg(unix)'.dependencies] +libc = "0.2" + [dev-dependencies] tempfile = "3" mockito = "1" diff --git a/README.md b/README.md index 6bf3a07..491f369 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,17 @@ zift report . # detailed findings report ## Deep mode (`--deep`) -`--deep` talks to **any OpenAI-compatible chat-completions endpoint** — one client speaks to Ollama, LM Studio, llama.cpp, vLLM, OpenRouter, OpenAI, and Anthropic-via-proxy. Pick where you want your bytes to go. +`--deep` ships three transports — pick whichever fits your existing tooling. Pick exactly one: + +| Tier | Transport | When | +|------|-----------|------| +| 1 | **MCP server** (`zift mcp`) | You already use an agent host (Claude Code, Cursor, Continue, Cline, Zed). The host owns the model; Zift is a tool provider. | +| 2 | **OpenAI-compatible HTTP** (`--base-url`) | Headless / CI runs against any OpenAI-shaped chat-completions endpoint — Ollama, LM Studio, llama.cpp, vLLM, OpenRouter, OpenAI, Anthropic-via-proxy. | +| 3 | **Subprocess hook** (`--agent-cmd`) | Anything else — `claude -p`, `aider`, custom shell scripts. Stdin: prompt + JSON envelope. Stdout: JSON matching the deep-mode schema. | + +### HTTP transport (`--base-url`) + +One client speaks to **any OpenAI-compatible chat-completions endpoint** — Ollama, LM Studio, llama.cpp, vLLM, OpenRouter, OpenAI, and Anthropic-via-proxy. Pick where you want your bytes to go. 
### Local model (Ollama, LM Studio, llama.cpp) @@ -67,6 +77,56 @@ cost_per_1k_output = 0.0 # e.g. 0.0006 for gpt-4o-mini output `api_key` is intentionally **not** readable from `.zift.toml` — keys belong in `$ZIFT_AGENT_API_KEY` or `--api-key`, not in source-controlled files. +### Subprocess transport (`--agent-cmd`) + +For agents that don't speak the OpenAI HTTP dialect — `claude -p`, `aider`, or any user wrapper script — drive them through stdin/stdout: + +```bash +zift scan ./src --deep --agent-cmd "claude -p --output-format json" +``` + +Zift writes one JSON envelope to the command's stdin and reads the deep-mode JSON response from stdout: + +```jsonc +// stdin (one line, then EOF) +{"system": "...", "user": "...", "schema": { /* JSON Schema */ }} + +// stdout (the deep-mode response schema) +{"findings": [{"line_start": 12, "line_end": 18, "category": "rbac", ...}]} +``` + +The schema is identical to the HTTP transport's response — wrappers around real LLMs forward `system`/`user` straight through. + +#### `.zift.toml` for subprocess + +```toml +[deep] +mode = "subprocess" +agent_cmd = "claude -p --output-format json" +agent_timeout_secs = 600 # generous; LLM CLIs can be slow (default) +``` + +#### Example wrappers + +```bash +# Claude Code CLI in print mode — already emits structured JSON. +zift scan . --deep --agent-cmd "claude -p --output-format json" + +# Custom shell script — read envelope from stdin, call your favorite agent, +# emit `{"findings": [...]}` on stdout. +zift scan . --deep --agent-cmd "./scripts/zift-agent.sh" + +# Pipeline with jq for response massaging. +zift scan . --deep --agent-cmd "my-agent | jq -c '{findings: .results}'" +``` + +#### Caveats + +- **No token tracking.** Subprocess agents don't return token counts in any standard way; `--max-cost` has no effect. Enforce ceilings externally (timeouts, ulimits, wrapper scripts). +- **No retry.** Each candidate gets one subprocess invocation. 
Nonzero exit, bad JSON, or timeout → skip the candidate, keep going. +- **Unix-only for v0.1.4.** Windows users: use the HTTP transport or wrap the agent in a WSL command. +- **Security note.** `agent_cmd` is run through your platform shell. Don't run Zift against an untrusted `.zift.toml` — same threat model as `.editorconfig`-style attacks. + ## MCP server (`zift mcp`) If you already use an agent host — Claude Code, Cursor, Continue, Cline, Zed, or anything else that speaks the [Model Context Protocol](https://modelcontextprotocol.io) — Zift can plug in as a tool provider over stdio: diff --git a/plans/todo/03-pr3-subprocess-hook.md b/plans/done/03-pr3-subprocess-hook.md similarity index 59% rename from plans/todo/03-pr3-subprocess-hook.md rename to plans/done/03-pr3-subprocess-hook.md index 8066e77..adfb44e 100644 --- a/plans/todo/03-pr3-subprocess-hook.md +++ b/plans/done/03-pr3-subprocess-hook.md @@ -105,3 +105,38 @@ Each commit small and reviewable. - **Hard to debug.** When a user's `agent_cmd` returns garbage, the failure mode is opaque. Surface a generic, non-sensitive error to the user (e.g. "agent_cmd failed to parse output"). Gate verbose stdout/stderr capture behind explicit debug logging (e.g. `RUST_LOG=zift::deep=debug`), and even there cap the snippet length and apply the same redaction discipline as `src/deep/client.rs` — `agent_cmd` output can mirror prompt text and scanned source verbatim, which would re-create the secret/source-leak class we already avoid in the HTTP client. - **Security.** Running arbitrary shell commands the user configured is a footgun if `.zift.toml` is checked in to a repo and Zift is run by another user. Document; consider warning when `agent_cmd` is read from a `.zift.toml` not owned by the running user. + +--- + +## 10. Shipped + +**Branch**: `feat/deep-subprocess` +**Status**: open as PR. + +### Module additions + +- `src/deep/analyzer.rs` — `Analyzer` trait + relocated `AnalyzeResponse`/`TokenUsage` from `client.rs`. 
The seam between `deep::run` and concrete transports. +- `src/deep/subprocess.rs` — `SubprocessClient`, ~280 lines including unit tests. Spawns the user's command through the platform shell (`sh -c` on Unix, `cmd /C` on Windows), writes a JSON envelope to stdin, reads stdout to EOF with a wall-clock timeout enforced by polling `try_wait` at 50ms granularity. Stderr is captured for debug logs only. +- `tests/deep_subprocess_integration.rs` — 6 end-to-end tests against shell-script fixtures (`#![cfg(unix)]`). + +### Plan deviations (all flagged in commit messages) + +1. **`base_url`/`model` remain `String`, not `Option`.** Plan §3 implied separate fields per mode. Switching to `Option` would have churned every call site (HTTP client constructor, `cost::CostTracker::new`, every test rt() helper). Empty strings in subprocess mode work because `OpenAiCompatibleClient::new` is never instantiated when `mode == Subprocess` — guarded by the `match` in `deep::run`. Documented on `DeepRuntime`. +2. **Spawn failures (sh exits 127) surface as `BadResponse`, not `Config`.** Plan §9 implied any spawn error was misconfiguration. In practice, `sh -c` itself spawns fine and returns 127 for "command not found" — that surfaces as nonzero exit, mapped to `BadResponse`, which the orchestrator skips. The user still gets a clear error from the per-candidate-skip warn-log; hard-failing the whole deep run on a typo felt heavy-handed when the structural pass would have already produced findings. +3. **Stdout/stderr drained via background threads with `mpsc::channel`.** Plan §3 sketched a single-thread-with-`read_to_string` flow. That deadlocks if the agent fills the stderr pipe (~64KB) before exiting — the writer thread blocks on stdin, the main thread blocks on stdout, and stderr never gets read. Three-thread design (writer, stdout reader, stderr reader) is necessary for backpressure correctness, not a stylistic choice. +4. 
**`SubprocessClient` derives `Debug`.** Required so `Result::unwrap_err` works in `#[test]` blocks. Struct only contains a command string and `Duration` — nothing sensitive. +5. **No `wait_timeout` crate dep.** Plan §3 left the timeout strategy open. We poll `try_wait` at 50ms cadence inside the orchestrator's own loop; works on every platform without a new dependency. Latency overhead is rounding error against agent CLIs that take seconds-to-minutes per request. +6. **Plan moved to `done/` in this same PR.** Folder convention from `00-deep-mode-overview.md`. Overview file's PR 3 cross-reference updated to point at `../done/03-...`. + +### Test counts + +After this PR: + +- 275 lib unit tests (was 253; +22 new in `deep::config::tests` and `deep::subprocess::tests`). +- 18 + 6 + 6 = 30 integration tests across 3 files (`deep_http_integration`, `mcp_stdio_integration`, `deep_subprocess_integration`). + +### Open follow-ups + +- **Concurrency** — subprocess transport is hard-pinned to `max_concurrent = 1`. Rationale: `claude -p` and friends serialize internally, parallelism rarely helps. If a real workload contradicts this, lift the cap behind explicit `[deep] max_concurrent = N`. +- **Windows fixture** — integration tests are `#![cfg(unix)]`. A Rust-binary fixture would unblock Windows CI. Defer until a Windows user files an issue. +- **Owner-mismatch warning on `.zift.toml`.** Plan §9 floated warning when `agent_cmd` comes from a config file not owned by the running user. Useful future hardening; not in scope here. diff --git a/plans/todo/00-deep-mode-overview.md b/plans/todo/00-deep-mode-overview.md index a7ae7d1..74574a1 100644 --- a/plans/todo/00-deep-mode-overview.md +++ b/plans/todo/00-deep-mode-overview.md @@ -19,7 +19,7 @@ Make `zift scan --deep` produce semantic findings (`pass: ScanPass::Semantic`) w |------|-----------|----------------|-----| | 1 | **MCP server** (`zift mcp`) | User has an agent host (Claude Code, Cursor, Continue, Cline, Zed). 
Their agent calls Zift tools; their agent calls the model. We never see the model. | [PR 2](../done/02-pr2-mcp-server.md) | | 2 | **OpenAI-compatible HTTP** (`--base-url`) | Headless / CI runs. One client speaks to Ollama, LM Studio, llama.cpp `server`, vLLM, OpenRouter, Together, Groq, Anthropic-via-proxy, OpenAI itself. | [PR 1](../done/01-pr1-deep-http-transport.md) | -| 3 | **Subprocess hook** (`--agent-cmd`) | Anything else — `claude -p`, `aider`, custom shell scripts, agents that don't expose HTTP. Stdin: prompt + JSON. Stdout: JSON matching our schema. | [PR 3](./03-pr3-subprocess-hook.md) | +| 3 | **Subprocess hook** (`--agent-cmd`) | Anything else — `claude -p`, `aider`, custom shell scripts, agents that don't expose HTTP. Stdin: prompt + JSON. Stdout: JSON matching our schema. | [PR 3](../done/03-pr3-subprocess-hook.md) | User picks explicitly via `[deep] mode = "mcp" | "http" | "subprocess"`. No provider auto-detection magic. @@ -64,4 +64,4 @@ We build PR 1 first even though MCP (PR 2) is the strategically headline answer. - [PR 1 — HTTP transport](../done/01-pr1-deep-http-transport.md) - [PR 2 — MCP server](../done/02-pr2-mcp-server.md) -- [PR 3 — Subprocess hook](./03-pr3-subprocess-hook.md) +- [PR 3 — Subprocess hook](../done/03-pr3-subprocess-hook.md) diff --git a/src/cli.rs b/src/cli.rs index dd1703a..a599f9a 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -116,6 +116,23 @@ pub struct ScanArgs { /// invocation. Build-time validation in `deep::config::build` is enough. #[arg(long, env = "ZIFT_AGENT_API_KEY")] pub api_key: Option, + + /// Shell command line for the subprocess transport (requires --deep) + /// + /// Selects the subprocess deep-mode transport: Zift writes a single + /// JSON envelope `{system, user, schema}` to the command's stdin and + /// reads the deep-mode JSON response from stdout. Use for agent CLIs + /// that don't speak the OpenAI HTTP dialect — e.g. `claude -p + /// --output-format json`, `aider`, or a custom wrapper script. 
+ /// + /// Mutually exclusive with --base-url at config-build time (validated + /// in deep::config::build). + /// + /// Examples: + /// --agent-cmd "claude -p --output-format json" + /// --agent-cmd "./scripts/my-agent.sh" + #[arg(long, requires = "deep")] + pub agent_cmd: Option, } // -- Extract -- @@ -365,4 +382,47 @@ mod tests { panic!("expected Scan command"); } } + + #[test] + fn agent_cmd_requires_deep() { + // Clap's `requires = "deep"` should reject `--agent-cmd` on + // its own — without `--deep`, the subprocess transport never + // gets exercised, so accepting the flag silently would mask a + // misconfigured invocation. + let result = Cli::try_parse_from([ + "zift", + "scan", + "--agent-cmd", + "claude -p --output-format json", + ".", + ]); + assert!( + result.is_err(), + "expected parse error for --agent-cmd without --deep, got: {result:?}", + ); + } + + #[test] + fn agent_cmd_with_deep_parses() { + // Companion to `agent_cmd_requires_deep`: with `--deep` the + // flag must round-trip into `ScanArgs`. + let cli = Cli::try_parse_from([ + "zift", + "scan", + "--deep", + "--agent-cmd", + "claude -p --output-format json", + ".", + ]) + .unwrap(); + if let Some(Command::Scan(args)) = cli.command { + assert!(args.deep); + assert_eq!( + args.agent_cmd.as_deref(), + Some("claude -p --output-format json"), + ); + } else { + panic!("expected Scan command"); + } + } } diff --git a/src/commands/init.rs b/src/commands/init.rs index 2950403..015fb33 100644 --- a/src/commands/init.rs +++ b/src/commands/init.rs @@ -7,6 +7,9 @@ exclude = ["vendor/**", "node_modules/**", "target/**"] # min_confidence = "medium" # [deep] +# # mode = "http" # or "subprocess" — usually inferred from the fields below. 
+# +# # ----- HTTP transport (default) ----- # base_url = "http://localhost:11434/v1" # Ollama, LM Studio, OpenAI-compatible # model = "your-model-name" # max_cost = 5.00 # USD spend ceiling (requires rates below) @@ -14,6 +17,13 @@ exclude = ["vendor/**", "node_modules/**", "target/**"] # cost_per_1k_output = 0.0006 # e.g. gpt-4o-mini output # # API key: set $ZIFT_AGENT_API_KEY in your environment, or pass --api-key. # # Do NOT put the key in this file — it gets checked into source control. +# +# # ----- Subprocess transport ----- +# # Zift writes a JSON envelope (system/user/schema) to the command's stdin +# # and reads {"findings": [...]} from its stdout. For `claude -p`, `aider`, +# # or any wrapper script. +# agent_cmd = "claude -p --output-format json" +# agent_timeout_secs = 600 # default 600 (LLM CLIs can be slow) [extract] package_prefix = "app.authz" diff --git a/src/commands/scan.rs b/src/commands/scan.rs index 1bac2a4..14d3af0 100644 --- a/src/commands/scan.rs +++ b/src/commands/scan.rs @@ -44,11 +44,24 @@ pub fn execute(args: ScanArgs, config: ZiftConfig) -> Result<()> { let mut result = scanner::scan(&path, &loaded_rules, &args, &config)?; if let Some(runtime) = deep_runtime.as_ref() { - tracing::info!( - "running deep scan: base_url={} model={}", - runtime.base_url, - runtime.model, - ); + match runtime.mode { + deep::config::DeepMode::Http => tracing::info!( + "running deep scan via HTTP: base_url={} model={}", + runtime.base_url, + runtime.model, + ), + // Deliberately omit `agent_cmd` from the log — wrapper + // commands frequently embed API keys or tokens + // (`./run.sh --token=$X`), and emitting them at info level + // would leak credentials into shared CI logs. Same instinct + // as the redacted `Debug` impl on `DeepRuntime` for + // `api_key`. Operators who need to verify the configured + // command can read it from `.zift.toml` or pass `--debug`. 
+ deep::config::DeepMode::Subprocess => tracing::info!( + "running deep scan via subprocess: timeout={}s", + runtime.agent_timeout_secs, + ), + } result.findings = deep::run(result.findings, &path, runtime)?; } diff --git a/src/config.rs b/src/config.rs index 59354f7..07f3eac 100644 --- a/src/config.rs +++ b/src/config.rs @@ -24,15 +24,32 @@ pub struct ScanConfig { #[derive(Debug, Default, Deserialize)] #[serde(default)] pub struct DeepConfig { + /// Transport mode: `"http"` (PR 1) or `"subprocess"` (PR 3). When + /// unset, [`crate::deep::config::resolve_mode`] infers from which + /// other fields are populated. Default is `"http"`. + pub mode: Option, /// OpenAI-compatible chat-completions endpoint, e.g. "http://localhost:11434/v1". + /// Used only when `mode = "http"`. pub base_url: Option, - /// Model name to send to the agent endpoint. + /// Model name to send to the agent endpoint. Used only when `mode = "http"`. pub model: Option, + /// Shell command line invoked per request when `mode = "subprocess"`. + /// Receives a single JSON envelope on stdin (`{system, user, schema}`) + /// and must write a JSON object matching the deep-mode response schema + /// to stdout. Examples: `claude -p --output-format json`, + /// `aider --no-auto-commits`, or any user wrapper script. + pub agent_cmd: Option, + /// Per-request timeout (seconds) for the subprocess transport. LLM + /// CLIs are noticeably slower than HTTP — 30s+ cold starts are normal + /// for `claude -p`. Default is 600s; tune down if your agent is + /// faster. + pub agent_timeout_secs: Option, /// Maximum spend limit in USD. pub max_cost: Option, /// USD cost per 1k input tokens. Required for `max_cost` to bind on /// hosted models — without this (and `cost_per_1k_output`), the spend - /// tracker is a no-op. + /// tracker is a no-op. Subprocess transport does not report tokens, + /// so this only affects HTTP mode in practice. pub cost_per_1k_input: Option, /// USD cost per 1k output tokens. 
See `cost_per_1k_input`. pub cost_per_1k_output: Option, @@ -90,6 +107,7 @@ languages = ["java", "typescript"] min_confidence = "medium" [deep] +mode = "http" base_url = "http://localhost:11434/v1" model = "qwen2.5-coder:14b" max_cost = 5.00 @@ -106,11 +124,16 @@ additional = ["./custom-rules"] let config: ZiftConfig = toml::from_str(toml).unwrap(); assert_eq!(config.scan.exclude.len(), 2); assert_eq!(config.scan.languages, vec!["java", "typescript"]); + assert_eq!(config.deep.mode.as_deref(), Some("http")); assert_eq!( config.deep.base_url.as_deref(), Some("http://localhost:11434/v1") ); assert_eq!(config.deep.model.as_deref(), Some("qwen2.5-coder:14b")); + // HTTP mode leaves the subprocess fields unset so transport + // selection in `deep::config::resolve_mode` stays unambiguous. + assert!(config.deep.agent_cmd.is_none()); + assert!(config.deep.agent_timeout_secs.is_none()); assert_eq!(config.deep.max_cost, Some(5.0)); assert_eq!(config.deep.cost_per_1k_input, Some(0.00015)); assert_eq!(config.deep.cost_per_1k_output, Some(0.0006)); @@ -118,6 +141,28 @@ additional = ["./custom-rules"] assert_eq!(config.rules.additional, vec!["./custom-rules"]); } + #[test] + fn parse_subprocess_deep_config() { + // Mirror of `parse_full_config` for the subprocess transport, + // pinned so the new fields can't silently break deserialization. + let toml = r#" +[deep] +mode = "subprocess" +agent_cmd = "claude -p --output-format json" +agent_timeout_secs = 300 +"#; + let config: ZiftConfig = toml::from_str(toml).unwrap(); + assert_eq!(config.deep.mode.as_deref(), Some("subprocess")); + assert_eq!( + config.deep.agent_cmd.as_deref(), + Some("claude -p --output-format json"), + ); + assert_eq!(config.deep.agent_timeout_secs, Some(300)); + // Subprocess mode legitimately leaves HTTP fields blank. 
+ assert!(config.deep.base_url.is_none()); + assert!(config.deep.model.is_none()); + } + #[test] fn parse_partial_config() { let toml = r#" diff --git a/src/deep/analyzer.rs b/src/deep/analyzer.rs new file mode 100644 index 0000000..50fbed0 --- /dev/null +++ b/src/deep/analyzer.rs @@ -0,0 +1,57 @@ +//! Transport-agnostic analyzer interface. +//! +//! `Analyzer` is the seam between [`crate::deep::run`] and the concrete +//! transports — HTTP today, subprocess (PR 3) tomorrow. Adding a new +//! transport means writing one `impl Analyzer` and dispatching on +//! [`crate::deep::config::DeepMode`]; the orchestrator, candidate +//! selection, prompt rendering, schema, merge, and cost tracker all stay +//! unchanged. +//! +//! The shared types ([`AnalyzeResponse`], [`TokenUsage`]) live here so +//! transports can depend on the trait module without taking a transitive +//! dep on each other (the HTTP client must not know the subprocess +//! client exists, and vice versa). + +use crate::deep::error::DeepError; +use crate::deep::finding::SemanticFinding; +use crate::deep::prompt::RenderedPrompt; + +/// One round-trip's worth of model output: the parsed semantic findings +/// plus token-usage stats for cost tracking. +/// +/// Transports that don't have a notion of token usage (subprocess hooks +/// shelling out to opaque CLIs) populate [`TokenUsage::default`], which +/// the cost tracker treats as a no-op. +#[derive(Debug)] +pub struct AnalyzeResponse { + pub findings: Vec, + pub usage: TokenUsage, +} + +/// Token counts reported by the upstream model. `0` for both fields means +/// "no usage reported" — the cost tracker short-circuits to a no-op when +/// either rate is zero, so an empty `TokenUsage` from a transport that +/// can't measure tokens (e.g. subprocess) costs nothing. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct TokenUsage { + pub input_tokens: u32, + pub output_tokens: u32, +} + +/// Send one prompt to a transport and parse the response. 
+/// +/// Implementations are expected to be cheap to construct (one per +/// `deep::run`) and safe to call serially from the orchestrator. The +/// orchestrator currently dispatches sequentially; a future fan-out +/// would require `Sync`, which we'll add when (if) we wire that up. +pub trait Analyzer { + /// Errors must map to the categories [`crate::deep::run`] handles: + /// + /// - [`DeepError::Config`] / [`DeepError::Io`]: hard fail the whole + /// deep pass — these are operator-actionable misconfiguration or + /// system-level failures, not per-call hiccups. + /// - [`DeepError::Http`], [`DeepError::BadResponse`], + /// [`DeepError::Transient`], [`DeepError::Timeout`]: skip the + /// candidate, continue dispatching others. Best-effort enrichment. + fn analyze(&self, prompt: &RenderedPrompt) -> Result; +} diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs index b81691e..67388ed 100644 --- a/src/deep/candidate.rs +++ b/src/deep/candidate.rs @@ -352,6 +352,7 @@ mod tests { fn rt() -> DeepRuntime { DeepRuntime { + mode: crate::deep::config::DeepMode::Http, base_url: "http://x/v1".into(), model: "m".into(), api_key: None, @@ -365,6 +366,8 @@ mod tests { max_prompt_chars: 16_000, excludes: Vec::new(), language_filter: Vec::new(), + agent_cmd: None, + agent_timeout_secs: 600, } } diff --git a/src/deep/client.rs b/src/deep/client.rs index 2e6f637..0343a93 100644 --- a/src/deep/client.rs +++ b/src/deep/client.rs @@ -11,6 +11,7 @@ //! emit JSON in the message body anyway. The retry strips the directive //! and re-parses; if that still fails, we return [`DeepError::BadResponse`]. 
+use crate::deep::analyzer::Analyzer; use crate::deep::config::DeepRuntime; use crate::deep::error::DeepError; use crate::deep::finding::SemanticFinding; @@ -18,17 +19,10 @@ use crate::deep::prompt::RenderedPrompt; use serde::Deserialize; use std::time::Duration; -#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] -pub struct TokenUsage { - pub input_tokens: u32, - pub output_tokens: u32, -} - -#[derive(Debug)] -pub struct AnalyzeResponse { - pub findings: Vec, - pub usage: TokenUsage, -} +// Re-export so existing call sites (`use zift::deep::client::TokenUsage`) +// keep compiling after the move to `analyzer.rs`. Keeps the diff minimal +// and avoids a churn ripple across the integration test suite. +pub use crate::deep::analyzer::{AnalyzeResponse, TokenUsage}; pub struct OpenAiCompatibleClient { http: reqwest::blocking::Client, @@ -209,11 +203,21 @@ impl OpenAiCompatibleClient { } } +impl Analyzer for OpenAiCompatibleClient { + fn analyze(&self, prompt: &RenderedPrompt) -> Result { + OpenAiCompatibleClient::analyze(self, prompt) + } +} + /// Strip a leading/trailing markdown fence if present, regardless of the /// optional language tag (` ```json `, ` ```javascript `, plain ` ``` `, …). /// Some local models wrap JSON in fences despite system-prompt instructions /// not to. -fn strip_markdown_fence(s: &str) -> &str { +/// +/// `pub(crate)` so the subprocess transport ([`crate::deep::subprocess`]) +/// can reuse it — agent CLIs that wrap LLMs (e.g. `claude -p`) emit fenced +/// JSON for the same reason raw chat-completions endpoints do. 
+pub(crate) fn strip_markdown_fence(s: &str) -> &str { let trimmed = s.trim(); let after_fence = match trimmed.strip_prefix("```") { Some(rest) => { @@ -233,7 +237,10 @@ fn strip_markdown_fence(s: &str) -> &str { .trim() } -fn truncate_for_log(s: &str) -> String { +/// `pub(crate)` for the same reason as [`strip_markdown_fence`] — both +/// transports want truncated, UTF-8-safe debug previews of agent output +/// without leaking full prompts/source into log strings. +pub(crate) fn truncate_for_log(s: &str) -> String { const MAX: usize = 200; if s.len() <= MAX { s.to_string() diff --git a/src/deep/config.rs b/src/deep/config.rs index 33d2f81..2f923ed 100644 --- a/src/deep/config.rs +++ b/src/deep/config.rs @@ -12,6 +12,22 @@ use crate::config::ZiftConfig; use crate::deep::error::DeepError; use crate::types::Language; +/// Which transport [`crate::deep::run`] dispatches to per request. +/// +/// Resolution lives in [`build`] — `--agent-cmd` on the CLI implies +/// [`DeepMode::Subprocess`], `--base-url` implies [`DeepMode::Http`], and +/// `[deep] mode = "..."` in `.zift.toml` is honored when no CLI flag +/// pins the choice. Default is [`DeepMode::Http`] — preserves the +/// behavior shipped in PR 1. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DeepMode { + /// OpenAI-compatible chat-completions over HTTP. PR 1's transport. + Http, + /// Spawn an arbitrary command per request; speak the JSON envelope + /// contract over stdin/stdout. PR 3's transport. + Subprocess, +} + /// Resolved runtime configuration for the deep (semantic) scan. /// /// `Debug` is implemented manually to redact `api_key` — derive(Debug) would @@ -19,6 +35,8 @@ use crate::types::Language; /// call site (none today, but defense in depth). #[derive(Clone)] pub struct DeepRuntime { + /// Selected transport. See [`DeepMode`]. 
+ pub mode: DeepMode, pub base_url: String, pub model: String, pub api_key: Option, @@ -37,11 +55,21 @@ pub struct DeepRuntime { /// Language filter from `--language`. Empty == all languages. Forwarded /// to cold-region file discovery. pub language_filter: Vec, + /// Shell command line invoked per request when [`Self::mode`] is + /// [`DeepMode::Subprocess`]. `None` for HTTP. Validated to be present + /// in [`build`] when subprocess mode is selected. + pub agent_cmd: Option, + /// Per-request timeout for the subprocess transport. Distinct from + /// [`Self::request_timeout_secs`] (HTTP) because LLM CLIs are + /// noticeably slower than HTTP — `claude -p` cold-starts can take + /// 30+ seconds before producing a token. + pub agent_timeout_secs: u64, } impl std::fmt::Debug for DeepRuntime { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("DeepRuntime") + .field("mode", &self.mode) .field("base_url", &self.base_url) .field("model", &self.model) .field("api_key", &self.api_key.as_ref().map(|_| "")) @@ -55,6 +83,8 @@ impl std::fmt::Debug for DeepRuntime { .field("max_prompt_chars", &self.max_prompt_chars) .field("excludes", &self.excludes) .field("language_filter", &self.language_filter) + .field("agent_cmd", &self.agent_cmd) + .field("agent_timeout_secs", &self.agent_timeout_secs) .finish() } } @@ -79,6 +109,10 @@ const DEFAULT_MAX_PROMPT_CHARS: usize = 16_000; const DEFAULT_TEMPERATURE: f32 = 0.0; const DEFAULT_REMOTE_CONCURRENCY: usize = 4; const DEFAULT_LOCAL_CONCURRENCY: usize = 1; +/// Subprocess agents (e.g. `claude -p`) cold-start much slower than raw +/// HTTP — 30s+ before the first token is normal. 600s (10 min) is the +/// plan-recommended ceiling: generous, not pathological. +const DEFAULT_AGENT_TIMEOUT_SECS: u64 = 600; /// Heuristic check: is this base_url pointing at a local server? 
/// @@ -93,11 +127,142 @@ fn is_localhost(base_url: &str) -> bool { || lower.contains("://0.0.0.0") } +/// Resolve which transport mode to use from CLI flags, config, and +/// inference. Locked rules: +/// +/// - `--agent-cmd` on the CLI **always** selects subprocess. It is +/// mutually exclusive with `--base-url` (different transports; +/// accepting both would just hide the conflict). +/// - `--base-url` on the CLI **always** selects HTTP. +/// - With no CLI override, an explicit `[deep] mode = "..."` wins. +/// - With neither, infer from which `[deep]` fields are populated. If +/// both are populated, demand the user disambiguate — silently picking +/// one would let a typo flip the transport. +pub(crate) fn resolve_mode(args: &ScanArgs, config: &ZiftConfig) -> Result { + let cli_agent = args.agent_cmd.as_deref().is_some_and(|s| !s.is_empty()); + let cli_base_url = args.base_url.as_deref().is_some_and(|s| !s.is_empty()); + + if cli_agent && cli_base_url { + return Err(DeepError::Config( + "--agent-cmd and --base-url are mutually exclusive — they select \ + different transports (subprocess vs HTTP); pass exactly one" + .into(), + )); + } + if cli_agent { + return Ok(DeepMode::Subprocess); + } + if cli_base_url { + return Ok(DeepMode::Http); + } + + if let Some(m) = config.deep.mode.as_deref().map(str::trim) + && !m.is_empty() + { + return match m.to_ascii_lowercase().as_str() { + "http" => Ok(DeepMode::Http), + "subprocess" => Ok(DeepMode::Subprocess), + other => Err(DeepError::Config(format!( + "[deep] mode must be \"http\" or \"subprocess\" (got {other:?})" + ))), + }; + } + + // No CLI flag, no config mode — infer from which fields are populated. + // Prefer the field the user actually configured; demand disambiguation + // if both are set so a stale field doesn't silently flip transports. 
+ let cfg_agent = config + .deep + .agent_cmd + .as_deref() + .is_some_and(|s| !s.trim().is_empty()); + let cfg_base = config + .deep + .base_url + .as_deref() + .is_some_and(|s| !s.trim().is_empty()); + match (cfg_agent, cfg_base) { + (true, false) => Ok(DeepMode::Subprocess), + (false, true) => Ok(DeepMode::Http), + // Default when nothing is configured: HTTP. The Http branch will then + // hard-fail on missing base_url/model — same behavior as PR 1 shipped. + (false, false) => Ok(DeepMode::Http), + (true, true) => Err(DeepError::Config( + "[deep] sets both base_url and agent_cmd — set [deep] mode = \"http\" \ + or \"subprocess\" to disambiguate, or pass --base-url / --agent-cmd \ + on the CLI" + .into(), + )), + } +} + /// Resolve CLI args + config-file values into a [`DeepRuntime`]. /// -/// Validates required fields; returns [`DeepError::Config`] on missing -/// `base_url` or `model`. +/// Branches on transport mode (see [`resolve_mode`]). Returns +/// [`DeepError::Config`] on missing required fields per the selected +/// mode (`base_url`/`model` for HTTP, `agent_cmd` for subprocess). pub fn build(args: &ScanArgs, config: &ZiftConfig) -> Result { + let mode = resolve_mode(args, config)?; + + let api_key = args.api_key.clone().filter(|s| !s.is_empty()); + let max_cost_usd = + validate_non_negative_finite("max_cost", args.max_cost.or(config.deep.max_cost))?; + let cost_per_1k_input = + validate_non_negative_finite("cost_per_1k_input", config.deep.cost_per_1k_input)?; + let cost_per_1k_output = + validate_non_negative_finite("cost_per_1k_output", config.deep.cost_per_1k_output)?; + + // Warn if a cap is set but no rates are configured — the tracker + // short-circuits when both rates are 0, so the cap would never bind. + // Subprocess transport doesn't track tokens at all, so the warning is + // even more relevant there: spend ceilings have no effect on agent + // CLIs unless the user wraps them in something that returns usage. 
+ let no_rates = + cost_per_1k_input.unwrap_or(0.0) == 0.0 && cost_per_1k_output.unwrap_or(0.0) == 0.0; + if max_cost_usd.is_some() && no_rates { + tracing::warn!( + "--max-cost is set but [deep] cost_per_1k_input / \ + cost_per_1k_output are not configured in .zift.toml — spend \ + tracking is a no-op without rates" + ); + } + + // Merge excludes from config + CLI; preserve CLI ordering after config. + let mut excludes = config.scan.exclude.clone(); + excludes.extend(args.exclude.iter().cloned()); + + match mode { + DeepMode::Http => build_http( + args, + config, + api_key, + max_cost_usd, + cost_per_1k_input, + cost_per_1k_output, + excludes, + ), + DeepMode::Subprocess => build_subprocess( + args, + config, + api_key, + max_cost_usd, + cost_per_1k_input, + cost_per_1k_output, + excludes, + ), + } +} + +#[allow(clippy::too_many_arguments)] +fn build_http( + args: &ScanArgs, + config: &ZiftConfig, + api_key: Option, + max_cost_usd: Option, + cost_per_1k_input: Option, + cost_per_1k_output: Option, + excludes: Vec, +) -> Result { let base_url = args .base_url .clone() @@ -144,37 +309,14 @@ pub fn build(args: &ScanArgs, config: &ZiftConfig) -> Result Result, + max_cost_usd: Option, + cost_per_1k_input: Option, + cost_per_1k_output: Option, + excludes: Vec, +) -> Result { + let agent_cmd = args + .agent_cmd + .clone() + .or_else(|| config.deep.agent_cmd.clone()) + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .ok_or_else(|| { + DeepError::Config( + "subprocess mode requires --agent-cmd \ + (or [deep] agent_cmd in .zift.toml)" + .into(), + ) + })?; + + let agent_timeout_secs = config + .deep + .agent_timeout_secs + .unwrap_or(DEFAULT_AGENT_TIMEOUT_SECS); + + // Subprocess agents serialize internally on the agent's side (one + // `claude -p` invocation, one model call); spawning N of them in + // parallel rarely improves throughput and frequently competes for + // the agent's own rate-limit budget. 
Default to 1; users override + // via shell tooling, not Zift. + let max_concurrent = DEFAULT_LOCAL_CONCURRENCY; + + Ok(DeepRuntime { + mode: DeepMode::Subprocess, + // base_url and model are HTTP-only. Empty in subprocess mode — + // the HTTP client is never instantiated, so these never reach + // the wire. Documented on `DeepRuntime`. + base_url: String::new(), + model: String::new(), + api_key, + max_cost_usd, + cost_per_1k_input, + cost_per_1k_output, + request_timeout_secs: DEFAULT_REQUEST_TIMEOUT_SECS, + max_candidates: DEFAULT_MAX_CANDIDATES, + max_concurrent, + temperature: DEFAULT_TEMPERATURE, + max_prompt_chars: DEFAULT_MAX_PROMPT_CHARS, + excludes, + language_filter: args.language.clone(), + agent_cmd: Some(agent_cmd), + agent_timeout_secs, }) } @@ -293,6 +496,7 @@ mod tests { #[test] fn debug_format_redacts_api_key() { let runtime = DeepRuntime { + mode: DeepMode::Http, base_url: "http://x/v1".into(), model: "m".into(), api_key: Some("sk-supersecret".into()), @@ -306,6 +510,8 @@ mod tests { max_prompt_chars: 16_000, excludes: Vec::new(), language_filter: Vec::new(), + agent_cmd: None, + agent_timeout_secs: 600, }; let formatted = format!("{runtime:?}"); assert!(!formatted.contains("sk-supersecret")); @@ -476,4 +682,187 @@ mod tests { assert_eq!(runtime.max_prompt_chars, DEFAULT_MAX_PROMPT_CHARS); assert_eq!(runtime.temperature, DEFAULT_TEMPERATURE); } + + // -- Subprocess mode resolution --------------------------------------- + + fn subprocess_args(agent_cmd: Option<&str>) -> ScanArgs { + ScanArgs { + deep: true, + agent_cmd: agent_cmd.map(String::from), + ..ScanArgs::default() + } + } + + #[test] + fn cli_agent_cmd_selects_subprocess_mode() { + let args = subprocess_args(Some("claude -p --output-format json")); + let runtime = build(&args, &ZiftConfig::default()).unwrap(); + assert_eq!(runtime.mode, DeepMode::Subprocess); + assert_eq!( + runtime.agent_cmd.as_deref(), + Some("claude -p --output-format json") + ); + // base_url and model are HTTP-only — 
empty in subprocess mode. + assert!(runtime.base_url.is_empty()); + assert!(runtime.model.is_empty()); + } + + #[test] + fn cli_agent_cmd_and_base_url_are_mutually_exclusive() { + // Both pin different transports; refuse to silently pick one. + let mut args = subprocess_args(Some("claude -p")); + args.base_url = Some("http://x/v1".into()); + args.model = Some("m".into()); + let err = build(&args, &ZiftConfig::default()).unwrap_err(); + assert!( + matches!(err, DeepError::Config(ref msg) if msg.contains("mutually exclusive")), + "expected Config(), got: {err:?}", + ); + } + + #[test] + fn config_mode_subprocess_requires_agent_cmd() { + // Explicit `mode = "subprocess"` without `agent_cmd` is a hard + // error — otherwise the failure moves to spawn-time as an opaque + // empty-command crash. + let config = config_with(DeepConfig { + mode: Some("subprocess".into()), + ..DeepConfig::default() + }); + // No CLI flags — fall through to config. + let args = ScanArgs { + deep: true, + ..ScanArgs::default() + }; + let err = build(&args, &config).unwrap_err(); + assert!( + matches!(err, DeepError::Config(ref msg) if msg.contains("agent_cmd")), + "expected Config(), got: {err:?}", + ); + } + + #[test] + fn config_agent_cmd_without_explicit_mode_infers_subprocess() { + // Single populated transport field → infer that mode. No need to + // require users to spell out `mode = "subprocess"` redundantly. + let config = config_with(DeepConfig { + agent_cmd: Some("claude -p".into()), + ..DeepConfig::default() + }); + let args = ScanArgs { + deep: true, + ..ScanArgs::default() + }; + let runtime = build(&args, &config).unwrap(); + assert_eq!(runtime.mode, DeepMode::Subprocess); + assert_eq!(runtime.agent_cmd.as_deref(), Some("claude -p")); + } + + #[test] + fn config_with_both_transport_fields_demands_disambiguation() { + // Both base_url AND agent_cmd are populated, no explicit mode — + // refuse to silently pick. 
A stale config field would otherwise + // flip the transport and the user would never notice until the + // wrong agent ran. + let config = config_with(DeepConfig { + base_url: Some("http://x/v1".into()), + agent_cmd: Some("claude -p".into()), + model: Some("m".into()), + ..DeepConfig::default() + }); + let args = ScanArgs { + deep: true, + ..ScanArgs::default() + }; + let err = build(&args, &config).unwrap_err(); + assert!( + matches!(err, DeepError::Config(ref msg) if msg.contains("disambiguate")), + "expected Config(), got: {err:?}", + ); + } + + #[test] + fn invalid_mode_string_rejected() { + let config = config_with(DeepConfig { + mode: Some("rest".into()), + ..DeepConfig::default() + }); + let args = ScanArgs { + deep: true, + ..ScanArgs::default() + }; + let err = build(&args, &config).unwrap_err(); + assert!(matches!(err, DeepError::Config(_))); + } + + #[test] + fn cli_agent_cmd_overrides_config_mode_http() { + // Edge: config says http, CLI says agent_cmd. CLI wins. + let config = config_with(DeepConfig { + mode: Some("http".into()), + base_url: Some("http://x/v1".into()), + model: Some("m".into()), + ..DeepConfig::default() + }); + let args = subprocess_args(Some("claude -p")); + let runtime = build(&args, &config).unwrap(); + assert_eq!(runtime.mode, DeepMode::Subprocess); + } + + #[test] + fn subprocess_caps_concurrency_to_one() { + let args = subprocess_args(Some("claude -p")); + let runtime = build(&args, &ZiftConfig::default()).unwrap(); + assert_eq!(runtime.max_concurrent, DEFAULT_LOCAL_CONCURRENCY); + } + + #[test] + fn subprocess_default_timeout_is_600s() { + let args = subprocess_args(Some("claude -p")); + let runtime = build(&args, &ZiftConfig::default()).unwrap(); + assert_eq!(runtime.agent_timeout_secs, DEFAULT_AGENT_TIMEOUT_SECS); + assert_eq!(runtime.agent_timeout_secs, 600); + } + + #[test] + fn subprocess_timeout_loaded_from_config() { + let config = config_with(DeepConfig { + agent_cmd: Some("claude -p".into()), + agent_timeout_secs: 
Some(120), + ..DeepConfig::default() + }); + let args = ScanArgs { + deep: true, + ..ScanArgs::default() + }; + let runtime = build(&args, &config).unwrap(); + assert_eq!(runtime.agent_timeout_secs, 120); + } + + #[test] + fn whitespace_only_agent_cmd_treated_as_missing() { + for cmd in [" ", "\t", "\n", " \t\n "] { + let args = subprocess_args(Some(cmd)); + let err = build(&args, &ZiftConfig::default()).unwrap_err(); + assert!( + matches!(err, DeepError::Config(_)), + "expected Config error for agent_cmd={cmd:?}, got: {err:?}", + ); + } + } + + #[test] + fn subprocess_inherits_excludes_and_language_filter() { + // Cold-region discovery is mode-agnostic — both transports must + // honor the same scope filters, otherwise subprocess users would + // get a different file set than HTTP users from the same config. + let mut zcfg = ZiftConfig::default(); + zcfg.scan.exclude = vec!["vendor/**".into()]; + let mut args = subprocess_args(Some("claude -p")); + args.exclude = vec!["cli/**".into()]; + args.language = vec![Language::Java]; + let runtime = build(&args, &zcfg).unwrap(); + assert_eq!(runtime.excludes, vec!["vendor/**", "cli/**"]); + assert_eq!(runtime.language_filter, vec![Language::Java]); + } } diff --git a/src/deep/cost.rs b/src/deep/cost.rs index 3efab92..23fe0db 100644 --- a/src/deep/cost.rs +++ b/src/deep/cost.rs @@ -5,7 +5,7 @@ //! running concurrent requests. If both per-1k rates are unset (or zero), //! tracking is a no-op and `record` always returns `Ok`. 
-use crate::deep::client::TokenUsage; +use crate::deep::analyzer::TokenUsage; use crate::deep::config::DeepRuntime; use crate::deep::error::DeepError; use std::sync::atomic::{AtomicU64, Ordering}; @@ -74,6 +74,7 @@ mod tests { fn rt(cap: Option, in_rate: Option, out_rate: Option) -> DeepRuntime { DeepRuntime { + mode: crate::deep::config::DeepMode::Http, base_url: "http://x/v1".into(), model: "m".into(), api_key: None, @@ -87,6 +88,8 @@ mod tests { max_prompt_chars: 16_000, excludes: Vec::new(), language_filter: Vec::new(), + agent_cmd: None, + agent_timeout_secs: 600, } } diff --git a/src/deep/mod.rs b/src/deep/mod.rs index 56b8b5b..d038b31 100644 --- a/src/deep/mod.rs +++ b/src/deep/mod.rs @@ -5,6 +5,7 @@ //! The primitives in this module are intentionally transport-agnostic so //! that PR 2 (MCP server) and PR 3 (subprocess hook) can reuse them. +pub mod analyzer; pub mod candidate; pub mod client; pub mod config; @@ -14,6 +15,7 @@ pub mod error; pub mod finding; pub mod merge; pub mod prompt; +pub mod subprocess; pub use config::DeepRuntime; pub use error::DeepError; @@ -56,7 +58,14 @@ pub fn run( runtime.max_candidates ); - let client = client::OpenAiCompatibleClient::new(runtime)?; + // Dispatch on transport mode: subprocess hooks (Tier 3) shell out to an + // arbitrary command speaking our JSON contract; HTTP (Tier 2) speaks the + // OpenAI chat-completions dialect. The trait keeps the rest of `run` + // identical between transports. 
+ let analyzer: Box = match runtime.mode { + config::DeepMode::Http => Box::new(client::OpenAiCompatibleClient::new(runtime)?), + config::DeepMode::Subprocess => Box::new(subprocess::SubprocessClient::new(runtime)?), + }; let cost_tracker = cost::CostTracker::new(runtime); // Index structural findings by id so we can look up the seed Finding for @@ -81,7 +90,7 @@ pub fn run( structural_finding: seed, }); - let response = match client.analyze(&prompt) { + let response = match analyzer.analyze(&prompt) { Ok(r) => r, Err(DeepError::Http(e)) => { tracing::warn!( diff --git a/src/deep/subprocess.rs b/src/deep/subprocess.rs new file mode 100644 index 0000000..25d9e06 --- /dev/null +++ b/src/deep/subprocess.rs @@ -0,0 +1,598 @@ +//! Subprocess hook transport (Tier 3 of the deep-mode design). +//! +//! Spawns a user-supplied shell command per request, writes a single +//! JSON envelope to its stdin, and reads the deep-mode response schema +//! from its stdout. The escape hatch for any agent that doesn't fit the +//! MCP server (Tier 1) or the OpenAI-compatible HTTP client (Tier 2): +//! +//! ```text +//! claude -p --output-format json +//! aider --no-auto-commits +//! ./my-wrapper.sh # arbitrary user script +//! ``` +//! +//! Wire format on stdin (one line, then EOF): +//! +//! ```json +//! {"system": "...", "user": "...", "schema": { ... }} +//! ``` +//! +//! Wire format expected on stdout (parsed verbatim, optional markdown +//! fence stripped): +//! +//! ```json +//! {"findings": [{"line_start": 12, "line_end": 18, ...}]} +//! ``` +//! +//! Both shapes are identical to the HTTP transport's contract — that's +//! deliberate: agent CLIs that wrap real LLMs can route system/user +//! straight through, and we never fork the schema between transports. +//! +//! ## Cost tracking +//! +//! N/A. Subprocess agents don't return token counts in any standard +//! way; [`crate::deep::analyzer::TokenUsage::default`] short-circuits +//! the cost tracker. 
Users wanting a ceiling enforce it externally +//! (timeouts, ulimits, wrapper scripts). +//! +//! ## Security +//! +//! The user supplies an arbitrary shell command. If `.zift.toml` is +//! checked in and Zift is run by another user (CI, shared dev box), +//! that's a footgun — same threat as `.editorconfig`-style attacks. We +//! document the risk; we don't sandbox. + +use crate::deep::analyzer::{AnalyzeResponse, Analyzer, TokenUsage}; +use crate::deep::client::{strip_markdown_fence, truncate_for_log}; +use crate::deep::config::DeepRuntime; +use crate::deep::error::DeepError; +use crate::deep::finding::SemanticFinding; +use crate::deep::prompt::RenderedPrompt; +use serde::Deserialize; +use std::io::{Read, Write}; +use std::process::{Command, Stdio}; +use std::sync::mpsc; +use std::thread; +use std::time::{Duration, Instant}; + +/// `Debug` is implemented manually so `Result::unwrap_err` +/// works in tests (the std `unwrap_err` requires `Self: Debug`) without +/// printing the raw command string. Users sometimes inline API keys or +/// bearer tokens directly in `agent_cmd` (e.g. +/// `claude -p --api-key sk-...`); a derived `Debug` would echo those +/// secrets through any panic, `unwrap_err`, or `?`-bubbled error log. +pub struct SubprocessClient { + /// Shell command line, as supplied by the user. Passed to the + /// platform shell (`sh -c` on Unix, `cmd /C` on Windows). Treated + /// as potentially-sensitive — never formatted into errors or logs. + cmd: String, + /// Wall-clock ceiling for one request. On expiry the child is + /// killed and [`DeepError::Timeout`] is returned. 
+ timeout: Duration, +} + +impl std::fmt::Debug for SubprocessClient { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SubprocessClient") + .field("cmd", &"") + .field("timeout", &self.timeout) + .finish() + } +} + +impl SubprocessClient { + pub fn new(runtime: &DeepRuntime) -> Result { + let cmd = runtime + .agent_cmd + .clone() + .ok_or_else(|| { + // Belt-and-suspenders: `deep::config::build` already + // validates this. If we ever reach here, the runtime + // was hand-constructed with a bug — fail loud. + DeepError::Config( + "subprocess analyzer constructed without agent_cmd \ + (runtime invariant violated)" + .into(), + ) + })? + .trim() + .to_string(); + if cmd.is_empty() { + return Err(DeepError::Config( + "subprocess agent_cmd is empty after trim".into(), + )); + } + Ok(Self { + cmd, + timeout: Duration::from_secs(runtime.agent_timeout_secs), + }) + } + + /// Spawn the agent, write the JSON envelope, read stdout to EOF, + /// parse, and return. + fn run_once(&self, prompt: &RenderedPrompt) -> Result { + let envelope = build_envelope(prompt); + + // Spawn through the platform shell so users can supply pipelines + // (`claude -p | jq ...`). On Unix that's `sh -c `; on + // Windows it's `cmd /C `. + let mut child = shell_command(&self.cmd) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .map_err(|e| { + // ENOENT / permission errors at spawn time are + // operator-actionable misconfiguration (typo in + // `agent_cmd`, missing binary), not transient. Surface + // as `Config` so the orchestrator hard-fails the whole + // deep run rather than silently skipping every + // candidate — every spawn would fail identically. + // + // We deliberately do NOT include `self.cmd` in the + // message: users sometimes inline API keys/tokens in + // the command string, and this error can be logged or + // surfaced verbatim by callers. 
The OS error itself + // ("No such file or directory", "Permission denied") + // is enough to diagnose typo/missing-binary cases. + DeepError::Config(format!("failed to spawn agent_cmd: {e}")) + })?; + + let mut stdin = child + .stdin + .take() + .expect("stdin pipe was requested via Stdio::piped"); + let stdout = child + .stdout + .take() + .expect("stdout pipe was requested via Stdio::piped"); + let stderr = child + .stderr + .take() + .expect("stderr pipe was requested via Stdio::piped"); + + // Writer thread: writes the envelope and drops stdin (closes + // the pipe so the child sees EOF). On its own thread because + // pipes block at ~64KB on Linux when the child isn't reading; + // a synchronous write_all could deadlock against a buggy + // agent that exits without consuming stdin. + let envelope_bytes = envelope.into_bytes(); + let writer = thread::spawn(move || -> std::io::Result<()> { + stdin.write_all(&envelope_bytes)?; + stdin.flush()?; + // Drop stdin → pipe closes → child sees EOF. + drop(stdin); + Ok(()) + }); + + // Reader threads for stdout/stderr. Both must run on + // background threads for the same backpressure reason as the + // writer: a chatty agent that fills the pipe before exiting + // would otherwise deadlock with the writer. + let (stdout_tx, stdout_rx) = mpsc::channel::>(); + let _stdout_thread = thread::spawn(move || { + let mut buf = String::new(); + let mut handle = stdout; + let res = handle.read_to_string(&mut buf).map(|_| buf); + // Receiver may have hung up if we timed out — ignore. + let _ = stdout_tx.send(res); + }); + + let (stderr_tx, stderr_rx) = mpsc::channel::(); + let _stderr_thread = thread::spawn(move || { + let mut buf = String::new(); + let mut handle = stderr; + let _ = handle.read_to_string(&mut buf); + let _ = stderr_tx.send(buf); + }); + + // Bound the wait by polling `try_wait`. 
Without `wait_timeout` + // this is the simplest portable approach; 50ms granularity is + // fine since real agent latencies are seconds-to-minutes. + let start = Instant::now(); + let exit = loop { + match child.try_wait() { + Ok(Some(status)) => break status, + Ok(None) => { + if start.elapsed() >= self.timeout { + // Kill the entire process tree (group on Unix), + // reap the immediate child, and let the reader + // threads drain as the pipes close. Killing the + // whole group matters because `sh -c 'cmd'` on + // Linux dash forks `cmd` rather than execing + // into it — leaving the immediate `sh` reaped + // but `cmd` orphaned with our pipes open. + #[cfg(unix)] + kill_process_tree(&child); + #[cfg(not(unix))] + kill_process_tree(&mut child); + let _ = child.wait(); + let _ = writer.join(); + // Bound the drain so a misbehaving descendant + // that somehow survived `SIGKILL` (unkillable + // kernel state, ptrace stop, etc.) cannot hang + // the analyzer. 500ms is well past the kernel's + // signal-delivery latency in practice. + let drain_timeout = Duration::from_millis(500); + let _ = stdout_rx.recv_timeout(drain_timeout); + let _ = stderr_rx.recv_timeout(drain_timeout); + return Err(DeepError::Timeout { + secs: self.timeout.as_secs(), + }); + } + thread::sleep(Duration::from_millis(50)); + } + Err(e) => { + return Err(DeepError::Io(e)); + } + } + }; + + // Drain background threads. EPIPE on writer is OK if the child + // exited before reading stdin (e.g. "help" mode that ignores + // input); only log it. + if let Ok(Err(e)) = writer.join() { + tracing::debug!("subprocess: writer error (likely EPIPE on early exit): {e}"); + } + + // Bound stdout/stderr reads by the remaining wall-clock budget. + // `try_wait` above only watches the immediate shell child, so a + // wrapper like `sh -c 'sleep 30 & printf "{...}"'` makes the + // shell exit promptly while a backgrounded grandchild keeps + // our pipes open. 
An unbounded `recv()` would then hang past + // `agent_timeout_secs`. Using `recv_timeout(remaining)` keeps + // the wall-clock contract intact end-to-end. + let remaining = self.timeout.saturating_sub(start.elapsed()); + let stdout_buf = match stdout_rx.recv_timeout(remaining) { + Ok(res) => res.map_err(DeepError::Io)?, + Err(mpsc::RecvTimeoutError::Timeout) => { + // Same teardown as the in-loop timeout branch: kill the + // process group so any backgrounded descendant releases + // our pipes, then drain stderr briefly. + #[cfg(unix)] + kill_process_tree(&child); + #[cfg(not(unix))] + kill_process_tree(&mut child); + let _ = stderr_rx.recv_timeout(Duration::from_millis(500)); + return Err(DeepError::Timeout { + secs: self.timeout.as_secs(), + }); + } + Err(mpsc::RecvTimeoutError::Disconnected) => { + return Err(DeepError::BadResponse( + "subprocess stdout reader disconnected".into(), + )); + } + }; + // Stderr is best-effort: cap at a short timeout regardless of + // remaining budget so a stuck stderr pipe (rare, but possible + // with weird LD_PRELOAD shims) can't extend the request. + let stderr_buf = stderr_rx + .recv_timeout(Duration::from_millis(500)) + .unwrap_or_default(); + + if !exit.success() { + // Surface as `BadResponse` so the orchestrator skips this + // candidate but keeps going — same per-candidate-skip path + // as malformed JSON. Avoid leaking stderr verbatim into + // the error message (it can echo prompt text); cap and log + // to debug instead. + tracing::debug!( + exit = ?exit, + stderr = %truncate_for_log(&stderr_buf), + stdout = %truncate_for_log(&stdout_buf), + "subprocess: agent_cmd exited nonzero", + ); + return Err(DeepError::BadResponse(format!( + "agent_cmd exited with {} (no findings parsed)", + exit_status_brief(&exit), + ))); + } + + // Parse stdout as our findings envelope. 
Same fence-stripping + // and same truncated-debug-log discipline as the HTTP client — + // model-or-CLI output may mirror prompt text and should not + // appear verbatim in error strings. + let cleaned = strip_markdown_fence(&stdout_buf); + let parsed: FindingsEnvelope = serde_json::from_str(cleaned).map_err(|e| { + tracing::debug!( + error = %e, + preview = %truncate_for_log(&stdout_buf), + stderr_preview = %truncate_for_log(&stderr_buf), + "subprocess: stdout was not valid findings JSON", + ); + DeepError::BadResponse("agent_cmd output was not valid findings JSON".into()) + })?; + + Ok(AnalyzeResponse { + findings: parsed.findings, + // Subprocess agents don't report tokens in a standard way. + // The cost tracker short-circuits on default usage. + usage: TokenUsage::default(), + }) + } +} + +impl Analyzer for SubprocessClient { + fn analyze(&self, prompt: &RenderedPrompt) -> Result { + self.run_once(prompt) + } +} + +/// JSON envelope written verbatim to the agent's stdin. +fn build_envelope(prompt: &RenderedPrompt) -> String { + // Use serde_json directly so any future field additions ride the + // same canonical-JSON path as the HTTP transport's request body. + serde_json::json!({ + "system": prompt.system, + "user": prompt.user, + "schema": prompt.schema, + }) + .to_string() +} + +/// Construct a [`Command`] that runs `cmd` through the platform shell. +/// Unix → `sh -c`; Windows → `cmd /C`. Allowing a shell parse keeps the +/// CLI surface friendly (pipes, redirects, env-var expansion) at the +/// cost of inheriting whatever quoting the user's shell does — same +/// trade-off as `npm scripts` or `Makefile` recipes. +/// +/// On Unix the child is placed in its own session/process group via +/// `setsid` in a pre-exec hook so [`kill_process_tree`] can later send +/// `SIGKILL` to the entire tree. 
Without that, `sh -c 'sleep 30'` on +/// Linux dash forks `sleep` as a grandchild — killing the immediate +/// child reaps `sh` but leaves `sleep` running with our pipes still +/// open, and the reader threads block until `sleep` finishes naturally. +#[cfg(unix)] +fn shell_command(cmd: &str) -> Command { + use std::os::unix::process::CommandExt; + let mut c = Command::new("sh"); + c.arg("-c").arg(cmd); + // SAFETY: `setsid` is async-signal-safe and only mutates this + // process's session/pgid — exactly the call documented as + // permissible inside `pre_exec`. We do not allocate, lock, or + // touch shared state here. + unsafe { + c.pre_exec(|| { + if libc::setsid() == -1 { + return Err(std::io::Error::last_os_error()); + } + Ok(()) + }); + } + c +} + +#[cfg(windows)] +fn shell_command(cmd: &str) -> Command { + let mut c = Command::new("cmd"); + c.arg("/C").arg(cmd); + c +} + +/// Kill the child and any grandchildren it spawned. +/// +/// On Unix we send `SIGKILL` to the negated PID, which addresses the +/// process group (the child became its own group leader via `setsid` +/// in [`shell_command`]). This reaches every descendant — closing +/// inherited pipes promptly so the reader threads can drain. On +/// Windows we fall back to [`std::process::Child::kill`], which the +/// platform implements as `TerminateProcess` on the immediate child +/// only; the trade-off is acceptable here because the same Linux dash +/// vs. macOS bash divergence does not arise on Windows shells. +#[cfg(unix)] +fn kill_process_tree(child: &std::process::Child) { + // SAFETY: `kill(2)` is async-signal-safe and stateless from our + // perspective; the negative PID addresses the process group, and + // an invalid PID just returns ESRCH which we ignore. 
+ unsafe { + let pid = child.id() as libc::pid_t; + libc::kill(-pid, libc::SIGKILL); + } +} + +#[cfg(not(unix))] +fn kill_process_tree(child: &mut std::process::Child) { + let _ = child.kill(); +} + +/// Brief, allocation-free string form of [`std::process::ExitStatus`] +/// for the user-visible error message. `Display` for `ExitStatus` +/// prints "exit status: 1" / "signal: 9" already; just delegate. +fn exit_status_brief(status: &std::process::ExitStatus) -> String { + status.to_string() +} + +#[derive(Deserialize)] +struct FindingsEnvelope { + findings: Vec, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::deep::candidate::{Candidate, CandidateKind}; + use crate::deep::config::DeepMode; + use crate::deep::prompt::{PromptInputs, render}; + use crate::types::Language; + use std::path::PathBuf; + + fn synth_runtime(cmd: &str, timeout_secs: u64) -> DeepRuntime { + DeepRuntime { + mode: DeepMode::Subprocess, + base_url: String::new(), + model: String::new(), + api_key: None, + max_cost_usd: None, + cost_per_1k_input: None, + cost_per_1k_output: None, + request_timeout_secs: 120, + max_candidates: 50, + max_concurrent: 1, + temperature: 0.0, + max_prompt_chars: 16_000, + excludes: Vec::new(), + language_filter: Vec::new(), + agent_cmd: Some(cmd.into()), + agent_timeout_secs: timeout_secs, + } + } + + fn synth_prompt() -> RenderedPrompt { + let cand = Candidate { + kind: CandidateKind::ColdRegion, + file: PathBuf::from("a.ts"), + language: Language::TypeScript, + line_start: 1, + line_end: 5, + source_snippet: "function isAdmin() { return true; }".into(), + imports: Vec::new(), + original_finding_id: None, + seed_category: None, + }; + render(&PromptInputs { + candidate: &cand, + structural_finding: None, + }) + } + + #[test] + fn new_rejects_empty_agent_cmd() { + let mut rt = synth_runtime("nonempty", 60); + rt.agent_cmd = Some("".into()); + let err = SubprocessClient::new(&rt).unwrap_err(); + assert!(matches!(err, DeepError::Config(_))); + } + + #[test] 
+ fn new_rejects_whitespace_agent_cmd() { + let mut rt = synth_runtime("nonempty", 60); + rt.agent_cmd = Some(" ".into()); + let err = SubprocessClient::new(&rt).unwrap_err(); + assert!(matches!(err, DeepError::Config(_))); + } + + #[test] + fn new_rejects_runtime_without_agent_cmd() { + // Defense-in-depth — `build` validates this, but a hand-rolled + // runtime should still surface a clear error. + let mut rt = synth_runtime("ignored", 60); + rt.agent_cmd = None; + let err = SubprocessClient::new(&rt).unwrap_err(); + assert!( + matches!(err, DeepError::Config(ref msg) if msg.contains("invariant")), + "expected Config(), got: {err:?}", + ); + } + + #[test] + fn build_envelope_contains_system_user_and_schema() { + let prompt = synth_prompt(); + let env = build_envelope(&prompt); + let v: serde_json::Value = serde_json::from_str(&env).unwrap(); + assert!(v.get("system").is_some(), "envelope missing 'system'"); + assert!(v.get("user").is_some(), "envelope missing 'user'"); + assert!(v.get("schema").is_some(), "envelope missing 'schema'"); + // Schema round-trips structurally — must equal what render() produced, + // because subprocess wrappers may forward it verbatim to a real LLM. + assert_eq!(v.get("schema").unwrap(), &prompt.schema); + } + + // -- Live-process tests are gated to Unix to avoid maintaining a + // -- Rust fixture binary for Windows. The integration test crate + // -- under tests/deep_subprocess_integration.rs covers the + // -- end-to-end flow with shell-script fixtures. 
+ + #[cfg(unix)] + #[test] + fn happy_path_with_cat_returning_canned_json() { + // sh -c 'cat < DeepRuntime { DeepRuntime { + mode: zift::deep::config::DeepMode::Http, base_url: server_url.to_string(), model: "test-model".into(), api_key: Some("test-key".into()), @@ -30,6 +31,8 @@ fn runtime_for(server_url: &str) -> DeepRuntime { max_prompt_chars: 16_000, excludes: Vec::new(), language_filter: Vec::new(), + agent_cmd: None, + agent_timeout_secs: 600, } } diff --git a/tests/deep_subprocess_integration.rs b/tests/deep_subprocess_integration.rs new file mode 100644 index 0000000..213feca --- /dev/null +++ b/tests/deep_subprocess_integration.rs @@ -0,0 +1,277 @@ +//! End-to-end integration tests for the subprocess transport. +//! +//! These exercise [`zift::deep::run`] (not just the client) against +//! shell-script fixtures, to catch regressions that pure unit tests +//! would miss — e.g. the orchestrator's per-candidate skip path, the +//! merge of agent-emitted findings into the structural set, and the +//! mode-aware dispatch in [`zift::deep::run`]. +//! +//! Gated to Unix because the fixtures are POSIX shell snippets. The +//! Rust-binary-fixture alternative would be portable but materially +//! heavier; subprocess support is documented Unix-first for v0.1.4. +//! +//! See `plans/todo/03-pr3-subprocess-hook.md` §6 for the test plan. + +#![cfg(unix)] + +use std::fs; +use std::path::PathBuf; + +use serde_json::json; +use tempfile::tempdir; + +use zift::deep::config::{DeepMode, DeepRuntime}; +use zift::types::{AuthCategory, Confidence, Finding, Language, ScanPass}; + +/// Build a minimal subprocess-mode runtime pointing at `cmd`. 
+fn subprocess_runtime(cmd: &str, timeout_secs: u64) -> DeepRuntime {
+ DeepRuntime {
+ mode: DeepMode::Subprocess,
+ base_url: String::new(),
+ model: String::new(),
+ api_key: None,
+ max_cost_usd: None,
+ cost_per_1k_input: None,
+ cost_per_1k_output: None,
+ request_timeout_secs: 120,
+ max_candidates: 5,
+ max_concurrent: 1,
+ temperature: 0.0,
+ max_prompt_chars: 16_000,
+ excludes: Vec::new(),
+ language_filter: Vec::new(),
+ agent_cmd: Some(cmd.into()),
+ agent_timeout_secs: timeout_secs,
+ }
+}
+
+fn structural_finding(file: &str, line: usize) -> Finding {
+ Finding {
+ id: format!("structural-{file}-{line}"),
+ file: PathBuf::from(file),
+ line_start: line,
+ line_end: line + 2,
+ code_snippet: String::new(),
+ language: Language::TypeScript,
+ category: AuthCategory::Custom,
+ confidence: Confidence::Low,
+ description: "matched custom rule".into(),
+ pattern_rule: Some("ts-custom".into()),
+ rego_stub: None,
+ pass: ScanPass::Structural,
+ }
+}
+
+#[test]
+fn happy_path_subprocess_emits_semantic_finding() {
+ let dir = tempdir().unwrap();
+ fs::write(
+ dir.path().join("auth.ts"),
+ "// imports\nfunction isAdmin(u) {\n return u.role === 'admin';\n}\n",
+ )
+ .unwrap();
+
+ // Canned response matching deep-mode schema. `cat >/dev/null` swallows
+ // stdin (which we still write to it); printf then emits the canned JSON.
+ let canned = json!({
+ "findings": [{
+ "line_start": 2,
+ "line_end": 4,
+ "category": "rbac",
+ "confidence": "high",
+ "description": "isAdmin role check",
+ "reasoning": "function name + role comparison",
+ "is_false_positive": false
+ }]
+ })
+ .to_string();
+ // Single-quote the canned JSON so `sh -c` passes it through verbatim.
+ // The inner single quotes are escaped via '"'"' for sh -c safety.
+ let cmd = format!( + "cat >/dev/null && printf '%s' '{}'", + canned.replace('\'', "'\"'\"'") + ); + + let runtime = subprocess_runtime(&cmd, 30); + let merged = zift::deep::run(Vec::new(), dir.path(), &runtime).unwrap(); + + let semantic: Vec<&Finding> = merged + .iter() + .filter(|f| f.pass == ScanPass::Semantic) + .collect(); + assert_eq!( + semantic.len(), + 1, + "expected one semantic finding, got {merged:?}" + ); + assert_eq!(semantic[0].category, AuthCategory::Rbac); + assert_eq!(semantic[0].confidence, Confidence::High); +} + +#[test] +fn nonzero_exit_skips_candidate_keeps_structural() { + // A failing agent_cmd must NOT hard-fail the entire deep run. The + // structural set should ride through unchanged — best-effort + // enrichment, not all-or-nothing. + let dir = tempdir().unwrap(); + fs::write( + dir.path().join("auth.ts"), + "function isAdmin(u) { return u.role === 'admin'; }\n", + ) + .unwrap(); + + let runtime = subprocess_runtime("cat >/dev/null && exit 7", 5); + let structural = vec![structural_finding("auth.ts", 1)]; + let merged = zift::deep::run(structural.clone(), dir.path(), &runtime).unwrap(); + + // Structural finding survived; no semantic added. + assert_eq!(merged.len(), 1); + assert_eq!(merged[0].pass, ScanPass::Structural); +} + +#[test] +fn malformed_stdout_skips_candidate() { + let dir = tempdir().unwrap(); + fs::write( + dir.path().join("auth.ts"), + "function isAdmin(u) { return u.role === 'admin'; }\n", + ) + .unwrap(); + + let runtime = subprocess_runtime("cat >/dev/null && printf 'not json'", 5); + let merged = zift::deep::run(Vec::new(), dir.path(), &runtime).unwrap(); + + // No candidates produced findings; merged is empty (no structural + // input either). + assert!(merged.is_empty(), "expected empty merge, got {merged:?}"); +} + +#[test] +fn timeout_skips_candidate_and_returns_promptly() { + // Subprocess sleeps 30s, timeout is 1s — must skip + return, + // not hang the run. 
+ let dir = tempdir().unwrap(); + fs::write( + dir.path().join("auth.ts"), + "function isAdmin(u) { return u.role === 'admin'; }\n", + ) + .unwrap(); + + let runtime = subprocess_runtime("sleep 30", 1); + let start = std::time::Instant::now(); + let merged = zift::deep::run(Vec::new(), dir.path(), &runtime).unwrap(); + let elapsed = start.elapsed(); + + assert!(merged.is_empty()); + assert!( + elapsed < std::time::Duration::from_secs(10), + "deep::run hung past the per-candidate timeout: {elapsed:?}", + ); +} + +#[test] +fn agent_receives_envelope_with_system_user_schema() { + // Round-trip the envelope through a temp file so we can parse and + // assert on its actual contents — `cat > file` captures the raw + // stdin without needing to modify the runtime API. After the run + // we read the file back, parse as JSON, and assert on the concrete + // fields rather than just key presence. This rules out the + // "everything-grepped-empty-merge passed silently" failure mode + // that the companion test below also guards. + let dir = tempdir().unwrap(); + fs::write( + dir.path().join("auth.ts"), + "function isAdmin(u) { return u.role === 'admin'; }\n", + ) + .unwrap(); + + // Capture stdin to a tempfile, then emit canned findings JSON. + // Using `dir.path()` for the capture file keeps the test + // hermetic — the tempdir is cleaned up on drop. + let envelope_path = dir.path().join("captured-envelope.json"); + let canned = r#"{"findings":[]}"#; + let cmd = format!( + "cat > '{}' && printf '%s' '{}'", + envelope_path.display(), + canned, + ); + + let runtime = subprocess_runtime(&cmd, 10); + let merged = zift::deep::run(Vec::new(), dir.path(), &runtime).unwrap(); + + // No structural input + empty canned findings → empty merge. This + // alone could pass via the orchestrator's skip path; the + // assertions below pin the envelope shape, and + // `agent_envelope_failure_path_is_observable` pins the failure + // path. 
+ assert!(merged.is_empty()); + + // Parse the captured envelope and assert on actual contents, not + // just key presence. This catches schema regressions (e.g., + // accidentally renaming a field) that key-presence-via-grep would + // miss. + let raw = fs::read_to_string(&envelope_path) + .expect("agent_cmd should have written the envelope to the temp file"); + let envelope: serde_json::Value = + serde_json::from_str(&raw).expect("envelope must be valid JSON"); + + // `system` and `user` are non-empty strings (rendered prompt + // template). The renderer never produces empty values for these + // — if it did, the deep-mode response wouldn't be useful. + let system = envelope + .get("system") + .and_then(|v| v.as_str()) + .expect("envelope.system must be a string"); + let user = envelope + .get("user") + .and_then(|v| v.as_str()) + .expect("envelope.user must be a string"); + assert!(!system.is_empty(), "envelope.system must be non-empty"); + assert!(!user.is_empty(), "envelope.user must be non-empty"); + // The user prompt should reference the candidate file path so the + // agent has enough context to produce findings keyed to a location. + assert!( + user.contains("auth.ts"), + "envelope.user must reference the candidate file (got: {user:?})", + ); + + // `schema` is the JSON Schema for the response — must be an + // object, not a string or null. Subprocess wrappers may forward + // this verbatim to a real LLM as `response_format.json_schema`. + let schema = envelope + .get("schema") + .expect("envelope.schema must be present"); + assert!( + schema.is_object(), + "envelope.schema must be a JSON object (got: {schema:?})", + ); +} + +#[test] +fn agent_envelope_failure_path_is_observable() { + // Companion to the test above: if the envelope shape changes and + // the assertion above starts passing only via the orchestrator's + // skip path, we'd never notice. 
Pin the failure path: a grep that + // rejects the envelope makes the agent fail nonzero → empty + // merge — but if we provide a structural finding, it should + // survive because subprocess is best-effort. + let dir = tempdir().unwrap(); + fs::write( + dir.path().join("auth.ts"), + "function isAdmin(u) { return u.role === 'admin'; }\n", + ) + .unwrap(); + + // Reject an envelope key that should never be present (acts as a + // guarded "this must fail" path). + let cmd = + "in=$(cat) && echo \"$in\" | grep -q '\"definitely_not_present\"' && printf 'unreached'"; + let runtime = subprocess_runtime(cmd, 5); + + let structural = vec![structural_finding("auth.ts", 1)]; + let merged = zift::deep::run(structural, dir.path(), &runtime).unwrap(); + + // Structural finding rides through; no semantic merged in. + assert_eq!(merged.len(), 1); + assert_eq!(merged[0].pass, ScanPass::Structural); +}