diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 41704fc..c04101b 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -516,7 +516,7 @@ fuseraft validate [options] 12. Agent names referenced in termination strategies exist in the agents list 13. If `Telemetry` is set: `OtlpEndpoint` is a valid absolute URI 14. With `--strict`: every plugin name in any agent's `Plugins` list is registered -15. For every `ApiKeyEnvVar` referenced: the environment variable is set in the current shell (warning if missing) +15. For every `ApiKeyEnvVar` referenced: the environment variable is set in the current shell (warning if missing). Note: agents that rely on the OS keychain rather than an env var skip this check — keychain auth is verified only when `--check-connectivity` is used. **Exit codes** @@ -634,7 +634,7 @@ fuseraft init [output] [options] |------|---------|-------------| | `-t, --template ` | interactive | Team template to use. See templates below. | | `-m, --model ` | auto-detected | Model ID to use for all agents. Auto-detected from your API keys if omitted. | -| `-e, --endpoint ` | `~/.fuseraft/config` | Provider API endpoint URL. Defaults to the endpoint saved in `~/.fuseraft/config` if present. | +| `-e, --endpoint ` | `~/.fuseraft/config` | Provider API endpoint URL. Defaults to the endpoint saved in `~/.fuseraft/config` if present. At run time, agents without an explicit `Endpoint` also inherit this value automatically. | | `--no-interactive` | off | Skip all prompts and generate with the supplied options and defaults. | **Templates** @@ -760,6 +760,15 @@ fuseraft context add ~/specs/ --name specs --description "Product specifications fuseraft context add ~/docs/runbook.md --dir ~/projects/my-app ``` +**Binary document extraction:** When the source is a `.pdf`, `.docx`, `.pptx`, or `.xlsx` file, fuseraft automatically extracts the plain text and stores it as a `.txt` file. 
Agents read the extracted text via `read_file` — no `Document` plugin required. A note is printed on import: + +``` +✓ architecture — 1 file(s), 48.2 KB + Extracted from architecture.pdf: PDF — 24 page(s) → architecture.txt +``` + +If extraction fails (encrypted file, corrupt format), the binary is stored with a warning and will not be readable by agents via `read_file`. + After importing, agents see an entry like this at the top of their system prompt: ``` diff --git a/docs/configuration.md b/docs/configuration.md index 9756860..5ab38a0 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -148,6 +148,7 @@ Per-plugin tool filter. Keys are plugin names; values are arrays of capability t | `Git` | `read` (git_status, git_diff, git_log, git_show, git_branch_list, git_stash_list) · `write` (git_add, git_commit, git_checkout, git_create_branch, git_init, git_push, git_pull, git_stash, git_stash_pop, git_reset) | | `Http` | `get` · `head` · `post` · `put` · `patch` · `delete` — one per HTTP verb | | `Json` | `read` · `write` (json_merge) | +| `Document` | `read` (document_extract_text, document_get_info, document_list_sheets, document_get_sheet) | | `Search` | `read` | | `Changes` | `read` | | `Scratchpad` | `read` · `write` | diff --git a/docs/context-management.md b/docs/context-management.md new file mode 100644 index 0000000..fce007f --- /dev/null +++ b/docs/context-management.md @@ -0,0 +1,361 @@ +# Context Management + +Context is the most important resource in a long-running agent session. Every token an agent +sees costs money and time; everything it misses is a potential hallucination or regression. 
+fuseraft manages context through four layers that fire at different points in a session's +lifetime: + +``` +Session start + └─ Layer 1: Context Store → files imported before the session + └─ Layer 2: Persistent Memory → facts recalled from prior sessions + +Each agent turn + └─ Layer 3: ContextWindow → per-agent history filter (every turn) + +History too long + └─ Layer 4: Compaction → replace old turns with a summary +``` + +Each layer is optional and independently configured. Most sessions need only one or two. + +--- + +## Layer 1: Context Store + +The context store pre-loads static reference files into `.fuseraft/context/` before a session +starts. Every agent sees a compact index block at the top of its system prompt listing what is +available, and can access the full content with `read_file`. + +```yaml +# No config required — populated by CLI before running: +# fuseraft context add ~/docs/schema.sql --name db-schema +# fuseraft context add ~/specs/ --name specs +# fuseraft context add ~/docs/design.pdf --name design # text extracted automatically +``` + +**When to use:** Database schemas, API specs, architecture docs, slide decks, spreadsheets, +task briefs — anything too large to paste into the task argument but that agents should know +exists from turn one. + +**Binary documents:** When you import a `.pdf`, `.docx`, `.pptx`, or `.xlsx` file, fuseraft +extracts the plain text at import time and stores a `.txt` file instead. Agents access it +via `read_file` with no extra plugin. For documents found *during* a session — or when you +need individual Excel sheets — use the [`Document` plugin](plugins.md#document) directly. + +See [Context Store](context-store.md) for the full CLI reference. + +--- + +## Layer 2: Persistent Memory + +When `EnableMemory: true` is set on an agent, fuseraft loads that agent's persistent memory +store at session start and prepends a structured block to its instructions. 
Memories survive +between sessions — they accumulate over time, giving agents a working knowledge of the project. + +```yaml +Agents: + - Name: Developer + EnableMemory: true + Instructions: | + You are a Go developer. Write idiomatic, tested code. +``` + +At session start, the agent sees: + +``` +MEMORY — facts recalled from prior sessions: +[preference] preferred-test-runner: Use `go test -race ./...` for all test runs. +[fact] auth-middleware: The auth middleware was rewritten in v2.3 — do not touch the legacy layer. +``` + +**Storage locations:** + +| Context | Path | +|---|---| +| REPL sessions | `~/.fuseraft/memory/repl/` | +| Orchestration agents | `~/.fuseraft/memory/agents/{AgentName}/` | + +**Memory scoping:** In a project directory that has `.fuseraft/`, only memories saved in that +directory are loaded. Directories without `.fuseraft/` fall back to all global memories. + +**REPL:** Memory is always active in the REPL — no config flag needed. Memories are extracted +automatically at the end of each session and scoped to the working directory via +`.fuseraft/memory_refs.json`. Use `/memory` commands to inspect or delete them. + +**Memory cap:** The prompt block is capped at 8,000 characters. Entries are ordered by type +then name; entries that would exceed the cap are dropped (header only is kept for visibility). + +See [Configuration — Memory](configuration.md#memory) for the full field reference. + +--- + +## Layer 3: ContextWindow (per-agent history filter) + +By default every agent receives the full accumulated conversation history, including tool-call +frames and tool-result messages from all prior turns. In a long multi-agent session this can +reach hundreds of thousands of tokens — most of it irrelevant to late-stage agents. + +`ContextWindow` lets each agent declare a lighter view. The shared history is never mutated; +only the slice passed to that agent's turn is affected. 
+ +### Filters and their order + +Filters are applied in this order every turn: + +1. **TextOnly / ExcludeAgents** — strip tool noise or specific agents' output +2. **MaxTurnAge** — keep only messages from the last N agent turns (semantic cut) +3. **MaxTailMessages** — hard cap: keep only the last N messages (raw count) + +```yaml +Agents: + - Name: Reviewer + ContextWindow: + TextOnly: true # strip all tool-call frames and tool results + ExcludeAgents: # also strip all output from these agents + - Tester + MaxTurnAge: 5 # only keep messages from the last 5 assistant turns + MaxTailMessages: 40 # hard cap after the above filters + ContextCapFraction: 0.8 # emit context_cap_warning when at 80% of MaxTailMessages +``` + +### TextOnly + +Strips all tool-call frames (assistant messages containing only a function-call request) and +all tool-result messages from the history slice. Text-bearing assistant messages and all user +messages are kept. + +**This is the primary lever for context reduction.** A Reviewer that independently re-reads +files and re-runs commands gains nothing from seeing the hundreds of tool results produced by +the Developer — stripping them can reduce input tokens by 90%+ in typical sessions. + +When `ExcludeAgents` is set, tool-result messages are stripped automatically even when +`TextOnly` is false. Tool results are not attributed to a specific agent; leaving them without +their corresponding call frames produces a malformed context with orphaned result IDs. + +### ExcludeAgents + +Names of agents whose messages should be excluded entirely — both text-bearing replies and +tool-call frames. + +### MaxTurnAge + +Keeps only messages from the last N *agent turns*, where each turn ends with an assistant +reply. Unlike `MaxTailMessages` (a raw message count), `MaxTurnAge` is semantic: it counts +backward from the end of history and discards everything before the cut-point. 
+ +Use this to discard early-session context from phases or agents no longer relevant to the +current work — without needing to know the exact message count. + +### MaxTailMessages + +Hard cap applied after the other filters. When the filtered list still exceeds this count, +the oldest messages are dropped. Set `ContextCapFraction` to receive a `context_cap_warning` +event as an early signal before the hard cap is reached. + +### Replay truncation + +Agents sometimes produce verbose stream-of-consciousness output (3–5k tokens). When that text +is replayed verbatim in every subsequent turn, compaction summaries grow each cycle and input +tokens balloon. fuseraft automatically truncates verbose non-summary assistant messages to +2,000 characters when replaying them into the next turn's history. Compaction summaries are +never truncated. + +--- + +## Layer 4: Compaction + +When conversation history grows long enough to approach a model's context window, compaction +fires. It replaces the oldest turns with a single context message that agents treat as +background, then resumes from the retained tail. + +### Trigger + +```yaml +Compaction: + TriggerTurnCount: 50 # fire when assistant-turn count reaches this + KeepRecentTurns: 10 # keep this many turns verbatim; compact the rest +``` + +Compaction fires in two situations: +- Before a session stream starts, when resuming a checkpoint already over the threshold. +- Mid-session, after each checkpoint save, once the live history crosses the threshold. + +`TriggerTurnCount` must be greater than `KeepRecentTurns`. + +### Modes + +| Mode | How context is reconstructed | LLM call? 
| Requirements | +|---|---|---|---| +| `llm` | LLM summarizes the compacted turns | Yes | A model | +| `intent` | Deterministic `✓`/`✗`/`⧖` per tool call from `intents.json` | No | `ChangeTracking` | +| `lossless` | Evidence graph + contract status + state machine position | No | `statemachine` strategy + `EvidenceStore` | +| `hybrid` | Lossless reconstruction prepended before the LLM summary | Yes | `statemachine` strategy + `EvidenceStore` | +| `window` | Oldest user+assistant pairs dropped until within `TokenBudget` | No | — | + +**`intent` is the recommended mode** for most sessions when `ChangeTracking` is configured. +It requires no state machine and produces a deterministic record of every tool call: + +``` +[INTENT-DERIVED RECONSTRUCTION — covers turns 1–20] + +OPERATIONS (chronological): + ✓ write_file → "src/api/users.go" (turn 3, Developer) + ✗ patch_file → "src/api/auth.go" — oldText not found… (turn 4, Developer) + ✓ shell_run → "go test ./..." (turn 5, Tester) + +RESUMPTION NOTE: History compacted from intent log — deterministic ground truth. +Do not re-execute operations marked ✓ (applied). +Operations marked ✗ (failed) should be retried if the task requires them. +``` + +**`lossless` is the recommended mode** for `statemachine` sessions with an `EvidenceStore`. +Instead of summarizing the conversation, it reads disk state directly — state machine position, +contract pass/fail, evidence items — and injects it as ground truth. No hallucination is +possible because no LLM generates the summary. + +**`window` mode** trades context continuity for simplicity. No summary is injected; the oldest +turns are silently dropped. Useful for exploratory sessions where older context genuinely +doesn't matter, or when you want no compaction LLM cost at all. + +### Pinned summaries + +Prior compaction summaries (`IsCompactionSummary`) are pinned and never dropped by `window` +mode. 
This preserves the head of the conversation — each compaction cycle adds a new summary +at the front while the window trims from behind it. + +### Compaction model + +By default, `llm` and `hybrid` modes use the first agent's model to generate the summary. +Override with `Compaction.Model` to use a cheaper model for compaction: + +```yaml +Compaction: + TriggerTurnCount: 50 + KeepRecentTurns: 10 + Mode: hybrid + Model: + ModelId: gpt-4o-mini +``` + +### Enriching summaries + +Two optional flags add structured context blocks before the LLM summary text. Both are +prefixed in this order when both are enabled: symbol graph first, then reasoning excerpts. + +**`IncludeReasoning`** — prepends a `[REASONING EXCERPTS]` block containing the model's +thinking for each compacted turn (truncated to ~500 tokens per turn). Useful when the *why* +behind prior decisions matters as much as the *what*. Requires `Events` to be configured +(reasoning excerpts are read from the session events log). + +**`IncludeSymbolGraph`** — prepends a `[SYMBOL DEPENDENCY GRAPH]` block listing every +`SymbolDefinition` and `SymbolReference` node in the evidence store for files written during +the session. Gives agents an explicit map of what symbols were in scope during the compacted +turns. Requires `EvidenceStore` and `ChangeTracking` to be configured. + +```yaml +Compaction: + TriggerTurnCount: 40 + KeepRecentTurns: 8 + Mode: hybrid + IncludeReasoning: true + IncludeSymbolGraph: true +``` + +### Change log grounding + +When `ChangeTracking` or `Validation.ChangeLogPath` is configured, `llm` and `hybrid` +compactors read `changes.json` at compaction time and inject it into the summary prompt as +authoritative ground truth. Agent success claims are overridden by what `changes.json` actually +records — exit codes and file writes are facts; assistant self-reports are not. + +### Cost accounting + +The summary message's cumulative cost includes all the turns it replaced. 
Budget tracking +remains exact across compaction boundaries. `intent`, `lossless`, and `window` modes incur +no LLM cost at compaction time. + +--- + +## How the layers fit together + +Here is the full sequence from session start through a long-running session: + +``` +1. fuseraft run + ├─ Context Store index → injected into every agent's system prompt + └─ Persistent Memory → prepended to each agent's instructions (if EnableMemory: true) + +2. Each agent turn + └─ ContextWindow filter applied to conversation history + ├─ TextOnly / ExcludeAgents strip tool noise + ├─ MaxTurnAge semantic cut + └─ MaxTailMessages hard cap + └─ Filtered slice + replay-truncated content → sent to LLM + +3. After each checkpoint save + └─ Compaction check + ├─ (llm/intent/lossless/hybrid) assistant-turn count ≥ TriggerTurnCount? + │ YES → compact oldest (Count − KeepRecentTurns) turns into one message + │ save checkpoint with compacted history → continue + └─ (window) estimated token count > TokenBudget? + YES → drop oldest user+assistant pairs until within budget + (pinned summaries are never dropped) +``` + +--- + +## Choosing a strategy + +**For most sessions with `ChangeTracking`:** use `intent` mode. + +```yaml +ChangeTracking: + Path: .fuseraft/changes.json + IntentLogPath: .fuseraft/state/intents.json + +Compaction: + TriggerTurnCount: 40 + KeepRecentTurns: 8 + Mode: intent +``` + +**For `statemachine` sessions with `EvidenceStore`:** use `lossless` or `hybrid`. + +```yaml +Compaction: + TriggerTurnCount: 50 + KeepRecentTurns: 10 + Mode: lossless # or "hybrid" to add an LLM narrative on top +``` + +**For exploratory / throw-away sessions:** use `window` to avoid any compaction cost. + +```yaml +Compaction: + Mode: window + TokenBudget: 60000 +``` + +**For a downstream agent (Reviewer, Tester) that needs less history:** use `ContextWindow`. 
+ +```yaml +Agents: + - Name: Reviewer + ContextWindow: + TextOnly: true + MaxTurnAge: 3 +``` + +**For an agent that should know nothing about earlier phases:** combine `ExcludeAgents` with +`MaxTailMessages` so it only sees the final handoff. + +```yaml +Agents: + - Name: Auditor + ContextWindow: + ExcludeAgents: + - Developer + - Tester + MaxTailMessages: 20 +``` diff --git a/docs/context.md b/docs/context-store.md similarity index 78% rename from docs/context.md rename to docs/context-store.md index 5fd433d..13fa3f5 100644 --- a/docs/context.md +++ b/docs/context-store.md @@ -92,13 +92,36 @@ fuseraft context list --dir ~/projects/my-app fuseraft context remove runbook --dir ~/projects/my-app ``` +## Document extraction + +When you import a PDF, Word document, PowerPoint presentation, or Excel spreadsheet, +fuseraft automatically extracts the plain text at import time and stores a `.txt` version +in the context directory. Agents can then access the extracted text via `read_file` — +no special plugin required. + +``` +fuseraft context add ~/docs/architecture.pdf +# ✓ architecture — 1 file(s), 48.2 KB +# Extracted from architecture.pdf: PDF — 24 page(s) → architecture.txt +``` + +**Supported formats:** `.pdf`, `.docx`, `.pptx`, `.xlsx` + +If text extraction fails (encrypted document, corrupted file), the original binary is stored +instead and a warning is printed. Binary files cannot be read by agents via `read_file`. + +For working with documents found *during* a session, or reading individual Excel sheets, +use the [`Document` plugin](plugins.md#document) directly. 
+ ## What to import The context store works well for: - **Database schemas** — schema SQL, ERDs, or migration history - **API specifications** — OpenAPI/Swagger YAML, Postman collections -- **Architecture documents** — design docs, ADRs, system diagrams +- **Architecture documents** — design docs, ADRs, system diagrams (PDF, DOCX) +- **Slide decks** — PPTX presentations extracted to slide-by-slide text +- **Spreadsheets** — XLSX workbooks with multiple sheets, each extracted as a table - **Reference data** — seed data, sample payloads, fixture files - **Task briefs** — detailed specs too long to paste into the task argument diff --git a/docs/index.md b/docs/index.md index 820815c..ff23104 100644 --- a/docs/index.md +++ b/docs/index.md @@ -31,7 +31,8 @@ This is a personal project and a work in progress, suited for experimentation an | [Security & Sandbox](security.md) | File and network containment | | [Governance](governance.md) | Execution rings, audit log, circuit breaker, SLO tracking | | [Sessions](sessions.md) | Resumption, HITL, cost tracking, compaction | -| [Context Store](context.md) | Importing reference material for agents | +| [Context Management](context-management.md) | How fuseraft manages context across a long session | +| [Context Store](context-store.md) | Importing reference material for agents | | [Examples](examples.md) | Ready-to-use config examples | ## VS Code Extension diff --git a/docs/models.md b/docs/models.md index 9efb27b..6b720fb 100644 --- a/docs/models.md +++ b/docs/models.md @@ -62,8 +62,8 @@ Any field left empty falls back to auto-detection. |-------|------|---------|-------------| | `ModelId` | string | — | Model identifier sent to the API. | | `Provider` | string | auto | Connector type: `openai`, `azure`, `google`, `mistral`, `ollama`. Auto-detected from `ModelId` if omitted. | -| `Endpoint` | string | auto | API base URL. Auto-detected from provider if omitted. Required for `azure`. 
| -| `ApiKeyEnvVar` | string | auto | Name of the environment variable holding the API key. Auto-detected from provider if omitted. Leave empty for `ollama`. | +| `Endpoint` | string | auto | API base URL. Auto-detected from provider if omitted. Required for `azure`. Falls back to `endpoint` in `~/.fuseraft/config` when blank. | +| `ApiKeyEnvVar` | string | auto | Name of the environment variable holding the API key. Auto-detected from provider if omitted. Leave empty for `ollama`. Falls back to `apiKeyEnvVar` in `~/.fuseraft/config` when blank. | | `MaxTokens` | int | `0` | Max tokens per response. `0` = use model default. | | `Temperature` | number | — | Sampling temperature (0.0–2.0). Omit for reasoning models that reject this parameter. | @@ -90,6 +90,34 @@ For any model not matching the table, specify `Provider`, `Endpoint`, and `ApiKe --- +## Global config defaults + +`~/.fuseraft/config` can define a default `endpoint` and `apiKeyEnvVar` that are applied to every agent model (and named alias) that doesn't set those fields itself. This means you only need to configure the provider once — generated agent files work out of the box without repeating the values. + +```json +{ + "modelId": "anthropic.claude-sonnet-4-5-20250929-v1:0", + "endpoint": "http://localhost:3000/api/openai/v1", + "apiKeyEnvVar": "OPENWEBUI_API_KEY" +} +``` + +Set this file via `fuseraft repl` (the setup wizard writes it automatically) or edit it directly. + +### OS keychain fallback + +If an agent model has neither `ApiKey` nor `ApiKeyEnvVar` set after global defaults are applied, fuseraft retrieves the key stored in the OS keychain (set via `fuseraft key set` or the REPL wizard) and injects it as a literal `ApiKey`. This means the full auth resolution order for any agent model is: + +1. Explicit `ApiKey` in the agent file (literal value) +2. `ApiKeyEnvVar` from the agent file (env var lookup) +3. `apiKeyEnvVar` from `~/.fuseraft/config` (env var lookup) +4. 
OS keychain (retrieved once at startup, injected as literal key) +5. Nothing — Ollama and other unauthenticated providers work without a key + +Per-agent values always win; global values only fill in empty fields. + +--- + ## Supported providers ### openai — OpenAI and OpenAI-compatible APIs diff --git a/docs/plugins.md b/docs/plugins.md index d2918d9..6d45ab8 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -387,6 +387,23 @@ Agents: --- +## Document + +Read rich document formats as plain text. All operations are read-only. Sandbox rules apply when `FileSystemSandboxPath` is configured. + +| Function | Parameters | Description | +|----------|-----------|-------------| +| `document_extract_text` | `path` | Extract full plain text from a PDF, DOCX, PPTX, or XLSX file. Returns a format/size header followed by the extracted text. | +| `document_get_info` | `path` | Return format metadata (page/sheet count, file size, extracted character count) without returning the full text. Cheaper than `extract_text` for planning. | +| `document_list_sheets` | `path` | List sheet names in an Excel file (`.xlsx` only). | +| `document_get_sheet` | `path`, `sheetName`, `maxRows` (default 0 = all) | Extract one sheet from an Excel file as a pipe-delimited text table. | + +**Supported formats:** `.pdf`, `.docx`, `.pptx`, `.xlsx` + +**Context store integration:** When you run `fuseraft context add` on a supported document, the text is automatically extracted and stored as a `.txt` file at import time. Agents can then access it via `read_file` without needing the `Document` plugin. Use `Document` when you need on-demand extraction inside a session (e.g. processing documents found during a task, or working with individual Excel sheets). + +--- + ## MCP plugins In addition to the built-in plugins above, tools from any connected MCP server are available as plugins. The plugin name is the `Name` field from `McpServers` config. 
diff --git a/docs/validators.md b/docs/validators.md index 7cd7b4c..30623e1 100644 --- a/docs/validators.md +++ b/docs/validators.md @@ -164,17 +164,17 @@ The validator checks THIS TURN ONLY — prior-turn runs do not carry forward. ## RequireWriteFile -**Used on:** `HANDOFF TO TESTER` (or any route where you require the agent to have written a file this turn) +**Used on:** Any route where the agent must have written at least one file this turn (e.g. `HANDOFF TO TESTER`, `RECON COMPLETE`, `HANDOFF TO REVIEWER`) -**What it checks:** Walks backward through the conversation history looking for completed `write_file` tool calls (`Role=Tool` messages with a `FunctionResultContent` whose function name contains `write_file`). Stops at the most recent user-role message. +**What it checks:** Walks backward through the conversation history looking for completed `write_file` or `patch_file` tool calls (`Role=Tool` messages with a `FunctionResultContent` whose function name contains either string). Stops at the most recent user-role message. -**Passes if:** At least one `write_file` call completed in the current agent turn. +**Passes if:** At least one `write_file` or `patch_file` call completed in the current agent turn. -**Fails if:** No `write_file` call is found — meaning the agent described a file write in text but never actually called the tool. +**Fails if:** Neither tool was called — meaning the agent described a file write in text but never actually called the tool. Text, code blocks, and responses are not saved to disk. ### ShellFallbackPattern -Some fixes require only a shell command (e.g. a dependency update) and produce no `write_file` call. Set `ShellFallbackPattern` on the route to allow a successful matching `shell_run` to satisfy the validator in place of `write_file`: +Some fixes require only a shell command (e.g. a dependency update) and produce no file-write call. 
Set `ShellFallbackPattern` on the route to allow a successful matching `shell_run` to satisfy the validator instead: ```yaml - Keyword: "HANDOFF TO TESTER" @@ -185,21 +185,17 @@ Some fixes require only a shell command (e.g. a dependency update) and produce n The pattern is a pipe-separated list of substrings (case-insensitive). The validator passes if the turn contains a successful `shell_run` whose command matches any alternative. A failed shell command (exit code non-zero, `[ERROR]`, `[TIMEOUT]`, `[DENIED]`) is never accepted regardless of the pattern. -When `ShellFallbackPattern` is omitted the validator behaves as before — only `write_file` satisfies it. +When `ShellFallbackPattern` is omitted the validator behaves as before — only `write_file`, `patch_file`, or `git_commit` satisfy it. **Error injected on failure:** ``` -HANDOFF TO TESTER blocked: no evidence of real work this turn -(no write_file, no git_commit, no shell fallback matched). +Handoff blocked: no evidence of real work this turn +(no write_file, no patch_file, no git_commit, no shell fallback matched). -Required before handing off: - 1. write_file for every changed file. - 2. shell_run ./build.sh — fix until it passes. - 3. git_add + git_commit. - 4. Retry handoff. - -All tools available: write_file, shell_run, read_file. Code blocks are NOT saved to disk. +You must write at least one file before handing off. Use write_file for new files +or patch_file for surgical edits to existing files. Code blocks in your response +are NOT saved to disk — you must call the tool. 
``` --- diff --git a/mkdocs.yml b/mkdocs.yml index 94f31c9..cf4e5af 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -45,6 +45,8 @@ nav: - Security: security.md - Governance: governance.md - Sessions: sessions.md + - Context Management: context-management.md + - Context Store: context-store.md - Examples: examples.md - Design: design.md diff --git a/src/Cli/Commands/ContextCommand.cs b/src/Cli/Commands/ContextCommand.cs index 4fcccd7..87104aa 100644 --- a/src/Cli/Commands/ContextCommand.cs +++ b/src/Cli/Commands/ContextCommand.cs @@ -74,6 +74,10 @@ protected override async Task ExecuteAsync(CommandContext context, ContextA foreach (var f in item.Files.OrderBy(f => f.RelativePath)) AnsiConsole.MarkupLine($" [dim]{Markup.Escape(f.RelativePath)}[/]"); + if (item.ExtractionInfo is not null) + foreach (var note in item.ExtractionInfo.Split('\n', StringSplitOptions.RemoveEmptyEntries)) + AnsiConsole.MarkupLine($" [dim]{Markup.Escape(note)}[/]"); + AnsiConsole.MarkupLine( $"\n[dim]Agents will see this item listed in their system prompt " + $"and can read it via read_file from " + diff --git a/src/Cli/OrchestratorBuilder.cs b/src/Cli/OrchestratorBuilder.cs index 5208888..eded40e 100644 --- a/src/Cli/OrchestratorBuilder.cs +++ b/src/Cli/OrchestratorBuilder.cs @@ -13,6 +13,7 @@ using fuseraft.Core.Interfaces; using fuseraft.Core.Models; using fuseraft.Infrastructure; +using fuseraft.Infrastructure.KeyStore; using fuseraft.Infrastructure.Plugins; using fuseraft.Orchestration; using fuseraft.Orchestration.Saga; @@ -65,6 +66,14 @@ public static class OrchestratorBuilder // Expand ${ENV_VAR} tokens in security and API profile config before use. config = ExpandEnvVars(config); + // Fill in Endpoint and ApiKeyEnvVar from ~/.fuseraft/config for any agent + // model that doesn't declare them explicitly. 
+ config = ApplyGlobalDefaults(config); + + // For models still missing both ApiKey and ApiKeyEnvVar, inject the key + // stored in the OS keychain so users don't have to set an env var at all. + config = await ApplyKeychainKeyAsync(config, cancellationToken); + // Apply per-config security constraints and API profiles to the security-sensitive plugins. var profiles = config.ApiProfiles.Count > 0 ? (IReadOnlyDictionary)config.ApiProfiles @@ -780,6 +789,78 @@ public static OrchestrationConfig LoadConfig(string configPath) return reader.ReadToEnd().Trim(); } + // Fills in ModelId, Endpoint, and ApiKeyEnvVar from ~/.fuseraft/config on any model + // config that doesn't set them explicitly. This lets the global config act as a + // default provider so agent files work without repeating connection details. + // Per-agent explicit values always win; only empty fields are filled. + private static OrchestrationConfig ApplyGlobalDefaults(OrchestrationConfig config) + { + var (globalCfg, _) = UserConfigStore.Load(); + var globalModelId = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.ModelId) ? globalCfg.ModelId : null; + var globalEndpoint = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.Endpoint) ? globalCfg.Endpoint : null; + var globalApiKeyEnvVar = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.ApiKeyEnvVar) ? globalCfg.ApiKeyEnvVar : null; + + if (globalModelId is null && globalEndpoint is null && globalApiKeyEnvVar is null) return config; + + ModelConfig Fill(ModelConfig m) => m with + { + ModelId = string.IsNullOrWhiteSpace(m.ModelId) && globalModelId is not null ? globalModelId : m.ModelId, + Endpoint = string.IsNullOrWhiteSpace(m.Endpoint) && globalEndpoint is not null ? globalEndpoint : m.Endpoint, + ApiKeyEnvVar = string.IsNullOrWhiteSpace(m.ApiKeyEnvVar) && globalApiKeyEnvVar is not null ? 
globalApiKeyEnvVar : m.ApiKeyEnvVar, + }; + + var agents = config.Agents.Select(a => a with { Model = Fill(a.Model) }).ToList(); + + var models = config.Models.ToDictionary(kv => kv.Key, kv => Fill(kv.Value)); + + var sel = config.Selection with + { + Model = config.Selection.Model is not null ? Fill(config.Selection.Model) : null, + Magentic = config.Selection.Magentic is not null + ? config.Selection.Magentic with { Model = config.Selection.Magentic.Model is not null ? Fill(config.Selection.Magentic.Model) : null } + : null, + }; + + return config with { Agents = agents, Models = models, Selection = sel }; + } + + // Injects the OS keychain key as a literal ApiKey on every model config that has + // neither ApiKey nor ApiKeyEnvVar set. The keychain is read at most once per call. + // Models that already have either field set are left untouched. + private static async Task ApplyKeychainKeyAsync( + OrchestrationConfig config, + CancellationToken cancellationToken = default) + { + // Quick check: any model actually needs a key? + bool NeedsKey(ModelConfig m) => + string.IsNullOrWhiteSpace(m.ApiKey) && string.IsNullOrWhiteSpace(m.ApiKeyEnvVar); + + bool anyAgentNeedsKey = config.Agents.Any(a => NeedsKey(a.Model)) + || config.Models.Values.Any(NeedsKey) + || (config.Selection.Model is not null && NeedsKey(config.Selection.Model)) + || (config.Selection.Magentic?.Model is not null && NeedsKey(config.Selection.Magentic.Model)); + + if (!anyAgentNeedsKey) return config; + + var keychainKey = await ApiKeyStoreFactory.Create().RetrieveAsync(); + if (string.IsNullOrWhiteSpace(keychainKey)) return config; + + ModelConfig Fill(ModelConfig m) => + NeedsKey(m) ? m with { ApiKey = keychainKey } : m; + + var agents = config.Agents.Select(a => a with { Model = Fill(a.Model) }).ToList(); + var models = config.Models.ToDictionary(kv => kv.Key, kv => Fill(kv.Value)); + var sel = config.Selection with + { + Model = config.Selection.Model is not null ? 
Fill(config.Selection.Model) : null, + Magentic = config.Selection.Magentic is not null + ? config.Selection.Magentic with { Model = config.Selection.Magentic.Model is not null ? Fill(config.Selection.Magentic.Model) : null } + : null, + }; + + return config with { Agents = agents, Models = models, Selection = sel }; + } + private static ModelConfig ResolveAlias( ModelConfig model, IReadOnlyDictionary registry) diff --git a/src/Cli/SessionRunner.cs b/src/Cli/SessionRunner.cs index 1a54ad2..e7ce8d8 100644 --- a/src/Cli/SessionRunner.cs +++ b/src/Cli/SessionRunner.cs @@ -190,7 +190,6 @@ await eventEmitter.EmitAsync("session_error", { succeeded = false; errorMessage = ex.Message; - AnsiConsole.WriteException(ex, ExceptionFormats.ShortenPaths); break; } diff --git a/src/Core/Models/UserConfig.cs b/src/Core/Models/UserConfig.cs index 896cabb..8b4761f 100644 --- a/src/Core/Models/UserConfig.cs +++ b/src/Core/Models/UserConfig.cs @@ -13,6 +13,9 @@ public sealed class UserConfig [JsonPropertyName("provider")] public string Provider { get; set; } = string.Empty; + [JsonPropertyName("apiKeyEnvVar")] + public string ApiKeyEnvVar { get; set; } = string.Empty; + // Never written to disk — populated at runtime from the OS keychain. [JsonIgnore] public string ApiKey { get; set; } = string.Empty; diff --git a/src/FuseraftCli.csproj b/src/FuseraftCli.csproj index cdc7538..34c090c 100644 --- a/src/FuseraftCli.csproj +++ b/src/FuseraftCli.csproj @@ -14,6 +14,7 @@ + @@ -32,6 +33,7 @@ + diff --git a/src/Infrastructure/ChatClientFactory.cs b/src/Infrastructure/ChatClientFactory.cs index 2574c1e..b6fe2ff 100644 --- a/src/Infrastructure/ChatClientFactory.cs +++ b/src/Infrastructure/ChatClientFactory.cs @@ -111,11 +111,12 @@ public ModelConfig Resolve(ModelConfig config) && (!string.IsNullOrEmpty(config.ApiKeyEnvVar) || !string.IsNullOrEmpty(config.ApiKey))) return config; - // 2b. Explicit endpoint + literal key (e.g. REPL wizard, custom/enterprise provider). + // 2b. 
Explicit endpoint + any form of auth (literal key or env-var reference). // Skip auto-detection and treat as OpenAI-compatible — the user supplied all necessary // connection info and auto-detection would only misidentify unusual model ID formats // (e.g. AWS Bedrock "anthropic.claude-...:0" being wrongly treated as an Ollama tag). - if (!string.IsNullOrEmpty(config.Endpoint) && !string.IsNullOrEmpty(config.ApiKey)) + if (!string.IsNullOrEmpty(config.Endpoint) + && (!string.IsNullOrEmpty(config.ApiKey) || !string.IsNullOrEmpty(config.ApiKeyEnvVar))) return config with { Provider = string.IsNullOrEmpty(config.Provider) ? "openai" : config.Provider }; // Ollama tag format: "modelname:tag" where the tag contains at least one letter @@ -131,8 +132,15 @@ public ModelConfig Resolve(ModelConfig config) if (detected is null) { - // If the caller provided at least Provider, we can proceed; otherwise fail fast - // with a helpful message rather than a cryptic missing-env-var error later. + // A custom Endpoint is an unambiguous signal that the caller knows which + // provider to use — treat as OpenAI-compatible and skip the prefix check. + // This covers non-standard model IDs (e.g. AWS Bedrock "anthropic.claude-...:0", + // Open WebUI deployments) where the endpoint is set via global config or inline. + if (!string.IsNullOrEmpty(config.Endpoint)) + return config with { Provider = string.IsNullOrEmpty(config.Provider) ? "openai" : config.Provider }; + + // No endpoint and no detectable prefix — fail fast with a helpful message + // rather than a cryptic missing-env-var error later. if (string.IsNullOrEmpty(config.Provider)) throw new InvalidOperationException( $"Cannot determine the LLM provider for model '{config.ModelId}'. 
" + @@ -178,6 +186,10 @@ public IChatClient Create(ModelConfig config) if (string.IsNullOrEmpty(config.Endpoint)) throw new InvalidOperationException( $"Provider 'azure' requires Endpoint to be set (deployment: '{config.ModelId}')."); + if (string.IsNullOrEmpty(apiKey)) + throw new InvalidOperationException( + $"No API key available for Azure deployment '{config.ModelId}' at '{config.Endpoint}'. " + + $"Run 'fuseraft repl' and complete the setup wizard, or add \"apiKeyEnvVar\": \"\" to ~/.fuseraft/config."); return new AzureOpenAIClient( new Uri(config.Endpoint), new ApiKeyCredential(apiKey), @@ -197,6 +209,10 @@ public IChatClient Create(ModelConfig config) throw new InvalidOperationException( $"Provider '{provider}' requires Endpoint to be set (model: '{config.ModelId}'). " + $"This should have been filled in by auto-detection — check the model ID prefix."); + if (string.IsNullOrEmpty(apiKey)) + throw new InvalidOperationException( + $"No API key available for model '{config.ModelId}' at '{config.Endpoint}'. 
" + + $"Run 'fuseraft repl' and complete the setup wizard, or add \"apiKeyEnvVar\": \"\" to ~/.fuseraft/config."); return new OpenAIClient( new ApiKeyCredential(apiKey), new OpenAIClientOptions { Transport = transport, Endpoint = new Uri(config.Endpoint), NetworkTimeout = HttpClientTimeout }) diff --git a/src/Infrastructure/ContextStore.cs b/src/Infrastructure/ContextStore.cs index 0ddbc1d..936c035 100644 --- a/src/Infrastructure/ContextStore.cs +++ b/src/Infrastructure/ContextStore.cs @@ -72,34 +72,42 @@ public async Task AddAsync( Directory.Delete(destDir, recursive: true); Directory.CreateDirectory(destDir); - var files = new List(); + var files = new List(); + var extractionNotes = new List(); if (isFile) { - var fileName = Path.GetFileName(fullSource); - File.Copy(fullSource, Path.Combine(destDir, fileName)); - files.Add(new ContextFileEntry(fileName, new FileInfo(fullSource).Length)); + var (entry, note) = await StoreFileAsync(fullSource, destDir, ct); + files.Add(entry); + if (note is not null) extractionNotes.Add(note); } else { foreach (var src in Directory.EnumerateFiles(fullSource, "*", SearchOption.AllDirectories)) { - var rel = Path.GetRelativePath(fullSource, src); - var dest = Path.Combine(destDir, rel); - Directory.CreateDirectory(Path.GetDirectoryName(dest)!); - File.Copy(src, dest); - files.Add(new ContextFileEntry(rel.Replace('\\', '/'), new FileInfo(src).Length)); + var rel = Path.GetRelativePath(fullSource, src); + var destSub = Path.Combine(destDir, Path.GetDirectoryName(rel) ?? string.Empty); + Directory.CreateDirectory(destSub); + var (entry, note) = await StoreFileAsync(src, destSub, ct); + var storedRel = Path.Combine( + Path.GetDirectoryName(rel) ?? 
string.Empty, + entry.RelativePath).Replace('\\', '/').TrimStart('/'); + files.Add(new ContextFileEntry(storedRel, entry.SizeBytes)); + if (note is not null) extractionNotes.Add(note); } } var index = await LoadIndexAsync(ct); index.Items[name] = new ContextItem { - Name = name, - Description = description, - SourcePath = fullSource, - ImportedAt = DateTime.UtcNow, - Files = files, + Name = name, + Description = description, + SourcePath = fullSource, + ImportedAt = DateTime.UtcNow, + Files = files, + ExtractionInfo = extractionNotes.Count > 0 + ? string.Join("\n", extractionNotes) + : null, }; await SaveIndexAsync(index, ct); } @@ -199,6 +207,42 @@ private async Task SaveIndexAsync(ContextIndex index, CancellationToken ct) await File.WriteAllTextAsync(indexPath, JsonSerializer.Serialize(index, JsonOpts), ct); } + // If the source file is a supported binary document format, extracts its text and + // stores it as a .txt sibling. Otherwise copies the file verbatim. Returns the stored + // file entry and an optional human-readable extraction note. + private static async Task<(ContextFileEntry Entry, string? Note)> StoreFileAsync( + string sourcePath, string destDir, CancellationToken ct) + { + if (DocumentTextExtractor.IsSupported(sourcePath)) + { + try + { + var (text, info) = DocumentTextExtractor.Extract(sourcePath); + var txtName = Path.GetFileNameWithoutExtension(sourcePath) + ".txt"; + var txtPath = Path.Combine(destDir, txtName); + await File.WriteAllTextAsync(txtPath, text, ct); + var size = new FileInfo(txtPath).Length; + var note = $"Extracted from {Path.GetFileName(sourcePath)}: {info} → {txtName}"; + return (new ContextFileEntry(txtName, size), note); + } + catch (Exception ex) + { + // Extraction failed — copy the binary so the item is still stored, but warn + // that agents will not be able to read it via read_file. 
+ var binName = Path.GetFileName(sourcePath); + var binPath = Path.Combine(destDir, binName); + File.Copy(sourcePath, binPath); + var size = new FileInfo(sourcePath).Length; + var note = $"Warning: extraction failed for {binName} ({ex.Message}) — binary stored, not readable by agents"; + return (new ContextFileEntry(binName, size), note); + } + } + + var fileName = Path.GetFileName(sourcePath); + File.Copy(sourcePath, Path.Combine(destDir, fileName)); + return (new ContextFileEntry(fileName, new FileInfo(sourcePath).Length), null); + } + private static bool IsValidName(string name) => !string.IsNullOrWhiteSpace(name) && name.All(c => char.IsLetterOrDigit(c) || c == '-' || c == '_'); @@ -229,6 +273,13 @@ public sealed class ContextItem [JsonPropertyName("files")] public List<ContextFileEntry> Files { get; init; } = []; + + /// <summary> + /// Set when one or more source files were binary documents that were converted to + /// plain text at import time. Contains one note per extracted file. + /// </summary> + [JsonPropertyName("extractionInfo")] + public string? ExtractionInfo { get; init; } } public sealed record ContextFileEntry( diff --git a/src/Infrastructure/DocumentTextExtractor.cs b/src/Infrastructure/DocumentTextExtractor.cs new file mode 100644 index 0000000..b3436ca --- /dev/null +++ b/src/Infrastructure/DocumentTextExtractor.cs @@ -0,0 +1,207 @@ +using System.Text; +using DocumentFormat.OpenXml.Packaging; +using DocumentFormat.OpenXml.Spreadsheet; +using DocumentFormat.OpenXml.Wordprocessing; +using UglyToad.PdfPig; + +namespace fuseraft.Infrastructure; + +/// +/// Extracts plain text from rich document formats (PDF, DOCX, PPTX, XLSX). +/// Used by <see cref="ContextStore"/> at import time and by +/// <see cref="Plugins.DocumentPlugin"/> at agent runtime. 
+/// +public static class DocumentTextExtractor +{ + public static readonly IReadOnlySet<string> SupportedExtensions = + new HashSet<string>([".pdf", ".docx", ".pptx", ".xlsx"], StringComparer.OrdinalIgnoreCase); + + public static bool IsSupported(string path) => + SupportedExtensions.Contains(Path.GetExtension(path)); + + /// <summary> + /// Extracts plain text from <paramref name="path"/>. + /// Returns the extracted text and a short info line (e.g. "PDF — 12 page(s)"). + /// Throws for unsupported extensions. + /// </summary> + public static (string Text, string Info) Extract(string path) + { + var ext = Path.GetExtension(path).ToLowerInvariant(); + return ext switch + { + ".pdf" => ExtractPdf(path), + ".docx" => ExtractDocx(path), + ".pptx" => ExtractPptx(path), + ".xlsx" => ExtractXlsx(path), + _ => throw new NotSupportedException($"Unsupported document format: {ext}") + }; + } + + /// Returns the sheet names in an Excel file. + public static IReadOnlyList<string> ListSheets(string path) + { + using var doc = SpreadsheetDocument.Open(path, false); + return doc.WorkbookPart?.Workbook?.Sheets?.Elements<Sheet>() + .Select(s => s.Name?.Value ?? string.Empty) + .ToList() ?? []; + } + + /// <summary> + /// Extracts a single sheet from an Excel file as pipe-delimited rows. + /// </summary> + public static (string Text, int RowCount) ExtractSheet(string path, string sheetName, int maxRows = 0) + { + using var doc = SpreadsheetDocument.Open(path, false); + var workbookPart = doc.WorkbookPart + ?? throw new InvalidOperationException("Workbook has no parts."); + + var sharedStrings = BuildSharedStrings(workbookPart); + + var sheet = workbookPart.Workbook?.Sheets?.Elements<Sheet>() + .FirstOrDefault(s => string.Equals(s.Name?.Value, sheetName, StringComparison.OrdinalIgnoreCase)) + ?? 
throw new KeyNotFoundException($"Sheet '{sheetName}' not found."); + + if (sheet.Id?.Value is null) + throw new InvalidOperationException($"Sheet '{sheetName}' has no part ID."); + + var wsPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id.Value); + var data = wsPart.Worksheet.GetFirstChild(); + if (data is null) return (string.Empty, 0); + + var sb = new StringBuilder(); + int rowCount = 0; + foreach (var row in data.Elements()) + { + if (maxRows > 0 && rowCount >= maxRows) break; + var cells = row.Elements().Select(c => GetCellValue(c, sharedStrings)); + sb.AppendLine(string.Join(" | ", cells)); + rowCount++; + } + return (sb.ToString().Trim(), rowCount); + } + + // PDF + + private static (string Text, string Info) ExtractPdf(string path) + { + using var pdf = PdfDocument.Open(path); + var pages = pdf.GetPages().ToList(); + var sb = new StringBuilder(); + foreach (var page in pages) + { + var text = page.Text; + if (!string.IsNullOrWhiteSpace(text)) + sb.AppendLine(text); + } + return (sb.ToString().Trim(), $"PDF — {pages.Count} page(s)"); + } + + // DOCX + + private static (string Text, string Info) ExtractDocx(string path) + { + using var doc = WordprocessingDocument.Open(path, false); + var body = doc.MainDocumentPart?.Document?.Body; + if (body is null) return (string.Empty, "DOCX — empty document"); + + var sb = new StringBuilder(); + foreach (var elem in body.ChildElements) + { + if (elem is Paragraph para) + { + var text = para.InnerText; + if (!string.IsNullOrWhiteSpace(text)) + sb.AppendLine(text); + } + else if (elem is DocumentFormat.OpenXml.Wordprocessing.Table table) + { + foreach (var row in table.Elements()) + { + var cells = row.Elements() + .Select(c => c.InnerText.Trim()) + .Where(t => !string.IsNullOrEmpty(t)); + sb.AppendLine(string.Join(" | ", cells)); + } + } + } + + var extracted = sb.ToString().Trim(); + var wordCount = extracted.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length; + return (extracted, $"DOCX — ~{wordCount:N0} 
word(s)"); + } + + // PPTX + + private static (string Text, string Info) ExtractPptx(string path) + { + using var pres = PresentationDocument.Open(path, false); + var slideParts = pres.PresentationPart?.SlideParts?.ToList() ?? []; + var sb = new StringBuilder(); + int slideNum = 0; + + foreach (var slidePart in slideParts) + { + slideNum++; + sb.AppendLine($"=== Slide {slideNum} ==="); + foreach (var text in slidePart.Slide.Descendants()) + { + if (!string.IsNullOrWhiteSpace(text.Text)) + sb.AppendLine(text.Text); + } + sb.AppendLine(); + } + + return (sb.ToString().Trim(), $"PPTX — {slideParts.Count} slide(s)"); + } + + // XLSX + + private static (string Text, string Info) ExtractXlsx(string path) + { + using var doc = SpreadsheetDocument.Open(path, false); + var workbookPart = doc.WorkbookPart; + if (workbookPart is null) return (string.Empty, "XLSX — empty workbook"); + + var sharedStrings = BuildSharedStrings(workbookPart); + var sheets = workbookPart.Workbook?.Sheets?.Elements().ToList() ?? []; + var sb = new StringBuilder(); + int totalRows = 0; + + foreach (var sheet in sheets) + { + sb.AppendLine($"=== Sheet: {sheet.Name} ==="); + if (sheet.Id?.Value is null) continue; + var wsPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id.Value); + var data = wsPart.Worksheet.GetFirstChild(); + if (data is null) continue; + + foreach (var row in data.Elements()) + { + var cells = row.Elements().Select(c => GetCellValue(c, sharedStrings)); + sb.AppendLine(string.Join(" | ", cells)); + totalRows++; + } + sb.AppendLine(); + } + + return (sb.ToString().Trim(), $"XLSX — {sheets.Count} sheet(s), {totalRows:N0} row(s)"); + } + + // Helpers + + private static List BuildSharedStrings(WorkbookPart workbookPart) => + workbookPart.SharedStringTablePart?.SharedStringTable + ?.Elements() + .Select(s => s.InnerText) + .ToList() ?? []; + + private static string GetCellValue(Cell cell, List sharedStrings) + { + var value = cell.CellValue?.Text ?? 
string.Empty; + if (cell.DataType?.Value == CellValues.SharedString + && int.TryParse(value, out var idx) + && (uint)idx < (uint)sharedStrings.Count) + return sharedStrings[idx]; + return value; + } +} diff --git a/src/Infrastructure/Plugins/DocumentPlugin.cs b/src/Infrastructure/Plugins/DocumentPlugin.cs new file mode 100644 index 0000000..34dc5a9 --- /dev/null +++ b/src/Infrastructure/Plugins/DocumentPlugin.cs @@ -0,0 +1,149 @@ +using System.ComponentModel; +using fuseraft.Infrastructure; + +namespace fuseraft.Infrastructure.Plugins; + +/// +/// Reads rich document formats (PDF, DOCX, PPTX, XLSX) as plain text. +/// All operations are read-only. Path arguments are sandbox-checked when a +/// sandbox root is configured. +/// +public sealed class DocumentPlugin(string? sandboxRoot = null) +{ + private readonly string? _sandboxRoot = sandboxRoot is not null + ? Path.GetFullPath(ProcessHelper.ExpandHome(sandboxRoot)) + : null; + + [Description("Extract plain text from a document. Supports PDF, DOCX, PPTX, XLSX.")] + public string ExtractText([Description("Path to the document.")] string path) + { + var denial = ResolveSafe(path, out var resolved); + if (denial is not null) return denial; + if (!File.Exists(resolved)) return PluginResult.Error($"File not found: {resolved}"); + if (!DocumentTextExtractor.IsSupported(resolved)) + return PluginResult.Error( + $"Unsupported format '{Path.GetExtension(resolved)}'. " + + $"Supported: {string.Join(", ", DocumentTextExtractor.SupportedExtensions)}"); + + try + { + var (text, info) = DocumentTextExtractor.Extract(resolved); + return string.IsNullOrWhiteSpace(text) + ? PluginResult.Info($"{info} — no text content found.") + : $"[{info}]\n\n{text}"; + } + catch (Exception ex) + { + return PluginResult.Error($"Extraction failed: {ex.Message}"); + } + } + + [Description("Get format and size metadata for a document. Cheaper than extract_text. 
Supports PDF, DOCX, PPTX, XLSX.")] + public string GetInfo([Description("Path to the document.")] string path) + { + var denial = ResolveSafe(path, out var resolved); + if (denial is not null) return denial; + if (!File.Exists(resolved)) return PluginResult.Error($"File not found: {resolved}"); + if (!DocumentTextExtractor.IsSupported(resolved)) + return PluginResult.Error( + $"Unsupported format '{Path.GetExtension(resolved)}'. " + + $"Supported: {string.Join(", ", DocumentTextExtractor.SupportedExtensions)}"); + + try + { + var fi = new FileInfo(resolved); + var (text, info) = DocumentTextExtractor.Extract(resolved); + var charCount = text.Length; + return $"{info}\nFile size: {FormatSize(fi.Length)}\n" + + $"Extracted text: ~{charCount:N0} characters (~{charCount / 4:N0} tokens)"; + } + catch (Exception ex) + { + return PluginResult.Error($"Could not read document metadata: {ex.Message}"); + } + } + + [Description("List sheet names in an Excel file (.xlsx).")] + public string ListSheets([Description("Path to the .xlsx file.")] string path) + { + var denial = ResolveSafe(path, out var resolved); + if (denial is not null) return denial; + if (!File.Exists(resolved)) return PluginResult.Error($"File not found: {resolved}"); + + var ext = Path.GetExtension(resolved).ToLowerInvariant(); + if (ext != ".xlsx") + return PluginResult.Error($"list_sheets only works on .xlsx files, not '{ext}'."); + + try + { + var sheets = DocumentTextExtractor.ListSheets(resolved); + return sheets.Count == 0 + ? PluginResult.Info("No sheets found.") + : string.Join("\n", sheets.Select((s, i) => $"{i + 1}. 
{s}")); + } + catch (Exception ex) + { + return PluginResult.Error($"Could not read sheet list: {ex.Message}"); + } + } + + [Description("Read one sheet from an Excel file (.xlsx) as a pipe-delimited text table.")] + public string GetSheet( + [Description("Path to the .xlsx file.")] string path, + [Description("Sheet name.")] string sheetName, + [Description("Maximum rows to return (0 = all).")] int maxRows = 0) + { + var denial = ResolveSafe(path, out var resolved); + if (denial is not null) return denial; + if (!File.Exists(resolved)) return PluginResult.Error($"File not found: {resolved}"); + + var ext = Path.GetExtension(resolved).ToLowerInvariant(); + if (ext != ".xlsx") + return PluginResult.Error($"get_sheet only works on .xlsx files, not '{ext}'."); + + try + { + var (text, rowCount) = DocumentTextExtractor.ExtractSheet(resolved, sheetName, maxRows); + if (string.IsNullOrWhiteSpace(text)) + return PluginResult.Info($"Sheet '{sheetName}' is empty."); + var truncNote = maxRows > 0 && rowCount >= maxRows ? $" — first {maxRows} rows" : string.Empty; + return $"[Sheet: {sheetName} — {rowCount} row(s){truncNote}]\n\n{text}"; + } + catch (KeyNotFoundException ex) + { + return PluginResult.Error(ex.Message); + } + catch (Exception ex) + { + return PluginResult.Error($"Could not read sheet '{sheetName}': {ex.Message}"); + } + } + + private string? ResolveSafe(string path, out string resolved) + { + var expanded = ProcessHelper.ExpandHome(path); + resolved = _sandboxRoot is not null && !Path.IsPathRooted(expanded) + ? Path.GetFullPath(expanded, _sandboxRoot) + : Path.GetFullPath(expanded); + + if (_sandboxRoot is null) return null; + + var sandboxPrefix = _sandboxRoot.TrimEnd(Path.DirectorySeparatorChar) + Path.DirectorySeparatorChar; + var resolvedCheck = resolved.TrimEnd(Path.DirectorySeparatorChar) + Path.DirectorySeparatorChar; + var comparison = OperatingSystem.IsWindows() + ? 
StringComparison.OrdinalIgnoreCase + : StringComparison.Ordinal; + + return resolvedCheck.StartsWith(sandboxPrefix, comparison) + ? null + : PluginResult.Denied($"Path '{resolved}' is outside the configured sandbox '{_sandboxRoot}'."); + } + + private static string FormatSize(long bytes) => bytes switch + { + < 1_024 => $"{bytes} B", + < 1_048_576 => $"{bytes / 1_024.0:F1} KB", + < 1_073_741_824 => $"{bytes / 1_048_576.0:F1} MB", + _ => $"{bytes / 1_073_741_824.0:F1} GB", + }; +} diff --git a/src/Infrastructure/Plugins/PluginCapabilityMap.cs b/src/Infrastructure/Plugins/PluginCapabilityMap.cs index 08497ab..65c62de 100644 --- a/src/Infrastructure/Plugins/PluginCapabilityMap.cs +++ b/src/Infrastructure/Plugins/PluginCapabilityMap.cs @@ -23,6 +23,7 @@ namespace fuseraft.Infrastructure.Plugins; /// Gitread (status, diff, log, show, branch_list, stash_list) · write (add, commit, checkout, create_branch, init, push, pull, stash, stash_pop, reset) /// Httpget · post · put · patch · delete — one per HTTP verb /// Jsonread (format, minify, get, keys, search, to_text, validate) · write (merge) +/// Documentread (extract_text, get_info, list_sheets, get_sheet — all read-only) /// Searchread (all search operations are read-only) /// Planread (plan_get, plan_get_summary) · write (plan_create, plan_update_step, plan_add_step) /// Changesread (read, read_latest) @@ -102,6 +103,12 @@ internal static class PluginCapabilityMap ["json_validate"] = "read", ["json_merge"] = "write", + // Document (all read-only) + ["document_extract_text"] = "read", + ["document_get_info"] = "read", + ["document_list_sheets"] = "read", + ["document_get_sheet"] = "read", + // Search (all read-only) ["search_files"] = "read", ["search_content"] = "read", diff --git a/src/Infrastructure/Plugins/PluginRegistry.cs b/src/Infrastructure/Plugins/PluginRegistry.cs index 7948cb2..0eb1430 100644 --- a/src/Infrastructure/Plugins/PluginRegistry.cs +++ b/src/Infrastructure/Plugins/PluginRegistry.cs @@ -68,6 +68,7 
@@ public PluginRegistry RegisterDefaults() Register("Http", () => new HttpPlugin(_sharedHttpClient, logger: _loggerFactory?.CreateLogger())); Register("Json", () => new JsonPlugin()); Register("Search", () => new SearchPlugin()); + Register("Document", () => new DocumentPlugin()); Register("Probe", () => new ProbePlugin()); Register("CodeExecution", () => new CodeExecutionPlugin()); Register("Handoff", () => new HandoffPlugin()); @@ -107,6 +108,7 @@ public PluginRegistry Configure( Register("FileSystem", () => new FileSystemPlugin(sandboxRoot, security.ReadFileSizeLimit, versionStore: fileVersionStore)); Register("Shell", () => new ShellPlugin(sandboxRoot, shellCommandApprover)); Register("Http", () => new HttpPlugin(_sharedHttpClient, allowedHosts, apiProfiles, allowPrivateHosts, _loggerFactory?.CreateLogger())); + Register("Document", () => new DocumentPlugin(sandboxRoot)); return this; } diff --git a/src/Infrastructure/UserConfigStore.cs b/src/Infrastructure/UserConfigStore.cs index 1264067..b7f24c2 100644 --- a/src/Infrastructure/UserConfigStore.cs +++ b/src/Infrastructure/UserConfigStore.cs @@ -30,9 +30,10 @@ public static (UserConfig? Config, string? LegacyKey) Load() var config = new UserConfig { - ModelId = onDisk.ModelId ?? string.Empty, - Endpoint = onDisk.Endpoint ?? string.Empty, - Provider = onDisk.Provider ?? string.Empty, + ModelId = onDisk.ModelId ?? string.Empty, + Endpoint = onDisk.Endpoint ?? string.Empty, + Provider = onDisk.Provider ?? string.Empty, + ApiKeyEnvVar = onDisk.ApiKeyEnvVar ?? 
string.Empty, }; return (config, onDisk.ApiKey); } @@ -48,9 +49,10 @@ public static void Save(UserConfig config) Directory.CreateDirectory(ConfigDir); var onDisk = new OnDiskConfig { - ModelId = config.ModelId, - Endpoint = config.Endpoint, - Provider = config.Provider, + ModelId = config.ModelId, + Endpoint = config.Endpoint, + Provider = config.Provider, + ApiKeyEnvVar = config.ApiKeyEnvVar, }; File.WriteAllText(ConfigPath, JsonSerializer.Serialize(onDisk, JsonOptions)); } @@ -68,6 +70,9 @@ private sealed class OnDiskConfig [JsonPropertyName("provider")] public string? Provider { get; set; } + [JsonPropertyName("apiKeyEnvVar")] + public string? ApiKeyEnvVar { get; set; } + // Present only in configs created before keychain support was added. [JsonPropertyName("apiKey")] public string? ApiKey { get; set; } diff --git a/src/Orchestration/Validation/HandoffToTesterValidator.cs b/src/Orchestration/Validation/HandoffToTesterValidator.cs index cebf124..a87c386 100644 --- a/src/Orchestration/Validation/HandoffToTesterValidator.cs +++ b/src/Orchestration/Validation/HandoffToTesterValidator.cs @@ -7,10 +7,10 @@ namespace fuseraft.Orchestration.Validation; /// -/// Blocks HANDOFF TO TESTER unless the source agent completed real work during -/// the current turn. "Real work" means either: +/// Blocks a handoff unless the source agent completed real work during the current turn. +/// "Real work" means at least one of: /// -/// At least one write_file tool call completed (the normal path), OR +/// A write_file or patch_file tool call completed (the normal path), OR /// /// When is supplied: a successful /// shell_run whose command matches at least one of the pipe-separated @@ -59,7 +59,8 @@ public async Task ValidateAsync( var funcName = HistoryHelpers.FindFunctionName(history, frc.CallId, i) ?? 
string.Empty; - if (funcName.Contains("write_file", StringComparison.OrdinalIgnoreCase)) + if (funcName.Contains("write_file", StringComparison.OrdinalIgnoreCase) || + funcName.Contains("patch_file", StringComparison.OrdinalIgnoreCase)) { hasWriteFile = true; break; @@ -112,15 +113,12 @@ public async Task ValidateAsync( { var failDetail = BuildFailDetail(); return RoutingValidationResult.Fail( - "HANDOFF TO TESTER blocked: no evidence of real work this turn\n" + - "(no write_file, no git_commit, no shell fallback matched).\n\n" + - "Required before handing off:\n" + - " 1. write_file for every changed file.\n" + - " 2. shell_run ./build.sh — fix until it passes.\n" + - " 3. git_add + git_commit.\n" + - " 4. Retry handoff.\n\n" + - failDetail + - "All tools available: write_file, shell_run, read_file. Code blocks are NOT saved to disk."); + "Handoff blocked: no evidence of real work this turn\n" + + "(no write_file, no patch_file, no git_commit, no shell fallback matched).\n\n" + + "You must write at least one file before handing off. Use write_file for new files\n" + + "or patch_file for surgical edits to existing files. Code blocks in your response\n" + + "are NOT saved to disk — you must call the tool.\n\n" + + failDetail); } return RoutingValidationResult.Pass();