From 79531ed3641e3cb18af6c248b619edbd7b036d17 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 11:56:40 -0500 Subject: [PATCH 01/14] fix: accept patch_file as real-work evidence in RequireWriteFile validator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit patch_file is a legitimate file-writing tool call (surgical edit) but was invisible to HandoffToTesterValidator, causing agents that use patch_file to be permanently blocked at the handoff gate. Also generalized the error message away from "HANDOFF TO TESTER blocked" and Developer-specific build instructions — this validator backs RequireWriteFile which is used on edges beyond Developer→Tester (e.g. Archaeologist's RECON COMPLETE edge), so the old message misdirected agents into irrelevant recovery steps. --- .../Validation/HandoffToTesterValidator.cs | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/Orchestration/Validation/HandoffToTesterValidator.cs b/src/Orchestration/Validation/HandoffToTesterValidator.cs index cebf124..a87c386 100644 --- a/src/Orchestration/Validation/HandoffToTesterValidator.cs +++ b/src/Orchestration/Validation/HandoffToTesterValidator.cs @@ -7,10 +7,10 @@ namespace fuseraft.Orchestration.Validation; /// -/// Blocks HANDOFF TO TESTER unless the source agent completed real work during -/// the current turn. "Real work" means either: +/// Blocks a handoff unless the source agent completed real work during the current turn. +/// "Real work" means at least one of: /// -/// At least one write_file tool call completed (the normal path), OR +/// A write_file or patch_file tool call completed (the normal path), OR /// /// When is supplied: a successful /// shell_run whose command matches at least one of the pipe-separated @@ -59,7 +59,8 @@ public async Task ValidateAsync( var funcName = HistoryHelpers.FindFunctionName(history, frc.CallId, i) ?? 
string.Empty; - if (funcName.Contains("write_file", StringComparison.OrdinalIgnoreCase)) + if (funcName.Contains("write_file", StringComparison.OrdinalIgnoreCase) || + funcName.Contains("patch_file", StringComparison.OrdinalIgnoreCase)) { hasWriteFile = true; break; @@ -112,15 +113,12 @@ public async Task ValidateAsync( { var failDetail = BuildFailDetail(); return RoutingValidationResult.Fail( - "HANDOFF TO TESTER blocked: no evidence of real work this turn\n" + - "(no write_file, no git_commit, no shell fallback matched).\n\n" + - "Required before handing off:\n" + - " 1. write_file for every changed file.\n" + - " 2. shell_run ./build.sh — fix until it passes.\n" + - " 3. git_add + git_commit.\n" + - " 4. Retry handoff.\n\n" + - failDetail + - "All tools available: write_file, shell_run, read_file. Code blocks are NOT saved to disk."); + "Handoff blocked: no evidence of real work this turn\n" + + "(no write_file, no patch_file, no git_commit, no shell fallback matched).\n\n" + + "You must write at least one file before handing off. Use write_file for new files\n" + + "or patch_file for surgical edits to existing files. Code blocks in your response\n" + + "are NOT saved to disk — you must call the tool.\n\n" + + failDetail); } return RoutingValidationResult.Pass(); From 5c9abe6d92afeae006e8100a1f3580ecf3b187b8 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 12:05:24 -0500 Subject: [PATCH 02/14] feat: apply global config endpoint and apiKeyEnvVar as agent defaults Agents generated by templates had no Endpoint or ApiKeyEnvVar, forcing users to manually add them to every agent file even when ~/.fuseraft/config already declared a provider URL. ApplyGlobalDefaults now runs after config binding (in both BuildAsync and LoadConfig) and fills in missing Endpoint and ApiKeyEnvVar on every agent model, named alias, and selection/magentic model. Explicit per-agent values always win; global values only fill empty fields. 
Also adds ApiKeyEnvVar to UserConfig and UserConfigStore so it can be set once in the global config and inherited everywhere. --- src/Cli/OrchestratorBuilder.cs | 39 ++++++++++++++++++++++++++- src/Core/Models/UserConfig.cs | 3 +++ src/Infrastructure/UserConfigStore.cs | 17 +++++++----- 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/src/Cli/OrchestratorBuilder.cs b/src/Cli/OrchestratorBuilder.cs index 5208888..b2804b5 100644 --- a/src/Cli/OrchestratorBuilder.cs +++ b/src/Cli/OrchestratorBuilder.cs @@ -65,6 +65,10 @@ public static class OrchestratorBuilder // Expand ${ENV_VAR} tokens in security and API profile config before use. config = ExpandEnvVars(config); + // Fill in Endpoint and ApiKeyEnvVar from ~/.fuseraft/config for any agent + // model that doesn't declare them explicitly. + config = ApplyGlobalDefaults(config); + // Apply per-config security constraints and API profiles to the security-sensitive plugins. var profiles = config.ApiProfiles.Count > 0 ? (IReadOnlyDictionary)config.ApiProfiles @@ -750,7 +754,7 @@ public static OrchestrationConfig LoadConfig(string configPath) .AddJsonFile(Path.GetFullPath(configPath), optional: false) .Build(); - return BindConfig(configPath, configuration); + return ApplyGlobalDefaults(BindConfig(configPath, configuration)); } // Resolves the base system prompt prepended to every agent. @@ -780,6 +784,39 @@ public static OrchestrationConfig LoadConfig(string configPath) return reader.ReadToEnd().Trim(); } + // Fills in Endpoint and ApiKeyEnvVar from ~/.fuseraft/config on any model config + // that doesn't set them explicitly. This lets the global config act as a default + // provider for all agents without requiring every agent file to repeat the values. + // Per-agent explicit values always win; only empty fields are filled. 
+ private static OrchestrationConfig ApplyGlobalDefaults(OrchestrationConfig config) + { + var (globalCfg, _) = UserConfigStore.Load(); + var globalEndpoint = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.Endpoint) ? globalCfg.Endpoint : null; + var globalApiKeyEnvVar = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.ApiKeyEnvVar) ? globalCfg.ApiKeyEnvVar : null; + + if (globalEndpoint is null && globalApiKeyEnvVar is null) return config; + + ModelConfig Fill(ModelConfig m) => m with + { + Endpoint = string.IsNullOrWhiteSpace(m.Endpoint) && globalEndpoint is not null ? globalEndpoint : m.Endpoint, + ApiKeyEnvVar = string.IsNullOrWhiteSpace(m.ApiKeyEnvVar) && globalApiKeyEnvVar is not null ? globalApiKeyEnvVar : m.ApiKeyEnvVar, + }; + + var agents = config.Agents.Select(a => a with { Model = Fill(a.Model) }).ToList(); + + var models = config.Models.ToDictionary(kv => kv.Key, kv => Fill(kv.Value)); + + var sel = config.Selection with + { + Model = config.Selection.Model is not null ? Fill(config.Selection.Model) : null, + Magentic = config.Selection.Magentic is not null + ? config.Selection.Magentic with { Model = config.Selection.Magentic.Model is not null ? Fill(config.Selection.Magentic.Model) : null } + : null, + }; + + return config with { Agents = agents, Models = models, Selection = sel }; + } + private static ModelConfig ResolveAlias( ModelConfig model, IReadOnlyDictionary registry) diff --git a/src/Core/Models/UserConfig.cs b/src/Core/Models/UserConfig.cs index 896cabb..8b4761f 100644 --- a/src/Core/Models/UserConfig.cs +++ b/src/Core/Models/UserConfig.cs @@ -13,6 +13,9 @@ public sealed class UserConfig [JsonPropertyName("provider")] public string Provider { get; set; } = string.Empty; + [JsonPropertyName("apiKeyEnvVar")] + public string ApiKeyEnvVar { get; set; } = string.Empty; + // Never written to disk — populated at runtime from the OS keychain. 
[JsonIgnore] public string ApiKey { get; set; } = string.Empty; diff --git a/src/Infrastructure/UserConfigStore.cs b/src/Infrastructure/UserConfigStore.cs index 1264067..b7f24c2 100644 --- a/src/Infrastructure/UserConfigStore.cs +++ b/src/Infrastructure/UserConfigStore.cs @@ -30,9 +30,10 @@ public static (UserConfig? Config, string? LegacyKey) Load() var config = new UserConfig { - ModelId = onDisk.ModelId ?? string.Empty, - Endpoint = onDisk.Endpoint ?? string.Empty, - Provider = onDisk.Provider ?? string.Empty, + ModelId = onDisk.ModelId ?? string.Empty, + Endpoint = onDisk.Endpoint ?? string.Empty, + Provider = onDisk.Provider ?? string.Empty, + ApiKeyEnvVar = onDisk.ApiKeyEnvVar ?? string.Empty, }; return (config, onDisk.ApiKey); } @@ -48,9 +49,10 @@ public static void Save(UserConfig config) Directory.CreateDirectory(ConfigDir); var onDisk = new OnDiskConfig { - ModelId = config.ModelId, - Endpoint = config.Endpoint, - Provider = config.Provider, + ModelId = config.ModelId, + Endpoint = config.Endpoint, + Provider = config.Provider, + ApiKeyEnvVar = config.ApiKeyEnvVar, }; File.WriteAllText(ConfigPath, JsonSerializer.Serialize(onDisk, JsonOptions)); } @@ -68,6 +70,9 @@ private sealed class OnDiskConfig [JsonPropertyName("provider")] public string? Provider { get; set; } + [JsonPropertyName("apiKeyEnvVar")] + public string? ApiKeyEnvVar { get; set; } + // Present only in configs created before keychain support was added. [JsonPropertyName("apiKey")] public string? ApiKey { get; set; } From c5b8994861e99fe7e05006f9d7795931ac51559c Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 12:51:16 -0500 Subject: [PATCH 03/14] feat: inject OS keychain key into agents that have no ApiKey or ApiKeyEnvVar When a user stores their provider key in the OS keychain (via `fuseraft key set`) and no agent file declares ApiKey or ApiKeyEnvVar, the key was silently unused at run time and auth failed. 
ApplyKeychainKeyAsync now runs after ApplyGlobalDefaults in BuildAsync: it retrieves the keychain key once and injects it as a literal ApiKey on every model config that still has neither field set. Models with either field already populated are untouched. LoadConfig (used for display and validation) is deliberately kept sync and unchanged since the literal key is only needed at runtime, not for config inspection. --- src/Cli/OrchestratorBuilder.cs | 42 ++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/Cli/OrchestratorBuilder.cs b/src/Cli/OrchestratorBuilder.cs index b2804b5..7b1adf6 100644 --- a/src/Cli/OrchestratorBuilder.cs +++ b/src/Cli/OrchestratorBuilder.cs @@ -13,6 +13,7 @@ using fuseraft.Core.Interfaces; using fuseraft.Core.Models; using fuseraft.Infrastructure; +using fuseraft.Infrastructure.KeyStore; using fuseraft.Infrastructure.Plugins; using fuseraft.Orchestration; using fuseraft.Orchestration.Saga; @@ -69,6 +70,10 @@ public static class OrchestratorBuilder // model that doesn't declare them explicitly. config = ApplyGlobalDefaults(config); + // For models still missing both ApiKey and ApiKeyEnvVar, inject the key + // stored in the OS keychain so users don't have to set an env var at all. + config = await ApplyKeychainKeyAsync(config, cancellationToken); + // Apply per-config security constraints and API profiles to the security-sensitive plugins. var profiles = config.ApiProfiles.Count > 0 ? (IReadOnlyDictionary)config.ApiProfiles @@ -817,6 +822,43 @@ ModelConfig Fill(ModelConfig m) => m with return config with { Agents = agents, Models = models, Selection = sel }; } + // Injects the OS keychain key as a literal ApiKey on every model config that has + // neither ApiKey nor ApiKeyEnvVar set. The keychain is read at most once per call. + // Models that already have either field set are left untouched. 
+ private static async Task ApplyKeychainKeyAsync( + OrchestrationConfig config, + CancellationToken cancellationToken = default) + { + // Quick check: any model actually needs a key? + bool NeedsKey(ModelConfig m) => + string.IsNullOrWhiteSpace(m.ApiKey) && string.IsNullOrWhiteSpace(m.ApiKeyEnvVar); + + bool anyAgentNeedsKey = config.Agents.Any(a => NeedsKey(a.Model)) + || config.Models.Values.Any(NeedsKey) + || (config.Selection.Model is not null && NeedsKey(config.Selection.Model)) + || (config.Selection.Magentic?.Model is not null && NeedsKey(config.Selection.Magentic.Model)); + + if (!anyAgentNeedsKey) return config; + + var keychainKey = await ApiKeyStoreFactory.Create().RetrieveAsync(); + if (string.IsNullOrWhiteSpace(keychainKey)) return config; + + ModelConfig Fill(ModelConfig m) => + NeedsKey(m) ? m with { ApiKey = keychainKey } : m; + + var agents = config.Agents.Select(a => a with { Model = Fill(a.Model) }).ToList(); + var models = config.Models.ToDictionary(kv => kv.Key, kv => Fill(kv.Value)); + var sel = config.Selection with + { + Model = config.Selection.Model is not null ? Fill(config.Selection.Model) : null, + Magentic = config.Selection.Magentic is not null + ? config.Selection.Magentic with { Model = config.Selection.Magentic.Model is not null ? 
Fill(config.Selection.Magentic.Model) : null } + : null, + }; + + return config with { Agents = agents, Models = models, Selection = sel }; + } + private static ModelConfig ResolveAlias( ModelConfig model, IReadOnlyDictionary registry) From 9ec2e848f09075316d31fafd1132674136618de0 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 13:01:41 -0500 Subject: [PATCH 04/14] docs: update validators, models, and cli-reference for recent changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit validators.md — RequireWriteFile now accepts patch_file alongside write_file; update the "used on" header (no longer specific to HANDOFF TO TESTER), the pass/fail descriptions, the ShellFallbackPattern prose, and the error-message example to match the new generic message. models.md — add "Global config defaults" section documenting that endpoint and apiKeyEnvVar in ~/.fuseraft/config are applied to any agent model that omits them, and documenting the full auth priority chain (explicit ApiKey → ApiKeyEnvVar → global apiKeyEnvVar → OS keychain). Update the ModelConfig table rows for Endpoint and ApiKeyEnvVar to note the global-config fallback. cli-reference.md — note that the --endpoint flag value is also inherited by agents at run time; clarify that keychain-only agents skip the ApiKeyEnvVar env-var check in static validation. --- docs/cli-reference.md | 4 ++-- docs/models.md | 32 ++++++++++++++++++++++++++++++-- docs/validators.md | 26 +++++++++++--------------- 3 files changed, 43 insertions(+), 19 deletions(-) diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 41704fc..2b9914b 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -516,7 +516,7 @@ fuseraft validate [options] 12. Agent names referenced in termination strategies exist in the agents list 13. If `Telemetry` is set: `OtlpEndpoint` is a valid absolute URI 14. 
With `--strict`: every plugin name in any agent's `Plugins` list is registered -15. For every `ApiKeyEnvVar` referenced: the environment variable is set in the current shell (warning if missing) +15. For every `ApiKeyEnvVar` referenced: the environment variable is set in the current shell (warning if missing). Note: agents that rely on the OS keychain rather than an env var skip this check — keychain auth is verified only when `--check-connectivity` is used. **Exit codes** @@ -634,7 +634,7 @@ fuseraft init [output] [options] |------|---------|-------------| | `-t, --template ` | interactive | Team template to use. See templates below. | | `-m, --model ` | auto-detected | Model ID to use for all agents. Auto-detected from your API keys if omitted. | -| `-e, --endpoint ` | `~/.fuseraft/config` | Provider API endpoint URL. Defaults to the endpoint saved in `~/.fuseraft/config` if present. | +| `-e, --endpoint ` | `~/.fuseraft/config` | Provider API endpoint URL. Defaults to the endpoint saved in `~/.fuseraft/config` if present. At run time, agents without an explicit `Endpoint` also inherit this value automatically. | | `--no-interactive` | off | Skip all prompts and generate with the supplied options and defaults. | **Templates** diff --git a/docs/models.md b/docs/models.md index 9efb27b..6b720fb 100644 --- a/docs/models.md +++ b/docs/models.md @@ -62,8 +62,8 @@ Any field left empty falls back to auto-detection. |-------|------|---------|-------------| | `ModelId` | string | — | Model identifier sent to the API. | | `Provider` | string | auto | Connector type: `openai`, `azure`, `google`, `mistral`, `ollama`. Auto-detected from `ModelId` if omitted. | -| `Endpoint` | string | auto | API base URL. Auto-detected from provider if omitted. Required for `azure`. | -| `ApiKeyEnvVar` | string | auto | Name of the environment variable holding the API key. Auto-detected from provider if omitted. Leave empty for `ollama`. | +| `Endpoint` | string | auto | API base URL. 
Auto-detected from provider if omitted. Required for `azure`. Falls back to `endpoint` in `~/.fuseraft/config` when blank. | +| `ApiKeyEnvVar` | string | auto | Name of the environment variable holding the API key. Auto-detected from provider if omitted. Leave empty for `ollama`. Falls back to `apiKeyEnvVar` in `~/.fuseraft/config` when blank. | | `MaxTokens` | int | `0` | Max tokens per response. `0` = use model default. | | `Temperature` | number | — | Sampling temperature (0.0–2.0). Omit for reasoning models that reject this parameter. | @@ -90,6 +90,34 @@ For any model not matching the table, specify `Provider`, `Endpoint`, and `ApiKe --- +## Global config defaults + +`~/.fuseraft/config` can define a default `endpoint` and `apiKeyEnvVar` that are applied to every agent model (and named alias) that doesn't set those fields itself. This means you only need to configure the provider once — generated agent files work out of the box without repeating the values. + +```json +{ + "modelId": "anthropic.claude-sonnet-4-5-20250929-v1:0", + "endpoint": "http://localhost:3000/api/openai/v1", + "apiKeyEnvVar": "OPENWEBUI_API_KEY" +} +``` + +Set this file via `fuseraft repl` (the setup wizard writes it automatically) or edit it directly. + +### OS keychain fallback + +If an agent model has neither `ApiKey` nor `ApiKeyEnvVar` set after global defaults are applied, fuseraft retrieves the key stored in the OS keychain (set via `fuseraft key set` or the REPL wizard) and injects it as a literal `ApiKey`. This means the full auth resolution order for any agent model is: + +1. Explicit `ApiKey` in the agent file (literal value) +2. `ApiKeyEnvVar` from the agent file (env var lookup) +3. `apiKeyEnvVar` from `~/.fuseraft/config` (env var lookup) +4. OS keychain (retrieved once at startup, injected as literal key) +5. Nothing — Ollama and other unauthenticated providers work without a key + +Per-agent values always win; global values only fill in empty fields. 
+ +--- + ## Supported providers ### openai — OpenAI and OpenAI-compatible APIs diff --git a/docs/validators.md b/docs/validators.md index 7cd7b4c..30623e1 100644 --- a/docs/validators.md +++ b/docs/validators.md @@ -164,17 +164,17 @@ The validator checks THIS TURN ONLY — prior-turn runs do not carry forward. ## RequireWriteFile -**Used on:** `HANDOFF TO TESTER` (or any route where you require the agent to have written a file this turn) +**Used on:** Any route where the agent must have written at least one file this turn (e.g. `HANDOFF TO TESTER`, `RECON COMPLETE`, `HANDOFF TO REVIEWER`) -**What it checks:** Walks backward through the conversation history looking for completed `write_file` tool calls (`Role=Tool` messages with a `FunctionResultContent` whose function name contains `write_file`). Stops at the most recent user-role message. +**What it checks:** Walks backward through the conversation history looking for completed `write_file` or `patch_file` tool calls (`Role=Tool` messages with a `FunctionResultContent` whose function name contains either string). Stops at the most recent user-role message. -**Passes if:** At least one `write_file` call completed in the current agent turn. +**Passes if:** At least one `write_file` or `patch_file` call completed in the current agent turn. -**Fails if:** No `write_file` call is found — meaning the agent described a file write in text but never actually called the tool. +**Fails if:** Neither tool was called — meaning the agent described a file write in text but never actually called the tool. Text, code blocks, and responses are not saved to disk. ### ShellFallbackPattern -Some fixes require only a shell command (e.g. a dependency update) and produce no `write_file` call. Set `ShellFallbackPattern` on the route to allow a successful matching `shell_run` to satisfy the validator in place of `write_file`: +Some fixes require only a shell command (e.g. a dependency update) and produce no file-write call. 
Set `ShellFallbackPattern` on the route to allow a successful matching `shell_run` to satisfy the validator instead: ```yaml - Keyword: "HANDOFF TO TESTER" @@ -185,21 +185,17 @@ Some fixes require only a shell command (e.g. a dependency update) and produce n The pattern is a pipe-separated list of substrings (case-insensitive). The validator passes if the turn contains a successful `shell_run` whose command matches any alternative. A failed shell command (exit code non-zero, `[ERROR]`, `[TIMEOUT]`, `[DENIED]`) is never accepted regardless of the pattern. -When `ShellFallbackPattern` is omitted the validator behaves as before — only `write_file` satisfies it. +When `ShellFallbackPattern` is omitted the validator behaves as before — only `write_file`, `patch_file`, or `git_commit` satisfy it. **Error injected on failure:** ``` -HANDOFF TO TESTER blocked: no evidence of real work this turn -(no write_file, no git_commit, no shell fallback matched). +Handoff blocked: no evidence of real work this turn +(no write_file, no patch_file, no git_commit, no shell fallback matched). -Required before handing off: - 1. write_file for every changed file. - 2. shell_run ./build.sh — fix until it passes. - 3. git_add + git_commit. - 4. Retry handoff. - -All tools available: write_file, shell_run, read_file. Code blocks are NOT saved to disk. +You must write at least one file before handing off. Use write_file for new files +or patch_file for surgical edits to existing files. Code blocks in your response +are NOT saved to disk — you must call the tool. 
``` --- From dd1576d62c0a6c97fac63afcccb24d4c959042ca Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 13:19:12 -0500 Subject: [PATCH 05/14] fix: remove ApplyGlobalDefaults from LoadConfig to keep validation env-independent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LoadConfig is used for static inspection and validation — it should check the config as-written, not as-runtime-resolved. Applying global defaults there made ValidateConfigCommand pass configs that are actually incomplete (e.g. unknown model prefix with no Endpoint), because ~/.fuseraft/config's endpoint silently filled the gap. Global defaults now apply only in BuildAsync where agents are actually invoked. Fixes ModelWithoutPrefix_NoEndpoint_Errors test. --- src/Cli/OrchestratorBuilder.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Cli/OrchestratorBuilder.cs b/src/Cli/OrchestratorBuilder.cs index 7b1adf6..e463a82 100644 --- a/src/Cli/OrchestratorBuilder.cs +++ b/src/Cli/OrchestratorBuilder.cs @@ -759,7 +759,7 @@ public static OrchestrationConfig LoadConfig(string configPath) .AddJsonFile(Path.GetFullPath(configPath), optional: false) .Build(); - return ApplyGlobalDefaults(BindConfig(configPath, configuration)); + return BindConfig(configPath, configuration); } // Resolves the base system prompt prepended to every agent. From 92a0bffc2c2835a3fd0dfef43daf48d3f9a22693 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 13:30:54 -0500 Subject: [PATCH 06/14] fix: skip provider auto-detection when Endpoint + ApiKeyEnvVar are both set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The short-circuit that bypasses model-ID prefix detection only fired when both Endpoint and a literal ApiKey were present. 
When global config supplied Endpoint and ApiKeyEnvVar (env-var reference instead of literal key), the model fell through to DetectFromPrefix, which doesn't recognise AWS Bedrock-style IDs like anthropic.claude-sonnet-4-5-20250929-v1:0, throwing "Cannot determine the LLM provider". Extend the short-circuit to cover any form of auth (ApiKey OR ApiKeyEnvVar) — if the caller supplied their own endpoint and auth, treat as OpenAI-compatible and skip prefix detection entirely. --- src/Infrastructure/ChatClientFactory.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Infrastructure/ChatClientFactory.cs b/src/Infrastructure/ChatClientFactory.cs index 2574c1e..343f8b3 100644 --- a/src/Infrastructure/ChatClientFactory.cs +++ b/src/Infrastructure/ChatClientFactory.cs @@ -111,11 +111,12 @@ public ModelConfig Resolve(ModelConfig config) && (!string.IsNullOrEmpty(config.ApiKeyEnvVar) || !string.IsNullOrEmpty(config.ApiKey))) return config; - // 2b. Explicit endpoint + literal key (e.g. REPL wizard, custom/enterprise provider). + // 2b. Explicit endpoint + any form of auth (literal key or env-var reference). // Skip auto-detection and treat as OpenAI-compatible — the user supplied all necessary // connection info and auto-detection would only misidentify unusual model ID formats // (e.g. AWS Bedrock "anthropic.claude-...:0" being wrongly treated as an Ollama tag). - if (!string.IsNullOrEmpty(config.Endpoint) && !string.IsNullOrEmpty(config.ApiKey)) + if (!string.IsNullOrEmpty(config.Endpoint) + && (!string.IsNullOrEmpty(config.ApiKey) || !string.IsNullOrEmpty(config.ApiKeyEnvVar))) return config with { Provider = string.IsNullOrEmpty(config.Provider) ? 
"openai" : config.Provider }; // Ollama tag format: "modelname:tag" where the tag contains at least one letter From 5100de1668edfef8fb26890426f95aabc196aa44 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 13:36:51 -0500 Subject: [PATCH 07/14] fix: treat custom-endpoint models as openai-compatible when prefix detection fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a model ID doesn't match any known prefix (e.g. AWS Bedrock-style IDs like anthropic.claude-sonnet-4-5-20250929-v1:0 used through Open WebUI), and an Endpoint is set (either inline or injected from ~/.fuseraft/config), Resolve was still throwing "Cannot determine the LLM provider". A custom Endpoint is an unambiguous signal that the caller knows which provider to use — fall back to openai-compatible instead of throwing, consistent with how the REPL wizard treats enterprise/custom endpoints. --- src/Infrastructure/ChatClientFactory.cs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Infrastructure/ChatClientFactory.cs b/src/Infrastructure/ChatClientFactory.cs index 343f8b3..f1fd6de 100644 --- a/src/Infrastructure/ChatClientFactory.cs +++ b/src/Infrastructure/ChatClientFactory.cs @@ -132,8 +132,15 @@ public ModelConfig Resolve(ModelConfig config) if (detected is null) { - // If the caller provided at least Provider, we can proceed; otherwise fail fast - // with a helpful message rather than a cryptic missing-env-var error later. + // A custom Endpoint is an unambiguous signal that the caller knows which + // provider to use — treat as OpenAI-compatible and skip the prefix check. + // This covers non-standard model IDs (e.g. AWS Bedrock "anthropic.claude-...:0", + // Open WebUI deployments) where the endpoint is set via global config or inline. + if (!string.IsNullOrEmpty(config.Endpoint)) + return config with { Provider = string.IsNullOrEmpty(config.Provider) ? 
"openai" : config.Provider }; + + // No endpoint and no detectable prefix — fail fast with a helpful message + // rather than a cryptic missing-env-var error later. if (string.IsNullOrEmpty(config.Provider)) throw new InvalidOperationException( $"Cannot determine the LLM provider for model '{config.ModelId}'. " + From 910ee890c39c159419f1f33f955ce8c9b7e6b53f Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 13:42:24 -0500 Subject: [PATCH 08/14] feat: inherit ModelId from ~/.fuseraft/config when agent model leaves it blank MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ApplyGlobalDefaults already propagated Endpoint and ApiKeyEnvVar from the global config; extend it to cover ModelId too. This completes the contract: fuseraft run will not fail due to a missing connection field as long as ~/.fuseraft/config supplies the defaults — agent files only need to override what differs from the global config (e.g. a per-agent MaxTokens or a different model for one role). --- src/Cli/OrchestratorBuilder.cs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Cli/OrchestratorBuilder.cs b/src/Cli/OrchestratorBuilder.cs index e463a82..eded40e 100644 --- a/src/Cli/OrchestratorBuilder.cs +++ b/src/Cli/OrchestratorBuilder.cs @@ -789,20 +789,22 @@ public static OrchestrationConfig LoadConfig(string configPath) return reader.ReadToEnd().Trim(); } - // Fills in Endpoint and ApiKeyEnvVar from ~/.fuseraft/config on any model config - // that doesn't set them explicitly. This lets the global config act as a default - // provider for all agents without requiring every agent file to repeat the values. + // Fills in ModelId, Endpoint, and ApiKeyEnvVar from ~/.fuseraft/config on any model + // config that doesn't set them explicitly. This lets the global config act as a + // default provider so agent files work without repeating connection details. 
// Per-agent explicit values always win; only empty fields are filled. private static OrchestrationConfig ApplyGlobalDefaults(OrchestrationConfig config) { var (globalCfg, _) = UserConfigStore.Load(); - var globalEndpoint = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.Endpoint) ? globalCfg.Endpoint : null; + var globalModelId = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.ModelId) ? globalCfg.ModelId : null; + var globalEndpoint = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.Endpoint) ? globalCfg.Endpoint : null; var globalApiKeyEnvVar = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.ApiKeyEnvVar) ? globalCfg.ApiKeyEnvVar : null; - if (globalEndpoint is null && globalApiKeyEnvVar is null) return config; + if (globalModelId is null && globalEndpoint is null && globalApiKeyEnvVar is null) return config; ModelConfig Fill(ModelConfig m) => m with { + ModelId = string.IsNullOrWhiteSpace(m.ModelId) && globalModelId is not null ? globalModelId : m.ModelId, Endpoint = string.IsNullOrWhiteSpace(m.Endpoint) && globalEndpoint is not null ? globalEndpoint : m.Endpoint, ApiKeyEnvVar = string.IsNullOrWhiteSpace(m.ApiKeyEnvVar) && globalApiKeyEnvVar is not null ? globalApiKeyEnvVar : m.ApiKeyEnvVar, }; From 2d05e6508e0ca9b84a0368f6397039e3dccabfbb Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 13:53:35 -0500 Subject: [PATCH 09/14] fix: throw actionable error when API key is empty instead of crashing in SDK ApiKeyCredential throws "Value cannot be an empty string" when given an empty key, producing a cryptic stack trace from deep in System.ClientModel. Guard both the openai-compatible and azure branches with an explicit check before the constructor call, surfacing a message that names the model, the endpoint, and the two ways to fix it (fuseraft key set or apiKeyEnvVar in global config). 
--- src/Infrastructure/ChatClientFactory.cs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Infrastructure/ChatClientFactory.cs b/src/Infrastructure/ChatClientFactory.cs index f1fd6de..edbb82c 100644 --- a/src/Infrastructure/ChatClientFactory.cs +++ b/src/Infrastructure/ChatClientFactory.cs @@ -186,6 +186,10 @@ public IChatClient Create(ModelConfig config) if (string.IsNullOrEmpty(config.Endpoint)) throw new InvalidOperationException( $"Provider 'azure' requires Endpoint to be set (deployment: '{config.ModelId}')."); + if (string.IsNullOrEmpty(apiKey)) + throw new InvalidOperationException( + $"No API key available for Azure deployment '{config.ModelId}' at '{config.Endpoint}'. " + + $"Store one with 'fuseraft key set', or add \"apiKeyEnvVar\": \"\" to ~/.fuseraft/config."); return new AzureOpenAIClient( new Uri(config.Endpoint), new ApiKeyCredential(apiKey), @@ -205,6 +209,10 @@ public IChatClient Create(ModelConfig config) throw new InvalidOperationException( $"Provider '{provider}' requires Endpoint to be set (model: '{config.ModelId}'). " + $"This should have been filled in by auto-detection — check the model ID prefix."); + if (string.IsNullOrEmpty(apiKey)) + throw new InvalidOperationException( + $"No API key available for model '{config.ModelId}' at '{config.Endpoint}'. 
" + + $"Store one with 'fuseraft key set', or add \"apiKeyEnvVar\": \"\" to ~/.fuseraft/config."); return new OpenAIClient( new ApiKeyCredential(apiKey), new OpenAIClientOptions { Transport = transport, Endpoint = new Uri(config.Endpoint), NetworkTimeout = HttpClientTimeout }) From 34446f0ac253bbe3cef4dbba5ea3c4a2987bd06a Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 13:54:01 -0500 Subject: [PATCH 10/14] =?UTF-8?q?fix:=20correct=20API=20key=20error=20mess?= =?UTF-8?q?age=20=E2=80=94=20fuseraft=20repl,=20not=20nonexistent=20'fuser?= =?UTF-8?q?aft=20key=20set'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Infrastructure/ChatClientFactory.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Infrastructure/ChatClientFactory.cs b/src/Infrastructure/ChatClientFactory.cs index edbb82c..b6fe2ff 100644 --- a/src/Infrastructure/ChatClientFactory.cs +++ b/src/Infrastructure/ChatClientFactory.cs @@ -189,7 +189,7 @@ public IChatClient Create(ModelConfig config) if (string.IsNullOrEmpty(apiKey)) throw new InvalidOperationException( $"No API key available for Azure deployment '{config.ModelId}' at '{config.Endpoint}'. " + - $"Store one with 'fuseraft key set', or add \"apiKeyEnvVar\": \"\" to ~/.fuseraft/config."); + $"Run 'fuseraft repl' and complete the setup wizard, or add \"apiKeyEnvVar\": \"\" to ~/.fuseraft/config."); return new AzureOpenAIClient( new Uri(config.Endpoint), new ApiKeyCredential(apiKey), @@ -212,7 +212,7 @@ public IChatClient Create(ModelConfig config) if (string.IsNullOrEmpty(apiKey)) throw new InvalidOperationException( $"No API key available for model '{config.ModelId}' at '{config.Endpoint}'. 
" + - $"Store one with 'fuseraft key set', or add \"apiKeyEnvVar\": \"\" to ~/.fuseraft/config."); + $"Run 'fuseraft repl' and complete the setup wizard, or add \"apiKeyEnvVar\": \"\" to ~/.fuseraft/config."); return new OpenAIClient( new ApiKeyCredential(apiKey), new OpenAIClientOptions { Transport = transport, Endpoint = new Uri(config.Endpoint), NetworkTimeout = HttpClientTimeout }) From 72cd1514441512c890fd6533b01ab4e5cbdbc880 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 13:58:58 -0500 Subject: [PATCH 11/14] =?UTF-8?q?fix:=20remove=20duplicate=20error=20displ?= =?UTF-8?q?ay=20=E2=80=94=20drop=20WriteException=20from=20generic=20catch?= =?UTF-8?q?=20in=20SessionRunner?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic catch block called AnsiConsole.WriteException (printing the full stack trace) and also set errorMessage, which RenderSummary then printed again as '✗ Error: ...'. The message already surfaces clearly through RenderSummary; the stack trace added noise without value for user-facing operational errors. --- src/Cli/SessionRunner.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Cli/SessionRunner.cs b/src/Cli/SessionRunner.cs index 1a54ad2..e7ce8d8 100644 --- a/src/Cli/SessionRunner.cs +++ b/src/Cli/SessionRunner.cs @@ -190,7 +190,6 @@ await eventEmitter.EmitAsync("session_error", { succeeded = false; errorMessage = ex.Message; - AnsiConsole.WriteException(ex, ExceptionFormats.ShortenPaths); break; } From 280296b1958041e6875ec04689d612ab13d42120 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 21:11:20 -0500 Subject: [PATCH 12/14] docs: add context management overview, rename context store page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add docs/context-management.md — a unified overview of all four context layers (context store, persistent memory, ContextWindow filtering, compaction). 
Covers TextOnly, ExcludeAgents, MaxTurnAge, MaxTailMessages, ContextCapFraction, replay truncation, all five compaction modes, IncludeReasoning, IncludeSymbolGraph, change log grounding, and the head+tail pinning behaviour. Includes a flow diagram and opinionated strategy guide. Rename docs/context.md → docs/context-store.md so the file name matches what it actually documents (the file-import CLI, not conversation context). Update mkdocs.yml nav and index.md guide table accordingly. --- docs/context-management.md | 354 ++++++++++++++++++++++++++ docs/{context.md => context-store.md} | 0 docs/index.md | 3 +- mkdocs.yml | 2 + 4 files changed, 358 insertions(+), 1 deletion(-) create mode 100644 docs/context-management.md rename docs/{context.md => context-store.md} (100%) diff --git a/docs/context-management.md b/docs/context-management.md new file mode 100644 index 0000000..41aaddf --- /dev/null +++ b/docs/context-management.md @@ -0,0 +1,354 @@ +# Context Management + +Context is the most important resource in a long-running agent session. Every token an agent +sees costs money and time; everything it misses is a potential hallucination or regression. +fuseraft manages context through four layers that fire at different points in a session's +lifetime: + +``` +Session start + └─ Layer 1: Context Store → files imported before the session + └─ Layer 2: Persistent Memory → facts recalled from prior sessions + +Each agent turn + └─ Layer 3: ContextWindow → per-agent history filter (every turn) + +History too long + └─ Layer 4: Compaction → replace old turns with a summary +``` + +Each layer is optional and independently configured. Most sessions need only one or two. + +--- + +## Layer 1: Context Store + +The context store pre-loads static reference files into `.fuseraft/context/` before a session +starts. Every agent sees a compact index block at the top of its system prompt listing what is +available, and can access the full content with `read_file`. 
+ +```yaml +# No config required — populated by CLI before running: +# fuseraft context add ~/docs/schema.sql --name db-schema +# fuseraft context add ~/specs/ --name specs +``` + +**When to use:** Database schemas, API specs, architecture docs, task briefs — anything too +large to paste into the task argument but that agents should know exists from turn one. + +See [Context Store](context-store.md) for the full CLI reference. + +--- + +## Layer 2: Persistent Memory + +When `EnableMemory: true` is set on an agent, fuseraft loads that agent's persistent memory +store at session start and prepends a structured block to its instructions. Memories survive +between sessions — they accumulate over time, giving agents a working knowledge of the project. + +```yaml +Agents: + - Name: Developer + EnableMemory: true + Instructions: | + You are a Go developer. Write idiomatic, tested code. +``` + +At session start, the agent sees: + +``` +MEMORY — facts recalled from prior sessions: +[preference] preferred-test-runner: Use `go test -race ./...` for all test runs. +[fact] auth-middleware: The auth middleware was rewritten in v2.3 — do not touch the legacy layer. +``` + +**Storage locations:** + +| Context | Path | +|---|---| +| REPL sessions | `~/.fuseraft/memory/repl/` | +| Orchestration agents | `~/.fuseraft/memory/agents/{AgentName}/` | + +**Memory scoping:** In a project directory that has `.fuseraft/`, only memories saved in that +directory are loaded. Directories without `.fuseraft/` fall back to all global memories. + +**REPL:** Memory is always active in the REPL — no config flag needed. Memories are extracted +automatically at the end of each session and scoped to the working directory via +`.fuseraft/memory_refs.json`. Use `/memory` commands to inspect or delete them. + +**Memory cap:** The prompt block is capped at 8,000 characters. Entries are ordered by type +then name; entries that would exceed the cap are dropped (header only is kept for visibility). 
+ +See [Configuration — Memory](configuration.md#memory) for the full field reference. + +--- + +## Layer 3: ContextWindow (per-agent history filter) + +By default every agent receives the full accumulated conversation history, including tool-call +frames and tool-result messages from all prior turns. In a long multi-agent session this can +reach hundreds of thousands of tokens — most of it irrelevant to late-stage agents. + +`ContextWindow` lets each agent declare a lighter view. The shared history is never mutated; +only the slice passed to that agent's turn is affected. + +### Filters and their order + +Filters are applied in this order every turn: + +1. **TextOnly / ExcludeAgents** — strip tool noise or specific agents' output +2. **MaxTurnAge** — keep only messages from the last N agent turns (semantic cut) +3. **MaxTailMessages** — hard cap: keep only the last N messages (raw count) + +```yaml +Agents: + - Name: Reviewer + ContextWindow: + TextOnly: true # strip all tool-call frames and tool results + ExcludeAgents: # also strip all output from these agents + - Tester + MaxTurnAge: 5 # only keep messages from the last 5 assistant turns + MaxTailMessages: 40 # hard cap after the above filters + ContextCapFraction: 0.8 # emit context_cap_warning when at 80% of MaxTailMessages +``` + +### TextOnly + +Strips all tool-call frames (assistant messages containing only a function-call request) and +all tool-result messages from the history slice. Text-bearing assistant messages and all user +messages are kept. + +**This is the primary lever for context reduction.** A Reviewer that independently re-reads +files and re-runs commands gains nothing from seeing the hundreds of tool results produced by +the Developer — stripping them can reduce input tokens by 90%+ in typical sessions. + +When `ExcludeAgents` is set, tool-result messages are stripped automatically even when +`TextOnly` is false. 
Tool results are not attributed to a specific agent; leaving them without +their corresponding call frames produces a malformed context with orphaned result IDs. + +### ExcludeAgents + +Names of agents whose messages should be excluded entirely — both text-bearing replies and +tool-call frames. + +### MaxTurnAge + +Keeps only messages from the last N *agent turns*, where each turn ends with an assistant +reply. Unlike `MaxTailMessages` (a raw message count), `MaxTurnAge` is semantic: it counts +backward from the end of history and discards everything before the cut-point. + +Use this to discard early-session context from phases or agents no longer relevant to the +current work — without needing to know the exact message count. + +### MaxTailMessages + +Hard cap applied after the other filters. When the filtered list still exceeds this count, +the oldest messages are dropped. Set `ContextCapFraction` to receive a `context_cap_warning` +event as an early signal before the hard cap is reached. + +### Replay truncation + +Agents sometimes produce verbose stream-of-consciousness output (3–5k tokens). When that text +is replayed verbatim in every subsequent turn, compaction summaries grow each cycle and input +tokens balloon. fuseraft automatically truncates verbose non-summary assistant messages to +2,000 characters when replaying them into the next turn's history. Compaction summaries are +never truncated. + +--- + +## Layer 4: Compaction + +When conversation history grows long enough to approach a model's context window, compaction +fires. It replaces the oldest turns with a single context message that agents treat as +background, then resumes from the retained tail. 
+ +### Trigger + +```yaml +Compaction: + TriggerTurnCount: 50 # fire when assistant-turn count reaches this + KeepRecentTurns: 10 # keep this many turns verbatim; compact the rest +``` + +Compaction fires in two situations: +- Before a session stream starts, when resuming a checkpoint already over the threshold. +- Mid-session, after each checkpoint save, once the live history crosses the threshold. + +`TriggerTurnCount` must be greater than `KeepRecentTurns`. + +### Modes + +| Mode | How context is reconstructed | LLM call? | Requirements | +|---|---|---|---| +| `llm` | LLM summarizes the compacted turns | Yes | A model | +| `intent` | Deterministic `✓`/`✗`/`⧖` per tool call from `intents.json` | No | `ChangeTracking` | +| `lossless` | Evidence graph + contract status + state machine position | No | `statemachine` strategy + `EvidenceStore` | +| `hybrid` | Lossless reconstruction prepended before the LLM summary | Yes | `statemachine` strategy + `EvidenceStore` | +| `window` | Oldest user+assistant pairs dropped until within `TokenBudget` | No | — | + +**`intent` is the recommended mode** for most sessions when `ChangeTracking` is configured. +It requires no state machine and produces a deterministic record of every tool call: + +``` +[INTENT-DERIVED RECONSTRUCTION — covers turns 1–20] + +OPERATIONS (chronological): + ✓ write_file → "src/api/users.go" (turn 3, Developer) + ✗ patch_file → "src/api/auth.go" — oldText not found… (turn 4, Developer) + ✓ shell_run → "go test ./..." (turn 5, Tester) + +RESUMPTION NOTE: History compacted from intent log — deterministic ground truth. +Do not re-execute operations marked ✓ (applied). +Operations marked ✗ (failed) should be retried if the task requires them. +``` + +**`lossless` is the recommended mode** for `statemachine` sessions with an `EvidenceStore`. +Instead of summarizing the conversation, it reads disk state directly — state machine position, +contract pass/fail, evidence items — and injects it as ground truth. 
No hallucination is +possible because no LLM generates the summary. + +**`window` mode** trades context continuity for simplicity. No summary is injected; the oldest +turns are silently dropped. Useful for exploratory sessions where older context genuinely +doesn't matter, or when you want no compaction LLM cost at all. + +### Pinned summaries + +Prior compaction summaries (`IsCompactionSummary`) are pinned and never dropped by `window` +mode. This preserves the head of the conversation — each compaction cycle adds a new summary +at the front while the window trims from behind it. + +### Compaction model + +By default, `llm` and `hybrid` modes use the first agent's model to generate the summary. +Override with `Compaction.Model` to use a cheaper model for compaction: + +```yaml +Compaction: + TriggerTurnCount: 50 + KeepRecentTurns: 10 + Mode: hybrid + Model: + ModelId: gpt-4o-mini +``` + +### Enriching summaries + +Two optional flags add structured context blocks before the LLM summary text. Both are +prefixed in this order when both are enabled: symbol graph first, then reasoning excerpts. + +**`IncludeReasoning`** — prepends a `[REASONING EXCERPTS]` block containing the model's +thinking for each compacted turn (truncated to ~500 tokens per turn). Useful when the *why* +behind prior decisions matters as much as the *what*. Requires `Events` to be configured +(reasoning excerpts are read from the session events log). + +**`IncludeSymbolGraph`** — prepends a `[SYMBOL DEPENDENCY GRAPH]` block listing every +`SymbolDefinition` and `SymbolReference` node in the evidence store for files written during +the session. Gives agents an explicit map of what symbols were in scope during the compacted +turns. Requires `EvidenceStore` and `ChangeTracking` to be configured. 
+ +```yaml +Compaction: + TriggerTurnCount: 40 + KeepRecentTurns: 8 + Mode: hybrid + IncludeReasoning: true + IncludeSymbolGraph: true +``` + +### Change log grounding + +When `ChangeTracking` or `Validation.ChangeLogPath` is configured, `llm` and `hybrid` +compactors read `changes.json` at compaction time and inject it into the summary prompt as +authoritative ground truth. Agent success claims are overridden by what `changes.json` actually +records — exit codes and file writes are facts; assistant self-reports are not. + +### Cost accounting + +The summary message's cumulative cost includes all the turns it replaced. Budget tracking +remains exact across compaction boundaries. `intent`, `lossless`, and `window` modes incur +no LLM cost at compaction time. + +--- + +## How the layers fit together + +Here is the full sequence from session start through a long-running session: + +``` +1. fuseraft run + ├─ Context Store index → injected into every agent's system prompt + └─ Persistent Memory → prepended to each agent's instructions (if EnableMemory: true) + +2. Each agent turn + └─ ContextWindow filter applied to conversation history + ├─ TextOnly / ExcludeAgents strip tool noise + ├─ MaxTurnAge semantic cut + └─ MaxTailMessages hard cap + └─ Filtered slice + replay-truncated content → sent to LLM + +3. After each checkpoint save + └─ Compaction check + ├─ (llm/intent/lossless/hybrid) assistant-turn count ≥ TriggerTurnCount? + │ YES → compact oldest (Count − KeepRecentTurns) turns into one message + │ save checkpoint with compacted history → continue + └─ (window) estimated token count > TokenBudget? + YES → drop oldest user+assistant pairs until within budget + (pinned summaries are never dropped) +``` + +--- + +## Choosing a strategy + +**For most sessions with `ChangeTracking`:** use `intent` mode. 
+ +```yaml +ChangeTracking: + Path: .fuseraft/changes.json + IntentLogPath: .fuseraft/state/intents.json + +Compaction: + TriggerTurnCount: 40 + KeepRecentTurns: 8 + Mode: intent +``` + +**For `statemachine` sessions with `EvidenceStore`:** use `lossless` or `hybrid`. + +```yaml +Compaction: + TriggerTurnCount: 50 + KeepRecentTurns: 10 + Mode: lossless # or "hybrid" to add an LLM narrative on top +``` + +**For exploratory / throw-away sessions:** use `window` to avoid any compaction cost. + +```yaml +Compaction: + Mode: window + TokenBudget: 60000 +``` + +**For a downstream agent (Reviewer, Tester) that needs less history:** use `ContextWindow`. + +```yaml +Agents: + - Name: Reviewer + ContextWindow: + TextOnly: true + MaxTurnAge: 3 +``` + +**For an agent that should know nothing about earlier phases:** combine `ExcludeAgents` with +`MaxTailMessages` so it only sees the final handoff. + +```yaml +Agents: + - Name: Auditor + ContextWindow: + ExcludeAgents: + - Developer + - Tester + MaxTailMessages: 20 +``` diff --git a/docs/context.md b/docs/context-store.md similarity index 100% rename from docs/context.md rename to docs/context-store.md diff --git a/docs/index.md b/docs/index.md index 820815c..ff23104 100644 --- a/docs/index.md +++ b/docs/index.md @@ -31,7 +31,8 @@ This is a personal project and a work in progress, suited for experimentation an | [Security & Sandbox](security.md) | File and network containment | | [Governance](governance.md) | Execution rings, audit log, circuit breaker, SLO tracking | | [Sessions](sessions.md) | Resumption, HITL, cost tracking, compaction | -| [Context Store](context.md) | Importing reference material for agents | +| [Context Management](context-management.md) | How fuseraft manages context across a long session | +| [Context Store](context-store.md) | Importing reference material for agents | | [Examples](examples.md) | Ready-to-use config examples | ## VS Code Extension diff --git a/mkdocs.yml b/mkdocs.yml index 
94f31c9..cf4e5af 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -45,6 +45,8 @@ nav: - Security: security.md - Governance: governance.md - Sessions: sessions.md + - Context Management: context-management.md + - Context Store: context-store.md - Examples: examples.md - Design: design.md From eafa181581df67b7d1cf7149a3df5d417b386905 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 21:32:22 -0500 Subject: [PATCH 13/14] feat: add Document plugin and auto-extract text from binary docs at context import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add DocumentTextExtractor (PDF via PdfPig, DOCX/PPTX/XLSX via DocumentFormat.OpenXml) — fully cross-platform pure-.NET extraction, no shell tools required. Add DocumentPlugin with four read-only tools: document_extract_text — full text from PDF, DOCX, PPTX, or XLSX document_get_info — format/size metadata without full extraction document_list_sheets — sheet names from an XLSX workbook document_get_sheet — single sheet as pipe-delimited text table Fix ContextStore.AddAsync: binary documents (.pdf, .docx, .pptx, .xlsx) are now extracted to .txt at import time so agents can access them via read_file. Falls back to storing the binary with a warning on extraction failure. ExtractionInfo recorded on ContextItem; notes surfaced in CLI output. Register Document in PluginRegistry (default + sandboxed); add capability entries (all read) to PluginCapabilityMap. Update plugins.md and context-store.md. 
--- docs/context-store.md | 25 ++- docs/plugins.md | 17 ++ src/Cli/Commands/ContextCommand.cs | 4 + src/FuseraftCli.csproj | 2 + src/Infrastructure/ContextStore.cs | 79 +++++-- src/Infrastructure/DocumentTextExtractor.cs | 207 ++++++++++++++++++ src/Infrastructure/Plugins/DocumentPlugin.cs | 149 +++++++++++++ .../Plugins/PluginCapabilityMap.cs | 7 + src/Infrastructure/Plugins/PluginRegistry.cs | 2 + 9 files changed, 477 insertions(+), 15 deletions(-) create mode 100644 src/Infrastructure/DocumentTextExtractor.cs create mode 100644 src/Infrastructure/Plugins/DocumentPlugin.cs diff --git a/docs/context-store.md b/docs/context-store.md index 5fd433d..13fa3f5 100644 --- a/docs/context-store.md +++ b/docs/context-store.md @@ -92,13 +92,36 @@ fuseraft context list --dir ~/projects/my-app fuseraft context remove runbook --dir ~/projects/my-app ``` +## Document extraction + +When you import a PDF, Word document, PowerPoint presentation, or Excel spreadsheet, +fuseraft automatically extracts the plain text at import time and stores a `.txt` version +in the context directory. Agents can then access the extracted text via `read_file` — +no special plugin required. + +``` +fuseraft context add ~/docs/architecture.pdf +# ✓ architecture — 1 file(s), 48.2 KB +# Extracted from architecture.pdf: PDF — 24 page(s) → architecture.txt +``` + +**Supported formats:** `.pdf`, `.docx`, `.pptx`, `.xlsx` + +If text extraction fails (encrypted document, corrupted file), the original binary is stored +instead and a warning is printed. Binary files cannot be read by agents via `read_file`. + +For working with documents found *during* a session, or reading individual Excel sheets, +use the [`Document` plugin](plugins.md#document) directly. 
+ ## What to import The context store works well for: - **Database schemas** — schema SQL, ERDs, or migration history - **API specifications** — OpenAPI/Swagger YAML, Postman collections -- **Architecture documents** — design docs, ADRs, system diagrams +- **Architecture documents** — design docs, ADRs, system diagrams (PDF, DOCX) +- **Slide decks** — PPTX presentations extracted to slide-by-slide text +- **Spreadsheets** — XLSX workbooks with multiple sheets, each extracted as a table - **Reference data** — seed data, sample payloads, fixture files - **Task briefs** — detailed specs too long to paste into the task argument diff --git a/docs/plugins.md b/docs/plugins.md index d2918d9..6d45ab8 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -387,6 +387,23 @@ Agents: --- +## Document + +Read rich document formats as plain text. All operations are read-only. Sandbox rules apply when `FileSystemSandboxPath` is configured. + +| Function | Parameters | Description | +|----------|-----------|-------------| +| `document_extract_text` | `path` | Extract full plain text from a PDF, DOCX, PPTX, or XLSX file. Returns a format/size header followed by the extracted text. | +| `document_get_info` | `path` | Return format metadata (page/sheet count, file size, extracted character count) without returning the full text. Cheaper than `document_extract_text` for planning. | +| `document_list_sheets` | `path` | List sheet names in an Excel file (`.xlsx` only). | +| `document_get_sheet` | `path`, `sheetName`, `maxRows` (default 0 = all) | Extract one sheet from an Excel file as a pipe-delimited text table. | + +**Supported formats:** `.pdf`, `.docx`, `.pptx`, `.xlsx` + +**Context store integration:** When you run `fuseraft context add` on a supported document, the text is automatically extracted and stored as a `.txt` file at import time. Agents can then access it via `read_file` without needing the `Document` plugin.
Use `Document` when you need on-demand extraction inside a session (e.g. processing documents found during a task, or working with individual Excel sheets). + +--- + ## MCP plugins In addition to the built-in plugins above, tools from any connected MCP server are available as plugins. The plugin name is the `Name` field from `McpServers` config. diff --git a/src/Cli/Commands/ContextCommand.cs b/src/Cli/Commands/ContextCommand.cs index 4fcccd7..87104aa 100644 --- a/src/Cli/Commands/ContextCommand.cs +++ b/src/Cli/Commands/ContextCommand.cs @@ -74,6 +74,10 @@ protected override async Task ExecuteAsync(CommandContext context, ContextA foreach (var f in item.Files.OrderBy(f => f.RelativePath)) AnsiConsole.MarkupLine($" [dim]{Markup.Escape(f.RelativePath)}[/]"); + if (item.ExtractionInfo is not null) + foreach (var note in item.ExtractionInfo.Split('\n', StringSplitOptions.RemoveEmptyEntries)) + AnsiConsole.MarkupLine($" [dim]{Markup.Escape(note)}[/]"); + AnsiConsole.MarkupLine( $"\n[dim]Agents will see this item listed in their system prompt " + $"and can read it via read_file from " + diff --git a/src/FuseraftCli.csproj b/src/FuseraftCli.csproj index cdc7538..34c090c 100644 --- a/src/FuseraftCli.csproj +++ b/src/FuseraftCli.csproj @@ -14,6 +14,7 @@ + @@ -32,6 +33,7 @@ + diff --git a/src/Infrastructure/ContextStore.cs b/src/Infrastructure/ContextStore.cs index 0ddbc1d..936c035 100644 --- a/src/Infrastructure/ContextStore.cs +++ b/src/Infrastructure/ContextStore.cs @@ -72,34 +72,42 @@ public async Task AddAsync( Directory.Delete(destDir, recursive: true); Directory.CreateDirectory(destDir); - var files = new List(); + var files = new List(); + var extractionNotes = new List(); if (isFile) { - var fileName = Path.GetFileName(fullSource); - File.Copy(fullSource, Path.Combine(destDir, fileName)); - files.Add(new ContextFileEntry(fileName, new FileInfo(fullSource).Length)); + var (entry, note) = await StoreFileAsync(fullSource, destDir, ct); + files.Add(entry); + if (note 
is not null) extractionNotes.Add(note); } else { foreach (var src in Directory.EnumerateFiles(fullSource, "*", SearchOption.AllDirectories)) { - var rel = Path.GetRelativePath(fullSource, src); - var dest = Path.Combine(destDir, rel); - Directory.CreateDirectory(Path.GetDirectoryName(dest)!); - File.Copy(src, dest); - files.Add(new ContextFileEntry(rel.Replace('\\', '/'), new FileInfo(src).Length)); + var rel = Path.GetRelativePath(fullSource, src); + var destSub = Path.Combine(destDir, Path.GetDirectoryName(rel) ?? string.Empty); + Directory.CreateDirectory(destSub); + var (entry, note) = await StoreFileAsync(src, destSub, ct); + var storedRel = Path.Combine( + Path.GetDirectoryName(rel) ?? string.Empty, + entry.RelativePath).Replace('\\', '/').TrimStart('/'); + files.Add(new ContextFileEntry(storedRel, entry.SizeBytes)); + if (note is not null) extractionNotes.Add(note); } } var index = await LoadIndexAsync(ct); index.Items[name] = new ContextItem { - Name = name, - Description = description, - SourcePath = fullSource, - ImportedAt = DateTime.UtcNow, - Files = files, + Name = name, + Description = description, + SourcePath = fullSource, + ImportedAt = DateTime.UtcNow, + Files = files, + ExtractionInfo = extractionNotes.Count > 0 + ? string.Join("\n", extractionNotes) + : null, }; await SaveIndexAsync(index, ct); } @@ -199,6 +207,42 @@ private async Task SaveIndexAsync(ContextIndex index, CancellationToken ct) await File.WriteAllTextAsync(indexPath, JsonSerializer.Serialize(index, JsonOpts), ct); } + // If the source file is a supported binary document format, extracts its text and + // stores it as a .txt sibling. Otherwise copies the file verbatim. Returns the stored + // file entry and an optional human-readable extraction note. + private static async Task<(ContextFileEntry Entry, string? 
Note)> StoreFileAsync( + string sourcePath, string destDir, CancellationToken ct) + { + if (DocumentTextExtractor.IsSupported(sourcePath)) + { + try + { + var (text, info) = DocumentTextExtractor.Extract(sourcePath); + var txtName = Path.GetFileNameWithoutExtension(sourcePath) + ".txt"; + var txtPath = Path.Combine(destDir, txtName); + await File.WriteAllTextAsync(txtPath, text, ct); + var size = new FileInfo(txtPath).Length; + var note = $"Extracted from {Path.GetFileName(sourcePath)}: {info} → {txtName}"; + return (new ContextFileEntry(txtName, size), note); + } + catch (Exception ex) + { + // Extraction failed — copy the binary so the item is still stored, but warn + // that agents will not be able to read it via read_file. + var binName = Path.GetFileName(sourcePath); + var binPath = Path.Combine(destDir, binName); + File.Copy(sourcePath, binPath); + var size = new FileInfo(sourcePath).Length; + var note = $"Warning: extraction failed for {binName} ({ex.Message}) — binary stored, not readable by agents"; + return (new ContextFileEntry(binName, size), note); + } + } + + var fileName = Path.GetFileName(sourcePath); + File.Copy(sourcePath, Path.Combine(destDir, fileName)); + return (new ContextFileEntry(fileName, new FileInfo(sourcePath).Length), null); + } + private static bool IsValidName(string name) => !string.IsNullOrWhiteSpace(name) && name.All(c => char.IsLetterOrDigit(c) || c == '-' || c == '_'); @@ -229,6 +273,13 @@ public sealed class ContextItem [JsonPropertyName("files")] public List Files { get; init; } = []; + + /// + /// Set when one or more source files were binary documents that were converted to + /// plain text at import time. Contains one note per extracted file. + /// + [JsonPropertyName("extractionInfo")] + public string? 
ExtractionInfo { get; init; } } public sealed record ContextFileEntry( diff --git a/src/Infrastructure/DocumentTextExtractor.cs b/src/Infrastructure/DocumentTextExtractor.cs new file mode 100644 index 0000000..b3436ca --- /dev/null +++ b/src/Infrastructure/DocumentTextExtractor.cs @@ -0,0 +1,207 @@ +using System.Text; +using DocumentFormat.OpenXml.Packaging; +using DocumentFormat.OpenXml.Spreadsheet; +using DocumentFormat.OpenXml.Wordprocessing; +using UglyToad.PdfPig; + +namespace fuseraft.Infrastructure; + +/// +/// Extracts plain text from rich document formats (PDF, DOCX, PPTX, XLSX). +/// Used by at import time and by +/// at agent runtime. +/// +public static class DocumentTextExtractor +{ + public static readonly IReadOnlySet SupportedExtensions = + new HashSet([".pdf", ".docx", ".pptx", ".xlsx"], StringComparer.OrdinalIgnoreCase); + + public static bool IsSupported(string path) => + SupportedExtensions.Contains(Path.GetExtension(path)); + + /// + /// Extracts plain text from . + /// Returns the extracted text and a short info line (e.g. "PDF — 12 page(s)"). + /// Throws for unsupported extensions. + /// + public static (string Text, string Info) Extract(string path) + { + var ext = Path.GetExtension(path).ToLowerInvariant(); + return ext switch + { + ".pdf" => ExtractPdf(path), + ".docx" => ExtractDocx(path), + ".pptx" => ExtractPptx(path), + ".xlsx" => ExtractXlsx(path), + _ => throw new NotSupportedException($"Unsupported document format: {ext}") + }; + } + + /// Returns the sheet names in an Excel file. + public static IReadOnlyList ListSheets(string path) + { + using var doc = SpreadsheetDocument.Open(path, false); + return doc.WorkbookPart?.Workbook?.Sheets?.Elements() + .Select(s => s.Name?.Value ?? string.Empty) + .ToList() ?? []; + } + + /// + /// Extracts a single sheet from an Excel file as pipe-delimited rows. 
+ /// + public static (string Text, int RowCount) ExtractSheet(string path, string sheetName, int maxRows = 0) + { + using var doc = SpreadsheetDocument.Open(path, false); + var workbookPart = doc.WorkbookPart + ?? throw new InvalidOperationException("Workbook has no parts."); + + var sharedStrings = BuildSharedStrings(workbookPart); + + var sheet = workbookPart.Workbook?.Sheets?.Elements() + .FirstOrDefault(s => string.Equals(s.Name?.Value, sheetName, StringComparison.OrdinalIgnoreCase)) + ?? throw new KeyNotFoundException($"Sheet '{sheetName}' not found."); + + if (sheet.Id?.Value is null) + throw new InvalidOperationException($"Sheet '{sheetName}' has no part ID."); + + var wsPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id.Value); + var data = wsPart.Worksheet.GetFirstChild(); + if (data is null) return (string.Empty, 0); + + var sb = new StringBuilder(); + int rowCount = 0; + foreach (var row in data.Elements()) + { + if (maxRows > 0 && rowCount >= maxRows) break; + var cells = row.Elements().Select(c => GetCellValue(c, sharedStrings)); + sb.AppendLine(string.Join(" | ", cells)); + rowCount++; + } + return (sb.ToString().Trim(), rowCount); + } + + // PDF + + private static (string Text, string Info) ExtractPdf(string path) + { + using var pdf = PdfDocument.Open(path); + var pages = pdf.GetPages().ToList(); + var sb = new StringBuilder(); + foreach (var page in pages) + { + var text = page.Text; + if (!string.IsNullOrWhiteSpace(text)) + sb.AppendLine(text); + } + return (sb.ToString().Trim(), $"PDF — {pages.Count} page(s)"); + } + + // DOCX + + private static (string Text, string Info) ExtractDocx(string path) + { + using var doc = WordprocessingDocument.Open(path, false); + var body = doc.MainDocumentPart?.Document?.Body; + if (body is null) return (string.Empty, "DOCX — empty document"); + + var sb = new StringBuilder(); + foreach (var elem in body.ChildElements) + { + if (elem is Paragraph para) + { + var text = para.InnerText; + if 
(!string.IsNullOrWhiteSpace(text)) + sb.AppendLine(text); + } + else if (elem is DocumentFormat.OpenXml.Wordprocessing.Table table) + { + foreach (var row in table.Elements()) + { + var cells = row.Elements() + .Select(c => c.InnerText.Trim()) + .Where(t => !string.IsNullOrEmpty(t)); + sb.AppendLine(string.Join(" | ", cells)); + } + } + } + + var extracted = sb.ToString().Trim(); + var wordCount = extracted.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length; + return (extracted, $"DOCX — ~{wordCount:N0} word(s)"); + } + + // PPTX + + private static (string Text, string Info) ExtractPptx(string path) + { + using var pres = PresentationDocument.Open(path, false); + var slideParts = pres.PresentationPart?.SlideParts?.ToList() ?? []; + var sb = new StringBuilder(); + int slideNum = 0; + + foreach (var slidePart in slideParts) + { + slideNum++; + sb.AppendLine($"=== Slide {slideNum} ==="); + foreach (var text in slidePart.Slide.Descendants()) + { + if (!string.IsNullOrWhiteSpace(text.Text)) + sb.AppendLine(text.Text); + } + sb.AppendLine(); + } + + return (sb.ToString().Trim(), $"PPTX — {slideParts.Count} slide(s)"); + } + + // XLSX + + private static (string Text, string Info) ExtractXlsx(string path) + { + using var doc = SpreadsheetDocument.Open(path, false); + var workbookPart = doc.WorkbookPart; + if (workbookPart is null) return (string.Empty, "XLSX — empty workbook"); + + var sharedStrings = BuildSharedStrings(workbookPart); + var sheets = workbookPart.Workbook?.Sheets?.Elements().ToList() ?? 
[]; + var sb = new StringBuilder(); + int totalRows = 0; + + foreach (var sheet in sheets) + { + sb.AppendLine($"=== Sheet: {sheet.Name} ==="); + if (sheet.Id?.Value is null) continue; + var wsPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id.Value); + var data = wsPart.Worksheet.GetFirstChild(); + if (data is null) continue; + + foreach (var row in data.Elements()) + { + var cells = row.Elements().Select(c => GetCellValue(c, sharedStrings)); + sb.AppendLine(string.Join(" | ", cells)); + totalRows++; + } + sb.AppendLine(); + } + + return (sb.ToString().Trim(), $"XLSX — {sheets.Count} sheet(s), {totalRows:N0} row(s)"); + } + + // Helpers + + private static List BuildSharedStrings(WorkbookPart workbookPart) => + workbookPart.SharedStringTablePart?.SharedStringTable + ?.Elements() + .Select(s => s.InnerText) + .ToList() ?? []; + + private static string GetCellValue(Cell cell, List sharedStrings) + { + var value = cell.CellValue?.Text ?? string.Empty; + if (cell.DataType?.Value == CellValues.SharedString + && int.TryParse(value, out var idx) + && (uint)idx < (uint)sharedStrings.Count) + return sharedStrings[idx]; + return value; + } +} diff --git a/src/Infrastructure/Plugins/DocumentPlugin.cs b/src/Infrastructure/Plugins/DocumentPlugin.cs new file mode 100644 index 0000000..34dc5a9 --- /dev/null +++ b/src/Infrastructure/Plugins/DocumentPlugin.cs @@ -0,0 +1,149 @@ +using System.ComponentModel; +using fuseraft.Infrastructure; + +namespace fuseraft.Infrastructure.Plugins; + +/// +/// Reads rich document formats (PDF, DOCX, PPTX, XLSX) as plain text. +/// All operations are read-only. Path arguments are sandbox-checked when a +/// sandbox root is configured. +/// +public sealed class DocumentPlugin(string? sandboxRoot = null) +{ + private readonly string? _sandboxRoot = sandboxRoot is not null + ? Path.GetFullPath(ProcessHelper.ExpandHome(sandboxRoot)) + : null; + + [Description("Extract plain text from a document. 
Supports PDF, DOCX, PPTX, XLSX.")] + public string ExtractText([Description("Path to the document.")] string path) + { + var denial = ResolveSafe(path, out var resolved); + if (denial is not null) return denial; + if (!File.Exists(resolved)) return PluginResult.Error($"File not found: {resolved}"); + if (!DocumentTextExtractor.IsSupported(resolved)) + return PluginResult.Error( + $"Unsupported format '{Path.GetExtension(resolved)}'. " + + $"Supported: {string.Join(", ", DocumentTextExtractor.SupportedExtensions)}"); + + try + { + var (text, info) = DocumentTextExtractor.Extract(resolved); + return string.IsNullOrWhiteSpace(text) + ? PluginResult.Info($"{info} — no text content found.") + : $"[{info}]\n\n{text}"; + } + catch (Exception ex) + { + return PluginResult.Error($"Extraction failed: {ex.Message}"); + } + } + + [Description("Get format and size metadata for a document. Cheaper than extract_text. Supports PDF, DOCX, PPTX, XLSX.")] + public string GetInfo([Description("Path to the document.")] string path) + { + var denial = ResolveSafe(path, out var resolved); + if (denial is not null) return denial; + if (!File.Exists(resolved)) return PluginResult.Error($"File not found: {resolved}"); + if (!DocumentTextExtractor.IsSupported(resolved)) + return PluginResult.Error( + $"Unsupported format '{Path.GetExtension(resolved)}'. 
" + + $"Supported: {string.Join(", ", DocumentTextExtractor.SupportedExtensions)}"); + + try + { + var fi = new FileInfo(resolved); + var (text, info) = DocumentTextExtractor.Extract(resolved); + var charCount = text.Length; + return $"{info}\nFile size: {FormatSize(fi.Length)}\n" + + $"Extracted text: ~{charCount:N0} characters (~{charCount / 4:N0} tokens)"; + } + catch (Exception ex) + { + return PluginResult.Error($"Could not read document metadata: {ex.Message}"); + } + } + + [Description("List sheet names in an Excel file (.xlsx).")] + public string ListSheets([Description("Path to the .xlsx file.")] string path) + { + var denial = ResolveSafe(path, out var resolved); + if (denial is not null) return denial; + if (!File.Exists(resolved)) return PluginResult.Error($"File not found: {resolved}"); + + var ext = Path.GetExtension(resolved).ToLowerInvariant(); + if (ext != ".xlsx") + return PluginResult.Error($"list_sheets only works on .xlsx files, not '{ext}'."); + + try + { + var sheets = DocumentTextExtractor.ListSheets(resolved); + return sheets.Count == 0 + ? PluginResult.Info("No sheets found.") + : string.Join("\n", sheets.Select((s, i) => $"{i + 1}. 
{s}")); + } + catch (Exception ex) + { + return PluginResult.Error($"Could not read sheet list: {ex.Message}"); + } + } + + [Description("Read one sheet from an Excel file (.xlsx) as a pipe-delimited text table.")] + public string GetSheet( + [Description("Path to the .xlsx file.")] string path, + [Description("Sheet name.")] string sheetName, + [Description("Maximum rows to return (0 = all).")] int maxRows = 0) + { + var denial = ResolveSafe(path, out var resolved); + if (denial is not null) return denial; + if (!File.Exists(resolved)) return PluginResult.Error($"File not found: {resolved}"); + + var ext = Path.GetExtension(resolved).ToLowerInvariant(); + if (ext != ".xlsx") + return PluginResult.Error($"get_sheet only works on .xlsx files, not '{ext}'."); + + try + { + var (text, rowCount) = DocumentTextExtractor.ExtractSheet(resolved, sheetName, maxRows); + if (string.IsNullOrWhiteSpace(text)) + return PluginResult.Info($"Sheet '{sheetName}' is empty."); + var truncNote = maxRows > 0 && rowCount >= maxRows ? $" — first {maxRows} rows" : string.Empty; + return $"[Sheet: {sheetName} — {rowCount} row(s){truncNote}]\n\n{text}"; + } + catch (KeyNotFoundException ex) + { + return PluginResult.Error(ex.Message); + } + catch (Exception ex) + { + return PluginResult.Error($"Could not read sheet '{sheetName}': {ex.Message}"); + } + } + + private string? ResolveSafe(string path, out string resolved) + { + var expanded = ProcessHelper.ExpandHome(path); + resolved = _sandboxRoot is not null && !Path.IsPathRooted(expanded) + ? Path.GetFullPath(expanded, _sandboxRoot) + : Path.GetFullPath(expanded); + + if (_sandboxRoot is null) return null; + + var sandboxPrefix = _sandboxRoot.TrimEnd(Path.DirectorySeparatorChar) + Path.DirectorySeparatorChar; + var resolvedCheck = resolved.TrimEnd(Path.DirectorySeparatorChar) + Path.DirectorySeparatorChar; + var comparison = OperatingSystem.IsWindows() + ? 
StringComparison.OrdinalIgnoreCase + : StringComparison.Ordinal; + + return resolvedCheck.StartsWith(sandboxPrefix, comparison) + ? null + : PluginResult.Denied($"Path '{resolved}' is outside the configured sandbox '{_sandboxRoot}'."); + } + + private static string FormatSize(long bytes) => bytes switch + { + < 1_024 => $"{bytes} B", + < 1_048_576 => $"{bytes / 1_024.0:F1} KB", + < 1_073_741_824 => $"{bytes / 1_048_576.0:F1} MB", + _ => $"{bytes / 1_073_741_824.0:F1} GB", + }; +} diff --git a/src/Infrastructure/Plugins/PluginCapabilityMap.cs b/src/Infrastructure/Plugins/PluginCapabilityMap.cs index 08497ab..65c62de 100644 --- a/src/Infrastructure/Plugins/PluginCapabilityMap.cs +++ b/src/Infrastructure/Plugins/PluginCapabilityMap.cs @@ -23,6 +23,7 @@ namespace fuseraft.Infrastructure.Plugins; /// Gitread (status, diff, log, show, branch_list, stash_list) · write (add, commit, checkout, create_branch, init, push, pull, stash, stash_pop, reset) /// Httpget · post · put · patch · delete — one per HTTP verb /// Jsonread (format, minify, get, keys, search, to_text, validate) · write (merge) +/// Documentread (extract_text, get_info, list_sheets, get_sheet — all read-only) /// Searchread (all search operations are read-only) /// Planread (plan_get, plan_get_summary) · write (plan_create, plan_update_step, plan_add_step) /// Changesread (read, read_latest) @@ -102,6 +103,12 @@ internal static class PluginCapabilityMap ["json_validate"] = "read", ["json_merge"] = "write", + // Document (all read-only) + ["document_extract_text"] = "read", + ["document_get_info"] = "read", + ["document_list_sheets"] = "read", + ["document_get_sheet"] = "read", + // Search (all read-only) ["search_files"] = "read", ["search_content"] = "read", diff --git a/src/Infrastructure/Plugins/PluginRegistry.cs b/src/Infrastructure/Plugins/PluginRegistry.cs index 7948cb2..0eb1430 100644 --- a/src/Infrastructure/Plugins/PluginRegistry.cs +++ b/src/Infrastructure/Plugins/PluginRegistry.cs @@ -68,6 +68,7 
@@ public PluginRegistry RegisterDefaults() Register("Http", () => new HttpPlugin(_sharedHttpClient, logger: _loggerFactory?.CreateLogger())); Register("Json", () => new JsonPlugin()); Register("Search", () => new SearchPlugin()); + Register("Document", () => new DocumentPlugin()); Register("Probe", () => new ProbePlugin()); Register("CodeExecution", () => new CodeExecutionPlugin()); Register("Handoff", () => new HandoffPlugin()); @@ -107,6 +108,7 @@ public PluginRegistry Configure( Register("FileSystem", () => new FileSystemPlugin(sandboxRoot, security.ReadFileSizeLimit, versionStore: fileVersionStore)); Register("Shell", () => new ShellPlugin(sandboxRoot, shellCommandApprover)); Register("Http", () => new HttpPlugin(_sharedHttpClient, allowedHosts, apiProfiles, allowPrivateHosts, _loggerFactory?.CreateLogger())); + Register("Document", () => new DocumentPlugin(sandboxRoot)); return this; } From b811f4f77dc124988860e95107b0c492c2f33e9b Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 11 May 2026 21:51:00 -0500 Subject: [PATCH 14/14] docs: update configuration, cli-reference, and context-management for Document plugin - configuration.md: add Document to the capability tag table - cli-reference.md: document binary extraction behaviour in fuseraft context add - context-management.md: update Layer 1 to mention document formats and cross-link Document plugin --- docs/cli-reference.md | 9 +++++++++ docs/configuration.md | 1 + docs/context-management.md | 13 ++++++++++--- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 2b9914b..c04101b 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -760,6 +760,15 @@ fuseraft context add ~/specs/ --name specs --description "Product specifications fuseraft context add ~/docs/runbook.md --dir ~/projects/my-app ``` +**Binary document extraction:** When the source is a `.pdf`, `.docx`, `.pptx`, or `.xlsx` file, fuseraft automatically extracts the 
plain text and stores it as a `.txt` file. Agents read the extracted text via `read_file` — no `Document` plugin required. A note is printed on import: + +``` +✓ architecture — 1 file(s), 48.2 KB + Extracted from architecture.pdf: PDF — 24 page(s) → architecture.txt +``` + +If extraction fails (encrypted file, corrupt format), the binary is stored with a warning and will not be readable by agents via `read_file`. + After importing, agents see an entry like this at the top of their system prompt: ``` diff --git a/docs/configuration.md b/docs/configuration.md index 9756860..5ab38a0 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -148,6 +148,7 @@ Per-plugin tool filter. Keys are plugin names; values are arrays of capability t | `Git` | `read` (git_status, git_diff, git_log, git_show, git_branch_list, git_stash_list) · `write` (git_add, git_commit, git_checkout, git_create_branch, git_init, git_push, git_pull, git_stash, git_stash_pop, git_reset) | | `Http` | `get` · `head` · `post` · `put` · `patch` · `delete` — one per HTTP verb | | `Json` | `read` · `write` (json_merge) | +| `Document` | `read` (document_extract_text, document_get_info, document_list_sheets, document_get_sheet) | | `Search` | `read` | | `Changes` | `read` | | `Scratchpad` | `read` · `write` | diff --git a/docs/context-management.md b/docs/context-management.md index 41aaddf..fce007f 100644 --- a/docs/context-management.md +++ b/docs/context-management.md @@ -30,11 +30,18 @@ available, and can access the full content with `read_file`. 
```yaml # No config required — populated by CLI before running: # fuseraft context add ~/docs/schema.sql --name db-schema -# fuseraft context add ~/specs/ --name specs +# fuseraft context add ~/specs/ --name specs +# fuseraft context add ~/docs/design.pdf --name design # text extracted automatically ``` -**When to use:** Database schemas, API specs, architecture docs, task briefs — anything too -large to paste into the task argument but that agents should know exists from turn one. +**When to use:** Database schemas, API specs, architecture docs, slide decks, spreadsheets, +task briefs — anything too large to paste into the task argument but that agents should know +exists from turn one. + +**Binary documents:** When you import a `.pdf`, `.docx`, `.pptx`, or `.xlsx` file, fuseraft +extracts the plain text at import time and stores a `.txt` file instead. Agents access it +via `read_file` with no extra plugin. For documents found *during* a session — or when you +need individual Excel sheets — use the [`Document` plugin](plugins.md#document) directly. See [Context Store](context-store.md) for the full CLI reference.