From 79531ed3641e3cb18af6c248b619edbd7b036d17 Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 11:56:40 -0500
Subject: [PATCH 01/14] fix: accept patch_file as real-work evidence in
 RequireWriteFile validator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

patch_file is a legitimate file-writing tool call (surgical edit) but was
invisible to HandoffToTesterValidator, causing agents that use patch_file
to be permanently blocked at the handoff gate. Also generalized the error
message away from "HANDOFF TO TESTER blocked" and Developer-specific build
instructions — this validator backs RequireWriteFile, which is used on edges
beyond Developer→Tester (e.g. Archaeologist's RECON COMPLETE edge), so the
old message misdirected agents into irrelevant recovery steps.
---
 .../Validation/HandoffToTesterValidator.cs    | 24 +++++++++----------
 1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/src/Orchestration/Validation/HandoffToTesterValidator.cs b/src/Orchestration/Validation/HandoffToTesterValidator.cs
index cebf124..a87c386 100644
--- a/src/Orchestration/Validation/HandoffToTesterValidator.cs
+++ b/src/Orchestration/Validation/HandoffToTesterValidator.cs
@@ -7,10 +7,10 @@
 namespace fuseraft.Orchestration.Validation;
 
 /// <summary>
-/// Blocks <c>HANDOFF TO TESTER</c> unless the source agent completed real work during
-/// the current turn. "Real work" means either:
+/// Blocks a handoff unless the source agent completed real work during the current turn.
+/// "Real work" means at least one of:
 /// <list type="bullet">
-///   <item>At least one <c>write_file</c> tool call completed (the normal path), OR</item>
+///   <item>A <c>write_file</c> or <c>patch_file</c> tool call completed (the normal path), OR</item>
 ///   <item>
 ///     When <paramref name="shellFallbackPattern"/> is supplied: a successful
 ///     <c>shell_run</c> whose command matches at least one of the pipe-separated
@@ -59,7 +59,8 @@ public async Task<RoutingValidationResult> ValidateAsync(
 
                     var funcName = HistoryHelpers.FindFunctionName(history, frc.CallId, i) ?? string.Empty;
 
-                    if (funcName.Contains("write_file", StringComparison.OrdinalIgnoreCase))
+                    if (funcName.Contains("write_file", StringComparison.OrdinalIgnoreCase) ||
+                        funcName.Contains("patch_file", StringComparison.OrdinalIgnoreCase))
                     {
                         hasWriteFile = true;
                         break;
@@ -112,15 +113,12 @@ public async Task<RoutingValidationResult> ValidateAsync(
         {
             var failDetail = BuildFailDetail();
             return RoutingValidationResult.Fail(
-                "HANDOFF TO TESTER blocked: no evidence of real work this turn\n" +
-                "(no write_file, no git_commit, no shell fallback matched).\n\n" +
-                "Required before handing off:\n" +
-                "  1. write_file for every changed file.\n" +
-                "  2. shell_run ./build.sh — fix until it passes.\n" +
-                "  3. git_add + git_commit.\n" +
-                "  4. Retry handoff.\n\n" +
-                failDetail +
-                "All tools available: write_file, shell_run, read_file. Code blocks are NOT saved to disk.");
+                "Handoff blocked: no evidence of real work this turn\n" +
+                "(no write_file, no patch_file, no git_commit, no shell fallback matched).\n\n" +
+                "You must write at least one file before handing off. Use write_file for new files\n" +
+                "or patch_file for surgical edits to existing files. Code blocks in your response\n" +
+                "are NOT saved to disk — you must call the tool.\n\n" +
+                failDetail);
         }
 
         return RoutingValidationResult.Pass();

From 5c9abe6d92afeae006e8100a1f3580ecf3b187b8 Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 12:05:24 -0500
Subject: [PATCH 02/14] feat: apply global config endpoint and apiKeyEnvVar as
 agent defaults

Agents generated by templates had no Endpoint or ApiKeyEnvVar, forcing
users to manually add them to every agent file even when ~/.fuseraft/config
already declared a provider URL. ApplyGlobalDefaults now runs after config
binding (in both BuildAsync and LoadConfig) and fills in missing Endpoint
and ApiKeyEnvVar on every agent model, named alias, and selection/magentic
model. Explicit per-agent values always win; global values only fill empty
fields. Also adds ApiKeyEnvVar to UserConfig and UserConfigStore so it can
be set once in the global config and inherited everywhere.
---
 src/Cli/OrchestratorBuilder.cs        | 39 ++++++++++++++++++++++++++-
 src/Core/Models/UserConfig.cs         |  3 +++
 src/Infrastructure/UserConfigStore.cs | 17 +++++++-----
 3 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/src/Cli/OrchestratorBuilder.cs b/src/Cli/OrchestratorBuilder.cs
index 5208888..b2804b5 100644
--- a/src/Cli/OrchestratorBuilder.cs
+++ b/src/Cli/OrchestratorBuilder.cs
@@ -65,6 +65,10 @@ public static class OrchestratorBuilder
         // Expand ${ENV_VAR} tokens in security and API profile config before use.
         config = ExpandEnvVars(config);
 
+        // Fill in Endpoint and ApiKeyEnvVar from ~/.fuseraft/config for any agent
+        // model that doesn't declare them explicitly.
+        config = ApplyGlobalDefaults(config);
+
         // Apply per-config security constraints and API profiles to the security-sensitive plugins.
         var profiles = config.ApiProfiles.Count > 0
             ? (IReadOnlyDictionary<string, ApiProfileConfig>)config.ApiProfiles
@@ -750,7 +754,7 @@ public static OrchestrationConfig LoadConfig(string configPath)
                 .AddJsonFile(Path.GetFullPath(configPath), optional: false)
                 .Build();
 
-        return BindConfig(configPath, configuration);
+        return ApplyGlobalDefaults(BindConfig(configPath, configuration));
     }
 
     // Resolves the base system prompt prepended to every agent.
@@ -780,6 +784,39 @@ public static OrchestrationConfig LoadConfig(string configPath)
         return reader.ReadToEnd().Trim();
     }
 
+    // Fills in Endpoint and ApiKeyEnvVar from ~/.fuseraft/config on any model config
+    // that doesn't set them explicitly. This lets the global config act as a default
+    // provider for all agents without requiring every agent file to repeat the values.
+    // Per-agent explicit values always win; only empty fields are filled.
+    private static OrchestrationConfig ApplyGlobalDefaults(OrchestrationConfig config)
+    {
+        var (globalCfg, _) = UserConfigStore.Load();
+        var globalEndpoint   = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.Endpoint)     ? globalCfg.Endpoint     : null;
+        var globalApiKeyEnvVar = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.ApiKeyEnvVar) ? globalCfg.ApiKeyEnvVar : null;
+
+        if (globalEndpoint is null && globalApiKeyEnvVar is null) return config;
+
+        ModelConfig Fill(ModelConfig m) => m with
+        {
+            Endpoint     = string.IsNullOrWhiteSpace(m.Endpoint)     && globalEndpoint     is not null ? globalEndpoint     : m.Endpoint,
+            ApiKeyEnvVar = string.IsNullOrWhiteSpace(m.ApiKeyEnvVar) && globalApiKeyEnvVar is not null ? globalApiKeyEnvVar : m.ApiKeyEnvVar,
+        };
+
+        var agents = config.Agents.Select(a => a with { Model = Fill(a.Model) }).ToList();
+
+        var models = config.Models.ToDictionary(kv => kv.Key, kv => Fill(kv.Value));
+
+        var sel = config.Selection with
+        {
+            Model    = config.Selection.Model    is not null ? Fill(config.Selection.Model)    : null,
+            Magentic = config.Selection.Magentic is not null
+                ? config.Selection.Magentic with { Model = config.Selection.Magentic.Model is not null ? Fill(config.Selection.Magentic.Model) : null }
+                : null,
+        };
+
+        return config with { Agents = agents, Models = models, Selection = sel };
+    }
+
     private static ModelConfig ResolveAlias(
         ModelConfig model,
         IReadOnlyDictionary<string, ModelConfig> registry)
diff --git a/src/Core/Models/UserConfig.cs b/src/Core/Models/UserConfig.cs
index 896cabb..8b4761f 100644
--- a/src/Core/Models/UserConfig.cs
+++ b/src/Core/Models/UserConfig.cs
@@ -13,6 +13,9 @@ public sealed class UserConfig
     [JsonPropertyName("provider")]
     public string Provider { get; set; } = string.Empty;
 
+    [JsonPropertyName("apiKeyEnvVar")]
+    public string ApiKeyEnvVar { get; set; } = string.Empty;
+
     // Never written to disk — populated at runtime from the OS keychain.
     [JsonIgnore]
     public string ApiKey { get; set; } = string.Empty;
diff --git a/src/Infrastructure/UserConfigStore.cs b/src/Infrastructure/UserConfigStore.cs
index 1264067..b7f24c2 100644
--- a/src/Infrastructure/UserConfigStore.cs
+++ b/src/Infrastructure/UserConfigStore.cs
@@ -30,9 +30,10 @@ public static (UserConfig? Config, string? LegacyKey) Load()
 
             var config = new UserConfig
             {
-                ModelId  = onDisk.ModelId  ?? string.Empty,
-                Endpoint = onDisk.Endpoint ?? string.Empty,
-                Provider = onDisk.Provider ?? string.Empty,
+                ModelId      = onDisk.ModelId      ?? string.Empty,
+                Endpoint     = onDisk.Endpoint     ?? string.Empty,
+                Provider     = onDisk.Provider     ?? string.Empty,
+                ApiKeyEnvVar = onDisk.ApiKeyEnvVar ?? string.Empty,
             };
             return (config, onDisk.ApiKey);
         }
@@ -48,9 +49,10 @@ public static void Save(UserConfig config)
         Directory.CreateDirectory(ConfigDir);
         var onDisk = new OnDiskConfig
         {
-            ModelId  = config.ModelId,
-            Endpoint = config.Endpoint,
-            Provider = config.Provider,
+            ModelId      = config.ModelId,
+            Endpoint     = config.Endpoint,
+            Provider     = config.Provider,
+            ApiKeyEnvVar = config.ApiKeyEnvVar,
         };
         File.WriteAllText(ConfigPath, JsonSerializer.Serialize(onDisk, JsonOptions));
     }
@@ -68,6 +70,9 @@ private sealed class OnDiskConfig
         [JsonPropertyName("provider")]
         public string? Provider { get; set; }
 
+        [JsonPropertyName("apiKeyEnvVar")]
+        public string? ApiKeyEnvVar { get; set; }
+
         // Present only in configs created before keychain support was added.
         [JsonPropertyName("apiKey")]
         public string? ApiKey { get; set; }

From c5b8994861e99fe7e05006f9d7795931ac51559c Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 12:51:16 -0500
Subject: [PATCH 03/14] feat: inject OS keychain key into agents that have no
 ApiKey or ApiKeyEnvVar

When a user stores their provider key in the OS keychain (via `fuseraft key
set`) and no agent file declares ApiKey or ApiKeyEnvVar, the key was silently
unused at run time and auth failed. ApplyKeychainKeyAsync now runs after
ApplyGlobalDefaults in BuildAsync: it retrieves the keychain key once and
injects it as a literal ApiKey on every model config that still has neither
field set. Models with either field already populated are untouched. LoadConfig
(used for display and validation) is deliberately kept sync and unchanged since
the literal key is only needed at runtime, not for config inspection.
---
 src/Cli/OrchestratorBuilder.cs | 42 ++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/src/Cli/OrchestratorBuilder.cs b/src/Cli/OrchestratorBuilder.cs
index b2804b5..7b1adf6 100644
--- a/src/Cli/OrchestratorBuilder.cs
+++ b/src/Cli/OrchestratorBuilder.cs
@@ -13,6 +13,7 @@
 using fuseraft.Core.Interfaces;
 using fuseraft.Core.Models;
 using fuseraft.Infrastructure;
+using fuseraft.Infrastructure.KeyStore;
 using fuseraft.Infrastructure.Plugins;
 using fuseraft.Orchestration;
 using fuseraft.Orchestration.Saga;
@@ -69,6 +70,10 @@ public static class OrchestratorBuilder
         // model that doesn't declare them explicitly.
         config = ApplyGlobalDefaults(config);
 
+        // For models still missing both ApiKey and ApiKeyEnvVar, inject the key
+        // stored in the OS keychain so users don't have to set an env var at all.
+        config = await ApplyKeychainKeyAsync(config, cancellationToken);
+
         // Apply per-config security constraints and API profiles to the security-sensitive plugins.
         var profiles = config.ApiProfiles.Count > 0
             ? (IReadOnlyDictionary<string, ApiProfileConfig>)config.ApiProfiles
@@ -817,6 +822,43 @@ ModelConfig Fill(ModelConfig m) => m with
         return config with { Agents = agents, Models = models, Selection = sel };
     }
 
+    // Injects the OS keychain key as a literal ApiKey on every model config that has
+    // neither ApiKey nor ApiKeyEnvVar set. The keychain is read at most once per call.
+    // Models that already have either field set are left untouched.
+    private static async Task<OrchestrationConfig> ApplyKeychainKeyAsync(
+        OrchestrationConfig config,
+        CancellationToken cancellationToken = default)
+    {
+        // Quick check: any model actually needs a key?
+        bool NeedsKey(ModelConfig m) =>
+            string.IsNullOrWhiteSpace(m.ApiKey) && string.IsNullOrWhiteSpace(m.ApiKeyEnvVar);
+
+        bool anyAgentNeedsKey = config.Agents.Any(a => NeedsKey(a.Model))
+            || config.Models.Values.Any(NeedsKey)
+            || (config.Selection.Model    is not null && NeedsKey(config.Selection.Model))
+            || (config.Selection.Magentic?.Model is not null && NeedsKey(config.Selection.Magentic.Model));
+
+        if (!anyAgentNeedsKey) return config;
+
+        var keychainKey = await ApiKeyStoreFactory.Create().RetrieveAsync();
+        if (string.IsNullOrWhiteSpace(keychainKey)) return config;
+
+        ModelConfig Fill(ModelConfig m) =>
+            NeedsKey(m) ? m with { ApiKey = keychainKey } : m;
+
+        var agents = config.Agents.Select(a => a with { Model = Fill(a.Model) }).ToList();
+        var models  = config.Models.ToDictionary(kv => kv.Key, kv => Fill(kv.Value));
+        var sel     = config.Selection with
+        {
+            Model    = config.Selection.Model    is not null ? Fill(config.Selection.Model)    : null,
+            Magentic = config.Selection.Magentic is not null
+                ? config.Selection.Magentic with { Model = config.Selection.Magentic.Model is not null ? Fill(config.Selection.Magentic.Model) : null }
+                : null,
+        };
+
+        return config with { Agents = agents, Models = models, Selection = sel };
+    }
+
     private static ModelConfig ResolveAlias(
         ModelConfig model,
         IReadOnlyDictionary<string, ModelConfig> registry)

From 9ec2e848f09075316d31fafd1132674136618de0 Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 13:01:41 -0500
Subject: [PATCH 04/14] docs: update validators, models, and cli-reference for
 recent changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

validators.md — RequireWriteFile now accepts patch_file alongside write_file;
update the "used on" header (no longer specific to HANDOFF TO TESTER), the
pass/fail descriptions, the ShellFallbackPattern prose, and the error-message
example to match the new generic message.

models.md — add "Global config defaults" section documenting that endpoint and
apiKeyEnvVar in ~/.fuseraft/config are applied to any agent model that omits
them, and documenting the full auth priority chain (explicit ApiKey → ApiKeyEnvVar
→ global apiKeyEnvVar → OS keychain). Update the ModelConfig table rows for
Endpoint and ApiKeyEnvVar to note the global-config fallback.

cli-reference.md — note that the --endpoint flag value is also inherited by
agents at run time; clarify that keychain-only agents skip the ApiKeyEnvVar
env-var check in static validation.
---
 docs/cli-reference.md |  4 ++--
 docs/models.md        | 32 ++++++++++++++++++++++++++++++--
 docs/validators.md    | 26 +++++++++++---------------
 3 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/docs/cli-reference.md b/docs/cli-reference.md
index 41704fc..2b9914b 100644
--- a/docs/cli-reference.md
+++ b/docs/cli-reference.md
@@ -516,7 +516,7 @@ fuseraft validate <path> [options]
 12. Agent names referenced in termination strategies exist in the agents list
 13. If `Telemetry` is set: `OtlpEndpoint` is a valid absolute URI
 14. With `--strict`: every plugin name in any agent's `Plugins` list is registered
-15. For every `ApiKeyEnvVar` referenced: the environment variable is set in the current shell (warning if missing)
+15. For every `ApiKeyEnvVar` referenced: the environment variable is set in the current shell (warning if missing). Note: agents that rely on the OS keychain rather than an env var skip this check — keychain auth is verified only when `--check-connectivity` is used.
 
 **Exit codes**
 
@@ -634,7 +634,7 @@ fuseraft init [output] [options]
 |------|---------|-------------|
 | `-t, --template <name>` | interactive | Team template to use. See templates below. |
 | `-m, --model <id>` | auto-detected | Model ID to use for all agents. Auto-detected from your API keys if omitted. |
-| `-e, --endpoint <url>` | `~/.fuseraft/config` | Provider API endpoint URL. Defaults to the endpoint saved in `~/.fuseraft/config` if present. |
+| `-e, --endpoint <url>` | `~/.fuseraft/config` | Provider API endpoint URL. Defaults to the endpoint saved in `~/.fuseraft/config` if present. At run time, agents without an explicit `Endpoint` also inherit the endpoint saved in `~/.fuseraft/config` automatically. |
 | `--no-interactive` | off | Skip all prompts and generate with the supplied options and defaults. |
 
 **Templates**
diff --git a/docs/models.md b/docs/models.md
index 9efb27b..6b720fb 100644
--- a/docs/models.md
+++ b/docs/models.md
@@ -62,8 +62,8 @@ Any field left empty falls back to auto-detection.
 |-------|------|---------|-------------|
 | `ModelId` | string | — | Model identifier sent to the API. |
 | `Provider` | string | auto | Connector type: `openai`, `azure`, `google`, `mistral`, `ollama`. Auto-detected from `ModelId` if omitted. |
-| `Endpoint` | string | auto | API base URL. Auto-detected from provider if omitted. Required for `azure`. |
-| `ApiKeyEnvVar` | string | auto | Name of the environment variable holding the API key. Auto-detected from provider if omitted. Leave empty for `ollama`. |
+| `Endpoint` | string | auto | API base URL. Auto-detected from provider if omitted. Required for `azure`. Falls back to `endpoint` in `~/.fuseraft/config` when blank. |
+| `ApiKeyEnvVar` | string | auto | Name of the environment variable holding the API key. Auto-detected from provider if omitted. Leave empty for `ollama`. Falls back to `apiKeyEnvVar` in `~/.fuseraft/config` when blank. |
 | `MaxTokens` | int | `0` | Max tokens per response. `0` = use model default. |
 | `Temperature` | number | — | Sampling temperature (0.0–2.0). Omit for reasoning models that reject this parameter. |
 
@@ -90,6 +90,34 @@ For any model not matching the table, specify `Provider`, `Endpoint`, and `ApiKe
 
 ---
 
+## Global config defaults
+
+`~/.fuseraft/config` can define a default `endpoint` and `apiKeyEnvVar` that are applied to every agent model (and named alias) that doesn't set those fields itself. This means you only need to configure the provider once — generated agent files work out of the box without repeating the values.
+
+```json
+{
+  "modelId": "anthropic.claude-sonnet-4-5-20250929-v1:0",
+  "endpoint": "http://localhost:3000/api/openai/v1",
+  "apiKeyEnvVar": "OPENWEBUI_API_KEY"
+}
+```
+
+Create this file via `fuseraft repl` (the setup wizard writes it automatically) or edit it directly.
+
+### OS keychain fallback
+
+If an agent model has neither `ApiKey` nor `ApiKeyEnvVar` set after global defaults are applied, fuseraft retrieves the key stored in the OS keychain (set via `fuseraft key set` or the REPL wizard) and injects it as a literal `ApiKey`. This means the full auth resolution order for any agent model is:
+
+1. Explicit `ApiKey` in the agent file (literal value)
+2. `ApiKeyEnvVar` from the agent file (env var lookup)
+3. `apiKeyEnvVar` from `~/.fuseraft/config` (env var lookup)
+4. OS keychain (retrieved once at startup, injected as literal key)
+5. Nothing — Ollama and other unauthenticated providers work without a key
+
+Per-agent values always win; global values only fill in empty fields.
+
+---
+
 ## Supported providers
 
 ### openai — OpenAI and OpenAI-compatible APIs
diff --git a/docs/validators.md b/docs/validators.md
index 7cd7b4c..30623e1 100644
--- a/docs/validators.md
+++ b/docs/validators.md
@@ -164,17 +164,17 @@ The validator checks THIS TURN ONLY — prior-turn runs do not carry forward.
 
 ## RequireWriteFile
 
-**Used on:** `HANDOFF TO TESTER` (or any route where you require the agent to have written a file this turn)
+**Used on:** Any route where the agent must have written at least one file this turn (e.g. `HANDOFF TO TESTER`, `RECON COMPLETE`, `HANDOFF TO REVIEWER`)
 
-**What it checks:** Walks backward through the conversation history looking for completed `write_file` tool calls (`Role=Tool` messages with a `FunctionResultContent` whose function name contains `write_file`). Stops at the most recent user-role message.
+**What it checks:** Walks backward through the conversation history looking for completed `write_file` or `patch_file` tool calls (`Role=Tool` messages with a `FunctionResultContent` whose function name contains either string). Stops at the most recent user-role message.
 
-**Passes if:** At least one `write_file` call completed in the current agent turn.
+**Passes if:** At least one `write_file` or `patch_file` call completed in the current agent turn.
 
-**Fails if:** No `write_file` call is found — meaning the agent described a file write in text but never actually called the tool.
+**Fails if:** Neither tool was called — meaning the agent described a file write in text but never actually called the tool. Text, code blocks, and responses are not saved to disk.
 
 ### ShellFallbackPattern
 
-Some fixes require only a shell command (e.g. a dependency update) and produce no `write_file` call. Set `ShellFallbackPattern` on the route to allow a successful matching `shell_run` to satisfy the validator in place of `write_file`:
+Some fixes require only a shell command (e.g. a dependency update) and produce no file-write call. Set `ShellFallbackPattern` on the route to allow a successful matching `shell_run` to satisfy the validator instead:
 
 ```yaml
 - Keyword: "HANDOFF TO TESTER"
@@ -185,21 +185,17 @@ Some fixes require only a shell command (e.g. a dependency update) and produce n
 
 The pattern is a pipe-separated list of substrings (case-insensitive). The validator passes if the turn contains a successful `shell_run` whose command matches any alternative. A failed shell command (exit code non-zero, `[ERROR]`, `[TIMEOUT]`, `[DENIED]`) is never accepted regardless of the pattern.
 
-When `ShellFallbackPattern` is omitted the validator behaves as before — only `write_file` satisfies it.
+When `ShellFallbackPattern` is omitted, only a completed `write_file`, `patch_file`, or `git_commit` call satisfies the validator.
 
 **Error injected on failure:**
 
 ```
-HANDOFF TO TESTER blocked: no evidence of real work this turn
-(no write_file, no git_commit, no shell fallback matched).
+Handoff blocked: no evidence of real work this turn
+(no write_file, no patch_file, no git_commit, no shell fallback matched).
 
-Required before handing off:
-  1. write_file for every changed file.
-  2. shell_run ./build.sh — fix until it passes.
-  3. git_add + git_commit.
-  4. Retry handoff.
-
-All tools available: write_file, shell_run, read_file. Code blocks are NOT saved to disk.
+You must write at least one file before handing off. Use write_file for new files
+or patch_file for surgical edits to existing files. Code blocks in your response
+are NOT saved to disk — you must call the tool.
 ```
 
 ---

From dd1576d62c0a6c97fac63afcccb24d4c959042ca Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 13:19:12 -0500
Subject: [PATCH 05/14] fix: remove ApplyGlobalDefaults from LoadConfig to keep
 validation env-independent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LoadConfig is used for static inspection and validation — it should check the
config as-written, not as-runtime-resolved. Applying global defaults there made
ValidateConfigCommand pass configs that are actually incomplete (e.g. unknown
model prefix with no Endpoint), because ~/.fuseraft/config's endpoint silently
filled the gap. Global defaults now apply only in BuildAsync where agents are
actually invoked. Fixes ModelWithoutPrefix_NoEndpoint_Errors test.
---
 src/Cli/OrchestratorBuilder.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Cli/OrchestratorBuilder.cs b/src/Cli/OrchestratorBuilder.cs
index 7b1adf6..e463a82 100644
--- a/src/Cli/OrchestratorBuilder.cs
+++ b/src/Cli/OrchestratorBuilder.cs
@@ -759,7 +759,7 @@ public static OrchestrationConfig LoadConfig(string configPath)
                 .AddJsonFile(Path.GetFullPath(configPath), optional: false)
                 .Build();
 
-        return ApplyGlobalDefaults(BindConfig(configPath, configuration));
+        return BindConfig(configPath, configuration);
     }
 
     // Resolves the base system prompt prepended to every agent.

From 92a0bffc2c2835a3fd0dfef43daf48d3f9a22693 Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 13:30:54 -0500
Subject: [PATCH 06/14] fix: skip provider auto-detection when Endpoint +
 ApiKeyEnvVar are both set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The short-circuit that bypasses model-ID prefix detection only fired when both
Endpoint and a literal ApiKey were present. When global config supplied Endpoint
and ApiKeyEnvVar (env-var reference instead of literal key), the model fell
through to DetectFromPrefix, which doesn't recognise AWS Bedrock-style IDs like
anthropic.claude-sonnet-4-5-20250929-v1:0, throwing "Cannot determine the LLM
provider". Extend the short-circuit to cover any form of auth (ApiKey OR
ApiKeyEnvVar) — if the caller supplied their own endpoint and auth, treat as
OpenAI-compatible and skip prefix detection entirely.
---
 src/Infrastructure/ChatClientFactory.cs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Infrastructure/ChatClientFactory.cs b/src/Infrastructure/ChatClientFactory.cs
index 2574c1e..343f8b3 100644
--- a/src/Infrastructure/ChatClientFactory.cs
+++ b/src/Infrastructure/ChatClientFactory.cs
@@ -111,11 +111,12 @@ public ModelConfig Resolve(ModelConfig config)
             && (!string.IsNullOrEmpty(config.ApiKeyEnvVar) || !string.IsNullOrEmpty(config.ApiKey)))
             return config;
 
-        // 2b. Explicit endpoint + literal key (e.g. REPL wizard, custom/enterprise provider).
+        // 2b. Explicit endpoint + any form of auth (literal key or env-var reference).
         // Skip auto-detection and treat as OpenAI-compatible — the user supplied all necessary
         // connection info and auto-detection would only misidentify unusual model ID formats
         // (e.g. AWS Bedrock "anthropic.claude-...:0" being wrongly treated as an Ollama tag).
-        if (!string.IsNullOrEmpty(config.Endpoint) && !string.IsNullOrEmpty(config.ApiKey))
+        if (!string.IsNullOrEmpty(config.Endpoint)
+            && (!string.IsNullOrEmpty(config.ApiKey) || !string.IsNullOrEmpty(config.ApiKeyEnvVar)))
             return config with { Provider = string.IsNullOrEmpty(config.Provider) ? "openai" : config.Provider };
 
         // Ollama tag format: "modelname:tag" where the tag contains at least one letter

From 5100de1668edfef8fb26890426f95aabc196aa44 Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 13:36:51 -0500
Subject: [PATCH 07/14] fix: treat custom-endpoint models as openai-compatible
 when prefix detection fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a model ID doesn't match any known prefix (e.g. AWS Bedrock-style IDs like
anthropic.claude-sonnet-4-5-20250929-v1:0 used through Open WebUI), and an
Endpoint is set (either inline or injected from ~/.fuseraft/config), Resolve
was still throwing "Cannot determine the LLM provider". A custom Endpoint is
an unambiguous signal that the caller knows which provider to use — fall back
to openai-compatible instead of throwing, consistent with how the REPL wizard
treats enterprise/custom endpoints.
---
 src/Infrastructure/ChatClientFactory.cs | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/Infrastructure/ChatClientFactory.cs b/src/Infrastructure/ChatClientFactory.cs
index 343f8b3..f1fd6de 100644
--- a/src/Infrastructure/ChatClientFactory.cs
+++ b/src/Infrastructure/ChatClientFactory.cs
@@ -132,8 +132,15 @@ public ModelConfig Resolve(ModelConfig config)
 
         if (detected is null)
         {
-            // If the caller provided at least Provider, we can proceed; otherwise fail fast
-            // with a helpful message rather than a cryptic missing-env-var error later.
+            // A custom Endpoint is an unambiguous signal that the caller knows which
+            // provider to use — treat as OpenAI-compatible and skip the prefix check.
+            // This covers non-standard model IDs (e.g. AWS Bedrock "anthropic.claude-...:0",
+            // Open WebUI deployments) where the endpoint is set via global config or inline.
+            if (!string.IsNullOrEmpty(config.Endpoint))
+                return config with { Provider = string.IsNullOrEmpty(config.Provider) ? "openai" : config.Provider };
+
+            // No endpoint and no detectable prefix — fail fast with a helpful message
+            // rather than a cryptic missing-env-var error later.
             if (string.IsNullOrEmpty(config.Provider))
                 throw new InvalidOperationException(
                     $"Cannot determine the LLM provider for model '{config.ModelId}'. " +

From 910ee890c39c159419f1f33f955ce8c9b7e6b53f Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 13:42:24 -0500
Subject: [PATCH 08/14] feat: inherit ModelId from ~/.fuseraft/config when
 agent model leaves it blank
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ApplyGlobalDefaults already propagated Endpoint and ApiKeyEnvVar from the
global config; extend it to cover ModelId too. This completes the contract:
fuseraft run will not fail due to a missing connection field as long as
~/.fuseraft/config supplies the defaults — agent files only need to override
what differs from the global config (e.g. a per-agent MaxTokens or a
different model for one role).
---
 src/Cli/OrchestratorBuilder.cs | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/Cli/OrchestratorBuilder.cs b/src/Cli/OrchestratorBuilder.cs
index e463a82..eded40e 100644
--- a/src/Cli/OrchestratorBuilder.cs
+++ b/src/Cli/OrchestratorBuilder.cs
@@ -789,20 +789,22 @@ public static OrchestrationConfig LoadConfig(string configPath)
         return reader.ReadToEnd().Trim();
     }
 
-    // Fills in Endpoint and ApiKeyEnvVar from ~/.fuseraft/config on any model config
-    // that doesn't set them explicitly. This lets the global config act as a default
-    // provider for all agents without requiring every agent file to repeat the values.
+    // Fills in ModelId, Endpoint, and ApiKeyEnvVar from ~/.fuseraft/config on any model
+    // config that doesn't set them explicitly. This lets the global config act as a
+    // default provider so agent files work without repeating connection details.
     // Per-agent explicit values always win; only empty fields are filled.
     private static OrchestrationConfig ApplyGlobalDefaults(OrchestrationConfig config)
     {
         var (globalCfg, _) = UserConfigStore.Load();
-        var globalEndpoint   = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.Endpoint)     ? globalCfg.Endpoint     : null;
+        var globalModelId      = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.ModelId)      ? globalCfg.ModelId      : null;
+        var globalEndpoint     = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.Endpoint)     ? globalCfg.Endpoint     : null;
         var globalApiKeyEnvVar = globalCfg is not null && !string.IsNullOrWhiteSpace(globalCfg.ApiKeyEnvVar) ? globalCfg.ApiKeyEnvVar : null;
 
-        if (globalEndpoint is null && globalApiKeyEnvVar is null) return config;
+        if (globalModelId is null && globalEndpoint is null && globalApiKeyEnvVar is null) return config;
 
         ModelConfig Fill(ModelConfig m) => m with
         {
+            ModelId      = string.IsNullOrWhiteSpace(m.ModelId)      && globalModelId      is not null ? globalModelId      : m.ModelId,
             Endpoint     = string.IsNullOrWhiteSpace(m.Endpoint)     && globalEndpoint     is not null ? globalEndpoint     : m.Endpoint,
             ApiKeyEnvVar = string.IsNullOrWhiteSpace(m.ApiKeyEnvVar) && globalApiKeyEnvVar is not null ? globalApiKeyEnvVar : m.ApiKeyEnvVar,
         };

From 2d05e6508e0ca9b84a0368f6397039e3dccabfbb Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 13:53:35 -0500
Subject: [PATCH 09/14] fix: throw actionable error when API key is empty
 instead of crashing in SDK

ApiKeyCredential throws "Value cannot be an empty string" when given an empty
key, producing a cryptic stack trace from deep in System.ClientModel. Guard
both the openai-compatible and azure branches with an explicit check before
the constructor call, surfacing a message that names the model, the endpoint,
and the two ways to fix it (fuseraft key set or apiKeyEnvVar in global config).
---
 src/Infrastructure/ChatClientFactory.cs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/Infrastructure/ChatClientFactory.cs b/src/Infrastructure/ChatClientFactory.cs
index f1fd6de..edbb82c 100644
--- a/src/Infrastructure/ChatClientFactory.cs
+++ b/src/Infrastructure/ChatClientFactory.cs
@@ -186,6 +186,10 @@ public IChatClient Create(ModelConfig config)
                 if (string.IsNullOrEmpty(config.Endpoint))
                     throw new InvalidOperationException(
                         $"Provider 'azure' requires Endpoint to be set (deployment: '{config.ModelId}').");
+                if (string.IsNullOrEmpty(apiKey))
+                    throw new InvalidOperationException(
+                        $"No API key available for Azure deployment '{config.ModelId}' at '{config.Endpoint}'. " +
+                        $"Store one with 'fuseraft key set', or add \"apiKeyEnvVar\": \"<VAR>\" to ~/.fuseraft/config.");
                 return new AzureOpenAIClient(
                     new Uri(config.Endpoint),
                     new ApiKeyCredential(apiKey),
@@ -205,6 +209,10 @@ public IChatClient Create(ModelConfig config)
                     throw new InvalidOperationException(
                         $"Provider '{provider}' requires Endpoint to be set (model: '{config.ModelId}'). " +
                         $"This should have been filled in by auto-detection — check the model ID prefix.");
+                if (string.IsNullOrEmpty(apiKey))
+                    throw new InvalidOperationException(
+                        $"No API key available for model '{config.ModelId}' at '{config.Endpoint}'. " +
+                        $"Store one with 'fuseraft key set', or add \"apiKeyEnvVar\": \"<VAR>\" to ~/.fuseraft/config.");
                 return new OpenAIClient(
                     new ApiKeyCredential(apiKey),
                     new OpenAIClientOptions { Transport = transport, Endpoint = new Uri(config.Endpoint), NetworkTimeout = HttpClientTimeout })

From 34446f0ac253bbe3cef4dbba5ea3c4a2987bd06a Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 13:54:01 -0500
Subject: [PATCH 10/14] =?UTF-8?q?fix:=20correct=20API=20key=20error=20mess?=
 =?UTF-8?q?age=20=E2=80=94=20fuseraft=20repl,=20not=20nonexistent=20'fuser?=
 =?UTF-8?q?aft=20key=20set'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/Infrastructure/ChatClientFactory.cs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Infrastructure/ChatClientFactory.cs b/src/Infrastructure/ChatClientFactory.cs
index edbb82c..b6fe2ff 100644
--- a/src/Infrastructure/ChatClientFactory.cs
+++ b/src/Infrastructure/ChatClientFactory.cs
@@ -189,7 +189,7 @@ public IChatClient Create(ModelConfig config)
                 if (string.IsNullOrEmpty(apiKey))
                     throw new InvalidOperationException(
                         $"No API key available for Azure deployment '{config.ModelId}' at '{config.Endpoint}'. " +
-                        $"Store one with 'fuseraft key set', or add \"apiKeyEnvVar\": \"<VAR>\" to ~/.fuseraft/config.");
+                        $"Run 'fuseraft repl' and complete the setup wizard, or add \"apiKeyEnvVar\": \"<VAR>\" to ~/.fuseraft/config.");
                 return new AzureOpenAIClient(
                     new Uri(config.Endpoint),
                     new ApiKeyCredential(apiKey),
@@ -212,7 +212,7 @@ public IChatClient Create(ModelConfig config)
                 if (string.IsNullOrEmpty(apiKey))
                     throw new InvalidOperationException(
                         $"No API key available for model '{config.ModelId}' at '{config.Endpoint}'. " +
-                        $"Store one with 'fuseraft key set', or add \"apiKeyEnvVar\": \"<VAR>\" to ~/.fuseraft/config.");
+                        $"Run 'fuseraft repl' and complete the setup wizard, or add \"apiKeyEnvVar\": \"<VAR>\" to ~/.fuseraft/config.");
                 return new OpenAIClient(
                     new ApiKeyCredential(apiKey),
                     new OpenAIClientOptions { Transport = transport, Endpoint = new Uri(config.Endpoint), NetworkTimeout = HttpClientTimeout })

From 72cd1514441512c890fd6533b01ab4e5cbdbc880 Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 13:58:58 -0500
Subject: [PATCH 11/14] =?UTF-8?q?fix:=20remove=20duplicate=20error=20displ?=
 =?UTF-8?q?ay=20=E2=80=94=20drop=20WriteException=20from=20generic=20catch?=
 =?UTF-8?q?=20in=20SessionRunner?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The generic catch block called AnsiConsole.WriteException (printing the full
stack trace) and also set errorMessage, which RenderSummary then printed again
as '✗ Error: ...'. The message already surfaces clearly through RenderSummary;
the stack trace added noise without value for user-facing operational errors.
---
 src/Cli/SessionRunner.cs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Cli/SessionRunner.cs b/src/Cli/SessionRunner.cs
index 1a54ad2..e7ce8d8 100644
--- a/src/Cli/SessionRunner.cs
+++ b/src/Cli/SessionRunner.cs
@@ -190,7 +190,6 @@ await eventEmitter.EmitAsync("session_error",
             {
                 succeeded    = false;
                 errorMessage = ex.Message;
-                AnsiConsole.WriteException(ex, ExceptionFormats.ShortenPaths);
                 break;
             }
 

From 280296b1958041e6875ec04689d612ab13d42120 Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 21:11:20 -0500
Subject: [PATCH 12/14] docs: add context management overview, rename context
 store page
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add docs/context-management.md — a unified overview of all four context
layers (context store, persistent memory, ContextWindow filtering,
compaction). Covers TextOnly, ExcludeAgents, MaxTurnAge, MaxTailMessages,
ContextCapFraction, replay truncation, all five compaction modes,
IncludeReasoning, IncludeSymbolGraph, change log grounding, and the
head+tail pinning behaviour. Includes a flow diagram and opinionated
strategy guide.

Rename docs/context.md → docs/context-store.md so the file name matches
what it actually documents (the file-import CLI, not conversation context).
Update mkdocs.yml nav and index.md guide table accordingly.
---
 docs/context-management.md            | 354 ++++++++++++++++++++++++++
 docs/{context.md => context-store.md} |   0
 docs/index.md                         |   3 +-
 mkdocs.yml                            |   2 +
 4 files changed, 358 insertions(+), 1 deletion(-)
 create mode 100644 docs/context-management.md
 rename docs/{context.md => context-store.md} (100%)

diff --git a/docs/context-management.md b/docs/context-management.md
new file mode 100644
index 0000000..41aaddf
--- /dev/null
+++ b/docs/context-management.md
@@ -0,0 +1,354 @@
+# Context Management
+
+Context is the most important resource in a long-running agent session. Every token an agent
+sees costs money and time; everything it misses is a potential hallucination or regression.
+fuseraft manages context through four layers that fire at different points in a session's
+lifetime:
+
+```
+Session start
+  └─ Layer 1: Context Store      → files imported before the session
+  └─ Layer 2: Persistent Memory  → facts recalled from prior sessions
+
+Each agent turn
+  └─ Layer 3: ContextWindow      → per-agent history filter (every turn)
+
+History too long
+  └─ Layer 4: Compaction         → replace old turns with a summary
+```
+
+Each layer is optional and independently configured. Most sessions need only one or two.
+
+---
+
+## Layer 1: Context Store
+
+The context store pre-loads static reference files into `.fuseraft/context/` before a session
+starts. Every agent sees a compact index block at the top of its system prompt listing what is
+available, and can access the full content with `read_file`.
+
+```yaml
+# No config required — populated by CLI before running:
+#   fuseraft context add ~/docs/schema.sql --name db-schema
+#   fuseraft context add ~/specs/ --name specs
+```
+
+**When to use:** Database schemas, API specs, architecture docs, task briefs — anything too
+large to paste into the task argument but that agents should know exists from turn one.
+
+See [Context Store](context-store.md) for the full CLI reference.
+
+---
+
+## Layer 2: Persistent Memory
+
+When `EnableMemory: true` is set on an agent, fuseraft loads that agent's persistent memory
+store at session start and prepends a structured block to its instructions. Memories survive
+between sessions — they accumulate over time, giving agents a working knowledge of the project.
+
+```yaml
+Agents:
+  - Name: Developer
+    EnableMemory: true
+    Instructions: |
+      You are a Go developer. Write idiomatic, tested code.
+```
+
+At session start, the agent sees:
+
+```
+MEMORY — facts recalled from prior sessions:
+[preference] preferred-test-runner: Use `go test -race ./...` for all test runs.
+[fact] auth-middleware: The auth middleware was rewritten in v2.3 — do not touch the legacy layer.
+```
+
+**Storage locations:**
+
+| Context | Path |
+|---|---|
+| REPL sessions | `~/.fuseraft/memory/repl/` |
+| Orchestration agents | `~/.fuseraft/memory/agents/{AgentName}/` |
+
+**Memory scoping:** In a project directory that has `.fuseraft/`, only memories saved in that
+directory are loaded. Directories without `.fuseraft/` fall back to all global memories.
+
+**REPL:** Memory is always active in the REPL — no config flag needed. Memories are extracted
+automatically at the end of each session and scoped to the working directory via
+`.fuseraft/memory_refs.json`. Use `/memory` commands to inspect or delete them.
+
+**Memory cap:** The prompt block is capped at 8,000 characters. Entries are ordered by type
+then name; entries that would exceed the cap are dropped (header only is kept for visibility).
+
+See [Configuration — Memory](configuration.md#memory) for the full field reference.
+
+---
+
+## Layer 3: ContextWindow (per-agent history filter)
+
+By default every agent receives the full accumulated conversation history, including tool-call
+frames and tool-result messages from all prior turns. In a long multi-agent session this can
+reach hundreds of thousands of tokens — most of it irrelevant to late-stage agents.
+
+`ContextWindow` lets each agent declare a lighter view. The shared history is never mutated;
+only the slice passed to that agent's turn is affected.
+
+### Filters and their order
+
+Filters are applied in this order every turn:
+
+1. **TextOnly / ExcludeAgents** — strip tool noise or specific agents' output
+2. **MaxTurnAge** — keep only messages from the last N agent turns (semantic cut)
+3. **MaxTailMessages** — hard cap: keep only the last N messages (raw count)
+
+```yaml
+Agents:
+  - Name: Reviewer
+    ContextWindow:
+      TextOnly: true          # strip all tool-call frames and tool results
+      ExcludeAgents:          # also strip all output from these agents
+        - Tester
+      MaxTurnAge: 5           # only keep messages from the last 5 assistant turns
+      MaxTailMessages: 40     # hard cap after the above filters
+      ContextCapFraction: 0.8 # emit context_cap_warning when at 80% of MaxTailMessages
+```
+
+### TextOnly
+
+Strips all tool-call frames (assistant messages containing only a function-call request) and
+all tool-result messages from the history slice. Text-bearing assistant messages and all user
+messages are kept.
+
+**This is the primary lever for context reduction.** A Reviewer that independently re-reads
+files and re-runs commands gains nothing from seeing the hundreds of tool results produced by
+the Developer — stripping them can reduce input tokens by 90%+ in typical sessions.
+
+When `ExcludeAgents` is set, tool-result messages are stripped automatically even when
+`TextOnly` is false. Tool results carry no agent attribution, so they cannot be filtered per agent, and
+leaving them without their call frames would produce a malformed context with orphaned result IDs.
+
+### ExcludeAgents
+
+Names of agents whose messages should be excluded entirely — both text-bearing replies and
+tool-call frames.
+
+### MaxTurnAge
+
+Keeps only messages from the last N *agent turns*, where each turn ends with an assistant
+reply. Unlike `MaxTailMessages` (a raw message count), `MaxTurnAge` is semantic: it counts
+backward from the end of history and discards everything before the cut-point.
+
+Use this to discard early-session context from phases or agents no longer relevant to the
+current work — without needing to know the exact message count.
+
+### MaxTailMessages
+
+Hard cap applied after the other filters. When the filtered list still exceeds this count,
+the oldest messages are dropped. Set `ContextCapFraction` to receive a `context_cap_warning`
+event as an early signal before the hard cap is reached.
+
+### Replay truncation
+
+Agents sometimes produce verbose stream-of-consciousness output (3–5k tokens). When that text
+is replayed verbatim in every subsequent turn, compaction summaries grow each cycle and input
+tokens balloon. fuseraft automatically truncates verbose non-summary assistant messages to
+2,000 characters when replaying them into the next turn's history. Compaction summaries are
+never truncated.
+
+---
+
+## Layer 4: Compaction
+
+When conversation history grows long enough to approach a model's context window, compaction
+fires. It replaces the oldest turns with a single context message that agents treat as
+background, then resumes from the retained tail.
+
+### Trigger
+
+```yaml
+Compaction:
+  TriggerTurnCount: 50   # fire when assistant-turn count reaches this
+  KeepRecentTurns: 10    # keep this many turns verbatim; compact the rest
+```
+
+Compaction fires in two situations:
+- Before a session stream starts, when resuming a checkpoint already over the threshold.
+- Mid-session, after each checkpoint save, once the live history crosses the threshold.
+
+`TriggerTurnCount` must be greater than `KeepRecentTurns`.
+
+### Modes
+
+| Mode | How context is reconstructed | LLM call? | Requirements |
+|---|---|---|---|
+| `llm` | LLM summarizes the compacted turns | Yes | A model |
+| `intent` | Deterministic `✓`/`✗`/`⧖` per tool call from `intents.json` | No | `ChangeTracking` |
+| `lossless` | Evidence graph + contract status + state machine position | No | `statemachine` strategy + `EvidenceStore` |
+| `hybrid` | Lossless reconstruction prepended before the LLM summary | Yes | `statemachine` strategy + `EvidenceStore` |
+| `window` | Oldest user+assistant pairs dropped until within `TokenBudget` | No | — |
+
+**`intent` is the recommended mode** for most sessions when `ChangeTracking` is configured.
+It requires no state machine and produces a deterministic record of every tool call:
+
+```
+[INTENT-DERIVED RECONSTRUCTION — covers turns 1–20]
+
+OPERATIONS (chronological):
+  ✓ write_file → "src/api/users.go" (turn 3, Developer)
+  ✗ patch_file → "src/api/auth.go" — oldText not found… (turn 4, Developer)
+  ✓ shell_run → "go test ./..." (turn 5, Tester)
+
+RESUMPTION NOTE: History compacted from intent log — deterministic ground truth.
+Do not re-execute operations marked ✓ (applied).
+Operations marked ✗ (failed) should be retried if the task requires them.
+```
+
+**`lossless` is the recommended mode** for `statemachine` sessions with an `EvidenceStore`.
+Instead of summarizing the conversation, it reads disk state directly — state machine position,
+contract pass/fail, evidence items — and injects it as ground truth. No hallucination is
+possible because no LLM generates the summary.
+
+**`window` mode** trades context continuity for simplicity. No summary is injected; the oldest
+turns are silently dropped. Useful for exploratory sessions where older context genuinely
+doesn't matter, or when you want no compaction LLM cost at all.
+
+### Pinned summaries
+
+Prior compaction summaries (`IsCompactionSummary`) are pinned and never dropped by `window`
+mode. This preserves the head of the conversation — each compaction cycle adds a new summary
+at the front while the window trims from behind it.
+
+### Compaction model
+
+By default, `llm` and `hybrid` modes use the first agent's model to generate the summary.
+Override with `Compaction.Model` to use a cheaper model for compaction:
+
+```yaml
+Compaction:
+  TriggerTurnCount: 50
+  KeepRecentTurns: 10
+  Mode: hybrid
+  Model:
+    ModelId: gpt-4o-mini
+```
+
+### Enriching summaries
+
+Two optional flags add structured context blocks before the LLM summary text. When both are
+enabled, the blocks are prepended in this order: symbol graph first, then reasoning excerpts.
+
+**`IncludeReasoning`** — prepends a `[REASONING EXCERPTS]` block containing the model's
+thinking for each compacted turn (truncated to ~500 tokens per turn). Useful when the *why*
+behind prior decisions matters as much as the *what*. Requires `Events` to be configured
+(reasoning excerpts are read from the session events log).
+
+**`IncludeSymbolGraph`** — prepends a `[SYMBOL DEPENDENCY GRAPH]` block listing every
+`SymbolDefinition` and `SymbolReference` node in the evidence store for files written during
+the session. Gives agents an explicit map of what symbols were in scope during the compacted
+turns. Requires `EvidenceStore` and `ChangeTracking` to be configured.
+
+```yaml
+Compaction:
+  TriggerTurnCount: 40
+  KeepRecentTurns: 8
+  Mode: hybrid
+  IncludeReasoning: true
+  IncludeSymbolGraph: true
+```
+
+### Change log grounding
+
+When `ChangeTracking` or `Validation.ChangeLogPath` is configured, `llm` and `hybrid`
+compactors read `changes.json` at compaction time and inject it into the summary prompt as
+authoritative ground truth. Agent success claims are overridden by what `changes.json` actually
+records — exit codes and file writes are facts; assistant self-reports are not.
+
+### Cost accounting
+
+The summary message's cumulative cost includes the cost of every turn it replaced, so budget
+tracking remains exact across compaction boundaries. `intent`, `lossless`, and `window` modes
+incur no LLM cost at compaction time.
+
+---
+
+## How the layers fit together
+
+Here is the full sequence from session start through a long-running session:
+
+```
+1. fuseraft run
+   ├─ Context Store index → injected into every agent's system prompt
+   └─ Persistent Memory  → prepended to each agent's instructions (if EnableMemory: true)
+
+2. Each agent turn
+   └─ ContextWindow filter applied to conversation history
+      ├─ TextOnly / ExcludeAgents strip tool noise
+      ├─ MaxTurnAge semantic cut
+      └─ MaxTailMessages hard cap
+         └─ Filtered slice + replay-truncated content → sent to LLM
+
+3. After each checkpoint save
+   └─ Compaction check
+      ├─ (llm/intent/lossless/hybrid) assistant-turn count ≥ TriggerTurnCount?
+      │     YES → compact oldest (Count − KeepRecentTurns) turns into one message
+      │           save checkpoint with compacted history → continue
+      └─ (window) estimated token count > TokenBudget?
+            YES → drop oldest user+assistant pairs until within budget
+                  (pinned summaries are never dropped)
+```
+
+---
+
+## Choosing a strategy
+
+**For most sessions with `ChangeTracking`:** use `intent` mode.
+
+```yaml
+ChangeTracking:
+  Path: .fuseraft/changes.json
+  IntentLogPath: .fuseraft/state/intents.json
+
+Compaction:
+  TriggerTurnCount: 40
+  KeepRecentTurns: 8
+  Mode: intent
+```
+
+**For `statemachine` sessions with `EvidenceStore`:** use `lossless` or `hybrid`.
+
+```yaml
+Compaction:
+  TriggerTurnCount: 50
+  KeepRecentTurns: 10
+  Mode: lossless   # or "hybrid" to add an LLM narrative on top
+```
+
+**For exploratory / throw-away sessions:** use `window` to avoid any compaction cost.
+
+```yaml
+Compaction:
+  Mode: window
+  TokenBudget: 60000
+```
+
+**For a downstream agent (Reviewer, Tester) that needs less history:** use `ContextWindow`.
+
+```yaml
+Agents:
+  - Name: Reviewer
+    ContextWindow:
+      TextOnly: true
+      MaxTurnAge: 3
+```
+
+**For an agent that should know nothing about earlier phases:** combine `ExcludeAgents` with
+`MaxTailMessages` so it only sees the final handoff.
+
+```yaml
+Agents:
+  - Name: Auditor
+    ContextWindow:
+      ExcludeAgents:
+        - Developer
+        - Tester
+      MaxTailMessages: 20
+```
diff --git a/docs/context.md b/docs/context-store.md
similarity index 100%
rename from docs/context.md
rename to docs/context-store.md
diff --git a/docs/index.md b/docs/index.md
index 820815c..ff23104 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -31,7 +31,8 @@ This is a personal project and a work in progress, suited for experimentation an
 | [Security & Sandbox](security.md) | File and network containment |
 | [Governance](governance.md) | Execution rings, audit log, circuit breaker, SLO tracking |
 | [Sessions](sessions.md) | Resumption, HITL, cost tracking, compaction |
-| [Context Store](context.md) | Importing reference material for agents |
+| [Context Management](context-management.md) | How fuseraft manages context across a long session |
+| [Context Store](context-store.md) | Importing reference material for agents |
 | [Examples](examples.md) | Ready-to-use config examples |
 
 ## VS Code Extension
diff --git a/mkdocs.yml b/mkdocs.yml
index 94f31c9..cf4e5af 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -45,6 +45,8 @@ nav:
   - Security: security.md
   - Governance: governance.md
   - Sessions: sessions.md
+  - Context Management: context-management.md
+  - Context Store: context-store.md
   - Examples: examples.md
   - Design: design.md
 

From eafa181581df67b7d1cf7149a3df5d417b386905 Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 21:32:22 -0500
Subject: [PATCH 13/14] feat: add Document plugin and auto-extract text from
 binary docs at context import
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add DocumentTextExtractor (PDF via PdfPig, DOCX/PPTX/XLSX via
DocumentFormat.OpenXml) — fully cross-platform pure-.NET extraction,
no shell tools required.

Add DocumentPlugin with four read-only tools:
  document_extract_text — full text from PDF, DOCX, PPTX, or XLSX
  document_get_info     — format/size metadata without full extraction
  document_list_sheets  — sheet names from an XLSX workbook
  document_get_sheet    — single sheet as pipe-delimited text table

Fix ContextStore.AddAsync: binary documents (.pdf, .docx, .pptx, .xlsx)
are now extracted to .txt at import time so agents can access them via
read_file. Falls back to storing the binary with a warning on extraction
failure. ExtractionInfo recorded on ContextItem; notes surfaced in CLI
output.

Register Document in PluginRegistry (default + sandboxed); add capability
entries (all read) to PluginCapabilityMap. Update plugins.md and
context-store.md.
---
 docs/context-store.md                         |  25 ++-
 docs/plugins.md                               |  17 ++
 src/Cli/Commands/ContextCommand.cs            |   4 +
 src/FuseraftCli.csproj                        |   2 +
 src/Infrastructure/ContextStore.cs            |  79 +++++--
 src/Infrastructure/DocumentTextExtractor.cs   | 207 ++++++++++++++++++
 src/Infrastructure/Plugins/DocumentPlugin.cs  | 149 +++++++++++++
 .../Plugins/PluginCapabilityMap.cs            |   7 +
 src/Infrastructure/Plugins/PluginRegistry.cs  |   2 +
 9 files changed, 477 insertions(+), 15 deletions(-)
 create mode 100644 src/Infrastructure/DocumentTextExtractor.cs
 create mode 100644 src/Infrastructure/Plugins/DocumentPlugin.cs

diff --git a/docs/context-store.md b/docs/context-store.md
index 5fd433d..13fa3f5 100644
--- a/docs/context-store.md
+++ b/docs/context-store.md
@@ -92,13 +92,36 @@ fuseraft context list --dir ~/projects/my-app
 fuseraft context remove runbook --dir ~/projects/my-app
 ```
 
+## Document extraction
+
+When you import a PDF, Word document, PowerPoint presentation, or Excel spreadsheet,
+fuseraft automatically extracts the plain text at import time and stores a `.txt` version
+in the context directory. Agents can then access the extracted text via `read_file` —
+no special plugin required.
+
+```
+fuseraft context add ~/docs/architecture.pdf
+# ✓ architecture — 1 file(s), 48.2 KB
+#   Extracted from architecture.pdf: PDF — 24 page(s) → architecture.txt
+```
+
+**Supported formats:** `.pdf`, `.docx`, `.pptx`, `.xlsx`
+
+If text extraction fails (encrypted document, corrupted file), the original binary is stored
+instead and a warning is printed. Binary files cannot be read by agents via `read_file`.
+
+For working with documents found *during* a session, or reading individual Excel sheets,
+use the [`Document` plugin](plugins.md#document) directly.
+
 ## What to import
 
 The context store works well for:
 
 - **Database schemas** — schema SQL, ERDs, or migration history
 - **API specifications** — OpenAPI/Swagger YAML, Postman collections
-- **Architecture documents** — design docs, ADRs, system diagrams
+- **Architecture documents** — design docs, ADRs, system diagrams (PDF, DOCX)
+- **Slide decks** — PPTX presentations extracted to slide-by-slide text
+- **Spreadsheets** — XLSX workbooks with multiple sheets, each extracted as a table
 - **Reference data** — seed data, sample payloads, fixture files
 - **Task briefs** — detailed specs too long to paste into the task argument
 
diff --git a/docs/plugins.md b/docs/plugins.md
index d2918d9..6d45ab8 100644
--- a/docs/plugins.md
+++ b/docs/plugins.md
@@ -387,6 +387,23 @@ Agents:
 
 ---
 
+## Document
+
+Read rich document formats as plain text. All operations are read-only. Sandbox rules apply when `FileSystemSandboxPath` is configured.
+
+| Function | Parameters | Description |
+|----------|-----------|-------------|
+| `document_extract_text` | `path` | Extract full plain text from a PDF, DOCX, PPTX, or XLSX file. Returns a format/size header followed by the extracted text. |
+| `document_get_info` | `path` | Return format metadata (page/sheet count, file size, extracted character count) without returning the full text. Cheaper than `document_extract_text` for planning. |
+| `document_list_sheets` | `path` | List sheet names in an Excel file (`.xlsx` only). |
+| `document_get_sheet` | `path`, `sheetName`, `maxRows` (default 0 = all) | Extract one sheet from an Excel file as a pipe-delimited text table. |
+
+**Supported formats:** `.pdf`, `.docx`, `.pptx`, `.xlsx`
+
+**Context store integration:** When you run `fuseraft context add` on a supported document, the text is automatically extracted and stored as a `.txt` file at import time. Agents can then access it via `read_file` without needing the `Document` plugin. Use `Document` when you need on-demand extraction inside a session (e.g. processing documents found during a task, or working with individual Excel sheets).
+
+---
+
 ## MCP plugins
 
 In addition to the built-in plugins above, tools from any connected MCP server are available as plugins. The plugin name is the `Name` field from `McpServers` config.
diff --git a/src/Cli/Commands/ContextCommand.cs b/src/Cli/Commands/ContextCommand.cs
index 4fcccd7..87104aa 100644
--- a/src/Cli/Commands/ContextCommand.cs
+++ b/src/Cli/Commands/ContextCommand.cs
@@ -74,6 +74,10 @@ protected override async Task<int> ExecuteAsync(CommandContext context, ContextA
                 foreach (var f in item.Files.OrderBy(f => f.RelativePath))
                     AnsiConsole.MarkupLine($"  [dim]{Markup.Escape(f.RelativePath)}[/]");
 
+            if (item.ExtractionInfo is not null)
+                foreach (var note in item.ExtractionInfo.Split('\n', StringSplitOptions.RemoveEmptyEntries))
+                    AnsiConsole.MarkupLine($"  [dim]{Markup.Escape(note)}[/]");
+
             AnsiConsole.MarkupLine(
                 $"\n[dim]Agents will see this item listed in their system prompt " +
                 $"and can read it via read_file from " +
diff --git a/src/FuseraftCli.csproj b/src/FuseraftCli.csproj
index cdc7538..34c090c 100644
--- a/src/FuseraftCli.csproj
+++ b/src/FuseraftCli.csproj
@@ -14,6 +14,7 @@
 
   <ItemGroup>
     <!-- Microsoft Agent Framework -->
+    <PackageReference Include="DocumentFormat.OpenXml" Version="3.3.0" />
     <PackageReference Include="Microsoft.Agents.AI.OpenAI" Version="1.3.0" />
     <PackageReference Include="Microsoft.Agents.AI.Workflows" Version="1.3.0" />
 
@@ -32,6 +33,7 @@
     <PackageReference Include="OpenTelemetry.Exporter.OpenTelemetryProtocol" Version="1.15.3" />
     <PackageReference Include="OpenTelemetry.Instrumentation.Http" Version="1.15.1" />
     <PackageReference Include="OpenTelemetry.Instrumentation.Runtime" Version="1.15.1" />
+    <PackageReference Include="PdfPig" Version="0.1.14" />
 
     <!-- Structured logging -->
     <PackageReference Include="Serilog.Extensions.Hosting" Version="10.0.0" />
diff --git a/src/Infrastructure/ContextStore.cs b/src/Infrastructure/ContextStore.cs
index 0ddbc1d..936c035 100644
--- a/src/Infrastructure/ContextStore.cs
+++ b/src/Infrastructure/ContextStore.cs
@@ -72,34 +72,42 @@ public async Task AddAsync(
             Directory.Delete(destDir, recursive: true);
         Directory.CreateDirectory(destDir);
 
-        var files = new List<ContextFileEntry>();
+        var files          = new List<ContextFileEntry>();
+        var extractionNotes = new List<string>();
 
         if (isFile)
         {
-            var fileName = Path.GetFileName(fullSource);
-            File.Copy(fullSource, Path.Combine(destDir, fileName));
-            files.Add(new ContextFileEntry(fileName, new FileInfo(fullSource).Length));
+            var (entry, note) = await StoreFileAsync(fullSource, destDir, ct);
+            files.Add(entry);
+            if (note is not null) extractionNotes.Add(note);
         }
         else
         {
             foreach (var src in Directory.EnumerateFiles(fullSource, "*", SearchOption.AllDirectories))
             {
-                var rel  = Path.GetRelativePath(fullSource, src);
-                var dest = Path.Combine(destDir, rel);
-                Directory.CreateDirectory(Path.GetDirectoryName(dest)!);
-                File.Copy(src, dest);
-                files.Add(new ContextFileEntry(rel.Replace('\\', '/'), new FileInfo(src).Length));
+                var rel     = Path.GetRelativePath(fullSource, src);
+                var destSub = Path.Combine(destDir, Path.GetDirectoryName(rel) ?? string.Empty);
+                Directory.CreateDirectory(destSub);
+                var (entry, note) = await StoreFileAsync(src, destSub, ct);
+                var storedRel = Path.Combine(
+                    Path.GetDirectoryName(rel) ?? string.Empty,
+                    entry.RelativePath).Replace('\\', '/').TrimStart('/');
+                files.Add(new ContextFileEntry(storedRel, entry.SizeBytes));
+                if (note is not null) extractionNotes.Add(note);
             }
         }
 
         var index = await LoadIndexAsync(ct);
         index.Items[name] = new ContextItem
         {
-            Name        = name,
-            Description = description,
-            SourcePath  = fullSource,
-            ImportedAt  = DateTime.UtcNow,
-            Files       = files,
+            Name           = name,
+            Description    = description,
+            SourcePath     = fullSource,
+            ImportedAt     = DateTime.UtcNow,
+            Files          = files,
+            ExtractionInfo = extractionNotes.Count > 0
+                ? string.Join("\n", extractionNotes)
+                : null,
         };
         await SaveIndexAsync(index, ct);
     }
@@ -199,6 +207,42 @@ private async Task SaveIndexAsync(ContextIndex index, CancellationToken ct)
         await File.WriteAllTextAsync(indexPath, JsonSerializer.Serialize(index, JsonOpts), ct);
     }
 
+    // Stores one source file under destDir. Supported binary documents are converted to a
+    // .txt sibling; everything else is copied verbatim. Returns the stored file entry and an
+    // optional human-readable note. Cancellation propagates instead of counting as a failure.
+    private static async Task<(ContextFileEntry Entry, string? Note)> StoreFileAsync(
+        string sourcePath, string destDir, CancellationToken ct)
+    {
+        if (DocumentTextExtractor.IsSupported(sourcePath))
+        {
+            try
+            {
+                var (text, info) = DocumentTextExtractor.Extract(sourcePath);
+                var txtName = Path.GetFileNameWithoutExtension(sourcePath) + ".txt";
+                var txtPath = Path.Combine(destDir, txtName);
+                await File.WriteAllTextAsync(txtPath, text, ct);
+                var size = new FileInfo(txtPath).Length;
+                var note = $"Extracted from {Path.GetFileName(sourcePath)}: {info} → {txtName}";
+                return (new ContextFileEntry(txtName, size), note);
+            }
+            catch (Exception ex) when (ex is not OperationCanceledException)
+            {
+                // Extraction failed — copy the binary so the item is still stored, but warn
+                // that agents will not be able to read it via read_file.
+                var binName = Path.GetFileName(sourcePath);
+                var binPath = Path.Combine(destDir, binName);
+                File.Copy(sourcePath, binPath);
+                var size = new FileInfo(sourcePath).Length;
+                var note = $"Warning: extraction failed for {binName} ({ex.Message}) — binary stored, not readable by agents";
+                return (new ContextFileEntry(binName, size), note);
+            }
+        }
+
+        var fileName = Path.GetFileName(sourcePath);
+        File.Copy(sourcePath, Path.Combine(destDir, fileName));
+        return (new ContextFileEntry(fileName, new FileInfo(sourcePath).Length), null);
+    }
+
     private static bool IsValidName(string name) =>
         !string.IsNullOrWhiteSpace(name) &&
         name.All(c => char.IsLetterOrDigit(c) || c == '-' || c == '_');
@@ -229,6 +273,13 @@ public sealed class ContextItem
 
     [JsonPropertyName("files")]
     public List<ContextFileEntry> Files { get; init; } = [];
+
+    /// <summary>
+    /// Newline-separated import notes: one per document converted to plain text, plus a
+    /// warning for each document whose extraction failed. Null when there are no notes.
+    /// </summary>
+    [JsonPropertyName("extractionInfo")]
+    public string? ExtractionInfo { get; init; }
 }
 
 public sealed record ContextFileEntry(
diff --git a/src/Infrastructure/DocumentTextExtractor.cs b/src/Infrastructure/DocumentTextExtractor.cs
new file mode 100644
index 0000000..b3436ca
--- /dev/null
+++ b/src/Infrastructure/DocumentTextExtractor.cs
@@ -0,0 +1,207 @@
+using System.Text;
+using DocumentFormat.OpenXml.Packaging;
+using DocumentFormat.OpenXml.Spreadsheet;
+using DocumentFormat.OpenXml.Wordprocessing;
+using UglyToad.PdfPig;
+
+namespace fuseraft.Infrastructure;
+
+/// <summary>
+/// Extracts plain text from rich document formats (PDF, DOCX, PPTX, XLSX).
+/// Used by <see cref="ContextStore"/> at import time and by
+/// <see cref="Plugins.DocumentPlugin"/> at agent runtime.
+/// </summary>
+public static class DocumentTextExtractor
+{
+    /// <summary>Extractable file extensions, with leading dot, compared case-insensitively.</summary>
+    public static readonly IReadOnlySet<string> SupportedExtensions =
+        new HashSet<string>(StringComparer.OrdinalIgnoreCase) { ".pdf", ".docx", ".pptx", ".xlsx" };
+    /// <summary>True when the extension of <paramref name="path"/> is in <see cref="SupportedExtensions"/>.</summary>
+    public static bool IsSupported(string path) => SupportedExtensions.Contains(Path.GetExtension(path));
+
+    /// <summary>
+    /// Dispatches on the lower-cased file extension of <paramref name="path"/> to the matching extractor.
+    /// </summary>
+    /// <returns>The extracted text and a short info line (e.g. "PDF — 12 page(s)").</returns>
+    /// <exception cref="NotSupportedException">The extension is not one of the supported formats.</exception>
+    public static (string Text, string Info) Extract(string path)
+    {
+        var extension = Path.GetExtension(path).ToLowerInvariant();
+        switch (extension)
+        {
+            case ".pdf":  return ExtractPdf(path);
+            case ".docx": return ExtractDocx(path);
+            case ".pptx": return ExtractPptx(path);
+            case ".xlsx": return ExtractXlsx(path);
+            default: throw new NotSupportedException($"Unsupported document format: {extension}");
+        }
+    }
+
+    /// <summary>Lists the sheet names of an Excel workbook; a sheet without a name yields "".</summary>
+    public static IReadOnlyList<string> ListSheets(string path)
+    {
+        using var workbook = SpreadsheetDocument.Open(path, false);
+        var sheets = workbook.WorkbookPart?.Workbook?.Sheets?.Elements<Sheet>();
+        if (sheets is null) return new List<string>();
+        return sheets.Select(sheet => sheet.Name?.Value ?? string.Empty).ToList();
+    }
+
+    /// <summary>
+    /// Extracts one sheet (name matched case-insensitively) as pipe-delimited rows; a positive
+    /// <paramref name="maxRows"/> caps the row count. Sheets without cell data yield empty text.
+    /// </summary>
+    /// <exception cref="KeyNotFoundException">No sheet is named <paramref name="sheetName"/>.</exception>
+    public static (string Text, int RowCount) ExtractSheet(string path, string sheetName, int maxRows = 0)
+    {
+        using var doc = SpreadsheetDocument.Open(path, false);
+        var workbookPart = doc.WorkbookPart ?? throw new InvalidOperationException("Workbook has no parts.");
+        var sharedStrings = BuildSharedStrings(workbookPart);
+
+        var sheet = workbookPart.Workbook?.Sheets?.Elements<Sheet>()
+            .FirstOrDefault(s => string.Equals(s.Name?.Value, sheetName, StringComparison.OrdinalIgnoreCase))
+            ?? throw new KeyNotFoundException($"Sheet '{sheetName}' not found.");
+
+        if (sheet.Id?.Value is null)
+            throw new InvalidOperationException($"Sheet '{sheetName}' has no part ID.");
+
+        // Chart and dialog sheets resolve to other part types; a hard cast would throw InvalidCastException.
+        var data = (workbookPart.GetPartById(sheet.Id.Value) as WorksheetPart)?.Worksheet.GetFirstChild<SheetData>();
+        if (data is null) return (string.Empty, 0);
+
+        var sb = new StringBuilder();
+        int rowCount = 0;
+        foreach (var row in data.Elements<Row>())
+        {
+            if (maxRows > 0 && rowCount >= maxRows) break;
+            var cells = row.Elements<Cell>().Select(c => GetCellValue(c, sharedStrings));
+            sb.AppendLine(string.Join(" | ", cells));
+            rowCount++;
+        }
+        return (sb.ToString().Trim(), rowCount);
+    }
+
+    // PDF
+
+    // Appends the text of every non-blank page, one line each; the info line counts all pages.
+    private static (string Text, string Info) ExtractPdf(string path)
+    {
+        using var document = PdfDocument.Open(path);
+        var allPages = document.GetPages().ToList();
+        var buffer   = new StringBuilder();
+        foreach (var pageText in allPages.Select(p => p.Text))
+        {
+            if (string.IsNullOrWhiteSpace(pageText)) continue;
+            buffer.AppendLine(pageText);
+        }
+        return (buffer.ToString().Trim(), $"PDF — {allPages.Count} page(s)");
+    }
+
+    // DOCX
+
+    // Walks the body's top-level paragraphs and tables in order; the info line is an approximate word count.
+    private static (string Text, string Info) ExtractDocx(string path)
+    {
+        using var doc  = WordprocessingDocument.Open(path, false);
+        var body = doc.MainDocumentPart?.Document?.Body;
+        if (body is null) return (string.Empty, "DOCX — empty document");
+        var sb = new StringBuilder();
+        foreach (var elem in body.ChildElements)
+        {
+            if (elem is Paragraph para)
+            {
+                var text = para.InnerText;
+                if (!string.IsNullOrWhiteSpace(text))
+                    sb.AppendLine(text);
+            }
+            else if (elem is DocumentFormat.OpenXml.Wordprocessing.Table table)
+            {
+                foreach (var row in table.Elements<TableRow>())
+                {
+                    var cells = row.Elements<TableCell>()
+                        .Select(c => c.InnerText.Trim())
+                        .Where(t => !string.IsNullOrEmpty(t));
+                    sb.AppendLine(string.Join(" | ", cells));
+                }
+            }
+        }
+        // Lines are newline-separated, so split on any whitespace; splitting on ' ' alone undercounts.
+        var extracted = sb.ToString().Trim();
+        var wordCount = extracted.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries).Length;
+        return (extracted, $"DOCX — ~{wordCount:N0} word(s)");
+    }
+
+    // PPTX
+
+    // Emits each slide's text runs under a "=== Slide N ===" header, numbered in presentation order.
+    private static (string Text, string Info) ExtractPptx(string path)
+    {
+        using var pres = PresentationDocument.Open(path, false);
+        var presPart   = pres.PresentationPart;
+        // SlideParts enumerates package relationships, which need not match deck order; SlideIdList does.
+        var slideParts = presPart?.Presentation?.SlideIdList
+            ?.Elements<DocumentFormat.OpenXml.Presentation.SlideId>()
+            .Select(id => id.RelationshipId?.Value is { } rid ? presPart!.GetPartById(rid) as SlidePart : null)
+            .OfType<SlidePart>().ToList() ?? [];
+        var sb = new StringBuilder();
+        for (var i = 0; i < slideParts.Count; i++)
+        {
+            sb.AppendLine($"=== Slide {i + 1} ===");
+            foreach (var text in slideParts[i].Slide.Descendants<DocumentFormat.OpenXml.Drawing.Text>())
+                if (!string.IsNullOrWhiteSpace(text.Text))
+                    sb.AppendLine(text.Text);
+            sb.AppendLine();
+        }
+        return (sb.ToString().Trim(), $"PPTX — {slideParts.Count} slide(s)");
+    }
+
+    // XLSX
+
+    // Emits a "=== Sheet: name ===" header per sheet, followed by that sheet's rows, pipe-delimited.
+    private static (string Text, string Info) ExtractXlsx(string path)
+    {
+        using var doc = SpreadsheetDocument.Open(path, false);
+        var workbookPart = doc.WorkbookPart;
+        if (workbookPart is null) return (string.Empty, "XLSX — empty workbook");
+        var sharedStrings = BuildSharedStrings(workbookPart);
+        var sheets = workbookPart.Workbook?.Sheets?.Elements<Sheet>().ToList() ?? [];
+        var sb = new StringBuilder();
+        int totalRows = 0;
+
+        foreach (var sheet in sheets)
+        {
+            sb.AppendLine($"=== Sheet: {sheet.Name} ===");
+            if (sheet.Id?.Value is null) continue;
+            // Chart and dialog sheets are listed too but are not WorksheetParts: skip them instead of casting.
+            if (workbookPart.GetPartById(sheet.Id.Value) is not WorksheetPart wsPart) continue;
+            var data = wsPart.Worksheet.GetFirstChild<SheetData>();
+            if (data is null) continue;
+            foreach (var row in data.Elements<Row>())
+            {
+                var cells = row.Elements<Cell>().Select(c => GetCellValue(c, sharedStrings));
+                sb.AppendLine(string.Join(" | ", cells));
+                totalRows++;
+            }
+            sb.AppendLine();
+        }
+
+        return (sb.ToString().Trim(), $"XLSX — {sheets.Count} sheet(s), {totalRows:N0} row(s)");
+    }
+
+    // Helpers
+    // Materializes the workbook's shared-string table as a list; empty when the part is absent.
+    private static List<string> BuildSharedStrings(WorkbookPart workbookPart)
+    {
+        var table = workbookPart.SharedStringTablePart?.SharedStringTable;
+        return table?.Elements<SharedStringItem>().Select(item => item.InnerText).ToList() ?? [];
+    }
+
+    // Resolves a cell to text: shared-string lookup, else the raw <v> value, else an inline string (<is>).
+    private static string GetCellValue(Cell cell, List<string> sharedStrings)
+    {
+        var value = cell.CellValue?.Text ?? cell.InlineString?.InnerText ?? string.Empty;
+        return cell.DataType?.Value == CellValues.SharedString
+            && int.TryParse(value, out var idx)
+            && (uint)idx < (uint)sharedStrings.Count
+            ? sharedStrings[idx] : value;
+    }
+}
diff --git a/src/Infrastructure/Plugins/DocumentPlugin.cs b/src/Infrastructure/Plugins/DocumentPlugin.cs
new file mode 100644
index 0000000..34dc5a9
--- /dev/null
+++ b/src/Infrastructure/Plugins/DocumentPlugin.cs
@@ -0,0 +1,149 @@
+using System.ComponentModel;
+using fuseraft.Infrastructure;
+
+namespace fuseraft.Infrastructure.Plugins;
+
+/// <summary>
+/// Reads rich document formats (PDF, DOCX, PPTX, XLSX) as plain text.
+/// All operations are read-only. Path arguments are sandbox-checked when a
+/// sandbox root is configured.
+/// </summary>
+public sealed class DocumentPlugin(string? sandboxRoot = null)
+{
+    private readonly string? _sandboxRoot = sandboxRoot is not null
+        ? Path.GetFullPath(ProcessHelper.ExpandHome(sandboxRoot))
+        : null;
+
+    [Description("Extract plain text from a document. Supports PDF, DOCX, PPTX, XLSX.")]
+    public string ExtractText([Description("Path to the document.")] string path)
+    {
+        // Sandbox denials, missing files and unsupported extensions come back as PluginResult strings.
+        if (ResolveSafe(path, out var resolved) is { } denial) return denial;
+        if (!File.Exists(resolved)) return PluginResult.Error($"File not found: {resolved}");
+        if (!DocumentTextExtractor.IsSupported(resolved))
+        {
+            var supported = string.Join(", ", DocumentTextExtractor.SupportedExtensions);
+            return PluginResult.Error($"Unsupported format '{Path.GetExtension(resolved)}'. Supported: {supported}");
+        }
+        try
+        {
+            var (text, info) = DocumentTextExtractor.Extract(resolved);
+            if (string.IsNullOrWhiteSpace(text))
+                return PluginResult.Info($"{info} — no text content found.");
+            return $"[{info}]\n\n{text}";
+        }
+        catch (Exception ex)
+        {
+            return PluginResult.Error($"Extraction failed: {ex.Message}");
+        }
+    }
+
+    [Description("Get format and size metadata for a document. Cheaper than extract_text. Supports PDF, DOCX, PPTX, XLSX.")]
+    public string GetInfo([Description("Path to the document.")] string path)
+    {
+        // Runs a full extraction to measure the text but returns only a summary; tokens are estimated as chars / 4.
+        if (ResolveSafe(path, out var resolved) is { } denial) return denial;
+        if (!File.Exists(resolved)) return PluginResult.Error($"File not found: {resolved}");
+        if (!DocumentTextExtractor.IsSupported(resolved))
+        {
+            var supported = string.Join(", ", DocumentTextExtractor.SupportedExtensions);
+            return PluginResult.Error($"Unsupported format '{Path.GetExtension(resolved)}'. Supported: {supported}");
+        }
+        try
+        {
+            var fileInfo = new FileInfo(resolved);
+            var (text, info) = DocumentTextExtractor.Extract(resolved);
+            var tokenEstimate = text.Length / 4;
+            return $"{info}\nFile size: {FormatSize(fileInfo.Length)}\n" +
+                   $"Extracted text: ~{text.Length:N0} characters (~{tokenEstimate:N0} tokens)";
+        }
+        catch (Exception ex)
+        {
+            return PluginResult.Error($"Could not read document metadata: {ex.Message}");
+        }
+    }
+
+    [Description("List sheet names in an Excel file (.xlsx).")]
+    public string ListSheets([Description("Path to the .xlsx file.")] string path)
+    {
+        // On success returns a numbered list ("1. Name"), one sheet per line.
+        if (ResolveSafe(path, out var resolved) is { } denial) return denial;
+        if (!File.Exists(resolved)) return PluginResult.Error($"File not found: {resolved}");
+
+        var ext = Path.GetExtension(resolved).ToLowerInvariant();
+        if (ext is not ".xlsx")
+            return PluginResult.Error($"list_sheets only works on .xlsx files, not '{ext}'.");
+
+        try
+        {
+            var names = DocumentTextExtractor.ListSheets(resolved);
+            if (names.Count == 0) return PluginResult.Info("No sheets found.");
+            var numbered = names.Select((name, index) => $"{index + 1}. {name}");
+            return string.Join("\n", numbered);
+        }
+        catch (Exception ex)
+        {
+            return PluginResult.Error($"Could not read sheet list: {ex.Message}");
+        }
+    }
+
+    [Description("Read one sheet from an Excel file (.xlsx) as a pipe-delimited text table.")]
+    public string GetSheet(
+        [Description("Path to the .xlsx file.")] string path,
+        [Description("Sheet name.")] string sheetName,
+        [Description("Maximum rows to return (0 = all).")] int maxRows = 0)
+    {
+        // Denials, missing files, wrong extensions and extraction errors are returned as PluginResult strings.
+        if (ResolveSafe(path, out var resolved) is { } denial) return denial;
+        if (!File.Exists(resolved)) return PluginResult.Error($"File not found: {resolved}");
+
+        var ext = Path.GetExtension(resolved).ToLowerInvariant();
+        if (ext is not ".xlsx")
+            return PluginResult.Error($"get_sheet only works on .xlsx files, not '{ext}'.");
+
+        try
+        {
+            var (table, rows) = DocumentTextExtractor.ExtractSheet(resolved, sheetName, maxRows);
+            if (string.IsNullOrWhiteSpace(table)) return PluginResult.Info($"Sheet '{sheetName}' is empty.");
+            // The suffix is added whenever a positive cap was reached, even if the sheet ends exactly there.
+            var suffix = string.Empty;
+            if (maxRows > 0 && rows >= maxRows)
+                suffix = $" — first {maxRows} rows";
+            return $"[Sheet: {sheetName} — {rows} row(s){suffix}]\n\n{table}";
+        }
+        catch (Exception ex)
+        {
+            // A missing-sheet error already carries a complete message; other errors get the sheet name prefixed.
+            var message = ex is KeyNotFoundException ? ex.Message : $"Could not read sheet '{sheetName}': {ex.Message}";
+            return PluginResult.Error(message);
+        }
+    }
+
+    // Runs ProcessHelper.ExpandHome on path and resolves it to a full path (relative paths against the
+    // sandbox root when one is configured). Returns null when allowed, otherwise a denial result string.
+    private string? ResolveSafe(string path, out string resolved)
+    {
+        var expanded = ProcessHelper.ExpandHome(path);
+        if (_sandboxRoot is null)
+        {
+            resolved = Path.GetFullPath(expanded);
+            return null;
+        }
+        resolved = Path.IsPathRooted(expanded) ? Path.GetFullPath(expanded) : Path.GetFullPath(expanded, _sandboxRoot);
+        // Trailing separators on both sides keep "/root2" from matching the prefix "/root".
+        var separator  = Path.DirectorySeparatorChar;
+        var rootPrefix = _sandboxRoot.TrimEnd(separator) + separator;
+        var candidate  = resolved.TrimEnd(separator) + separator;
+        var comparison = OperatingSystem.IsWindows() ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal;
+        if (candidate.StartsWith(rootPrefix, comparison)) return null;
+        return PluginResult.Denied($"Path '{resolved}' is outside the configured sandbox '{_sandboxRoot}'.");
+    }
+
+    // Renders a byte count as B / KB / MB / GB with 1024-based units and one decimal above bytes.
+    private static string FormatSize(long bytes)
+    {
+        if (bytes < 1_024) return $"{bytes} B";
+        if (bytes < 1_048_576) return $"{bytes / 1_024.0:F1} KB";
+        if (bytes < 1_073_741_824) return $"{bytes / 1_048_576.0:F1} MB";
+        return $"{bytes / 1_073_741_824.0:F1} GB";
+    }
diff --git a/src/Infrastructure/Plugins/PluginCapabilityMap.cs b/src/Infrastructure/Plugins/PluginCapabilityMap.cs
index 08497ab..65c62de 100644
--- a/src/Infrastructure/Plugins/PluginCapabilityMap.cs
+++ b/src/Infrastructure/Plugins/PluginCapabilityMap.cs
@@ -23,6 +23,7 @@ namespace fuseraft.Infrastructure.Plugins;
 ///   <item><term>Git</term><description><c>read</c> (status, diff, log, show, branch_list, stash_list) · <c>write</c> (add, commit, checkout, create_branch, init, push, pull, stash, stash_pop, reset)</description></item>
 ///   <item><term>Http</term><description><c>get</c> · <c>post</c> · <c>put</c> · <c>patch</c> · <c>delete</c> — one per HTTP verb</description></item>
 ///   <item><term>Json</term><description><c>read</c> (format, minify, get, keys, search, to_text, validate) · <c>write</c> (merge)</description></item>
+///   <item><term>Document</term><description><c>read</c> (extract_text, get_info, list_sheets, get_sheet — all read-only)</description></item>
 ///   <item><term>Search</term><description><c>read</c> (all search operations are read-only)</description></item>
 ///   <item><term>Plan</term><description><c>read</c> (plan_get, plan_get_summary) · <c>write</c> (plan_create, plan_update_step, plan_add_step)</description></item>
 ///   <item><term>Changes</term><description><c>read</c> (read, read_latest)</description></item>
@@ -102,6 +103,12 @@ internal static class PluginCapabilityMap
         ["json_validate"]                  = "read",
         ["json_merge"]                     = "write",
 
+        // Document (all read-only)
+        ["document_extract_text"]          = "read",
+        ["document_get_info"]              = "read",
+        ["document_list_sheets"]           = "read",
+        ["document_get_sheet"]             = "read",
+
         // Search (all read-only)
         ["search_files"]                   = "read",
         ["search_content"]                 = "read",
diff --git a/src/Infrastructure/Plugins/PluginRegistry.cs b/src/Infrastructure/Plugins/PluginRegistry.cs
index 7948cb2..0eb1430 100644
--- a/src/Infrastructure/Plugins/PluginRegistry.cs
+++ b/src/Infrastructure/Plugins/PluginRegistry.cs
@@ -68,6 +68,7 @@ public PluginRegistry RegisterDefaults()
         Register("Http",       () => new HttpPlugin(_sharedHttpClient, logger: _loggerFactory?.CreateLogger<HttpPlugin>()));
         Register("Json",       () => new JsonPlugin());
         Register("Search",     () => new SearchPlugin());
+        Register("Document",   () => new DocumentPlugin());
         Register("Probe",      () => new ProbePlugin());
         Register("CodeExecution", () => new CodeExecutionPlugin());
         Register("Handoff",       () => new HandoffPlugin());
@@ -107,6 +108,7 @@ public PluginRegistry Configure(
         Register("FileSystem", () => new FileSystemPlugin(sandboxRoot, security.ReadFileSizeLimit, versionStore: fileVersionStore));
         Register("Shell",      () => new ShellPlugin(sandboxRoot, shellCommandApprover));
         Register("Http",       () => new HttpPlugin(_sharedHttpClient, allowedHosts, apiProfiles, allowPrivateHosts, _loggerFactory?.CreateLogger<HttpPlugin>()));
+        Register("Document",   () => new DocumentPlugin(sandboxRoot));
         return this;
     }
 

From b811f4f77dc124988860e95107b0c492c2f33e9b Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 11 May 2026 21:51:00 -0500
Subject: [PATCH 14/14] docs: update configuration, cli-reference, and
 context-management for Document plugin

- configuration.md: add Document to the capability tag table
- cli-reference.md: document binary extraction behaviour in fuseraft context add
- context-management.md: update Layer 1 to mention document formats and cross-link Document plugin
---
 docs/cli-reference.md      |  9 +++++++++
 docs/configuration.md      |  1 +
 docs/context-management.md | 13 ++++++++++---
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/docs/cli-reference.md b/docs/cli-reference.md
index 2b9914b..c04101b 100644
--- a/docs/cli-reference.md
+++ b/docs/cli-reference.md
@@ -760,6 +760,15 @@ fuseraft context add ~/specs/ --name specs --description "Product specifications
 fuseraft context add ~/docs/runbook.md --dir ~/projects/my-app
 ```
 
+**Binary document extraction:** When the source is a `.pdf`, `.docx`, `.pptx`, or `.xlsx` file — or a directory that contains such files — fuseraft automatically extracts the plain text of each one and stores it as a `.txt` file in place of the original. Agents read the extracted text via `read_file` — no `Document` plugin required. A note is printed on import:
+
+```
+✓ architecture — 1 file(s), 48.2 KB
+  Extracted from architecture.pdf: PDF — 24 page(s) → architecture.txt
+```
+
+If extraction fails (encrypted file, corrupt format), the binary is stored with a warning and will not be readable by agents via `read_file`.
+
 After importing, agents see an entry like this at the top of their system prompt:
 
 ```
diff --git a/docs/configuration.md b/docs/configuration.md
index 9756860..5ab38a0 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -148,6 +148,7 @@ Per-plugin tool filter. Keys are plugin names; values are arrays of capability t
 | `Git` | `read` (git_status, git_diff, git_log, git_show, git_branch_list, git_stash_list) · `write` (git_add, git_commit, git_checkout, git_create_branch, git_init, git_push, git_pull, git_stash, git_stash_pop, git_reset) |
 | `Http` | `get` · `head` · `post` · `put` · `patch` · `delete` — one per HTTP verb |
 | `Json` | `read` · `write` (json_merge) |
+| `Document` | `read` (document_extract_text, document_get_info, document_list_sheets, document_get_sheet) |
 | `Search` | `read` |
 | `Changes` | `read` |
 | `Scratchpad` | `read` · `write` |
diff --git a/docs/context-management.md b/docs/context-management.md
index 41aaddf..fce007f 100644
--- a/docs/context-management.md
+++ b/docs/context-management.md
@@ -30,11 +30,18 @@ available, and can access the full content with `read_file`.
 ```yaml
 # No config required — populated by CLI before running:
 #   fuseraft context add ~/docs/schema.sql --name db-schema
-#   fuseraft context add ~/specs/ --name specs
+#   fuseraft context add ~/specs/          --name specs
+#   fuseraft context add ~/docs/design.pdf --name design   # text extracted automatically
 ```
 
-**When to use:** Database schemas, API specs, architecture docs, task briefs — anything too
-large to paste into the task argument but that agents should know exists from turn one.
+**When to use:** Database schemas, API specs, architecture docs, slide decks, spreadsheets,
+task briefs — anything too large to paste into the task argument but that agents should know
+exists from turn one.
+
+**Binary documents:** When you import a `.pdf`, `.docx`, `.pptx`, or `.xlsx` file, fuseraft
+extracts the plain text at import time and stores a `.txt` file instead. Agents access it
+via `read_file` with no extra plugin. For documents found *during* a session — or when you
+need individual Excel sheets — use the [`Document` plugin](plugins.md#document) directly.
 
 See [Context Store](context-store.md) for the full CLI reference.