From 988b868cfeea7981d68c4e42904f1bd7489a7342 Mon Sep 17 00:00:00 2001 From: Eric Juta Date: Thu, 26 Mar 2026 12:58:16 +0000 Subject: [PATCH 1/8] docs: add Claude Code hooks parity spec\n\nCo-authored-by: Codex --- docs/claude-code-hooks-parity.md | 275 +++++++++++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 docs/claude-code-hooks-parity.md diff --git a/docs/claude-code-hooks-parity.md b/docs/claude-code-hooks-parity.md new file mode 100644 index 000000000..1094e6cf2 --- /dev/null +++ b/docs/claude-code-hooks-parity.md @@ -0,0 +1,275 @@ +# Claude Code hooks parity + +This document captures the current Codex hooks surface and the remaining +feature-parity gap versus Claude Code's documented hooks system. It is intended +to be the canonical planning doc for expanding `codex_hooks`. + +## Goal + +Bring Codex's public `hooks.json` lifecycle hooks close enough to Claude +Code's model that Claude-oriented hook setups can be ported with predictable, +documented edits rather than custom runtime patches. + +This does not require byte-for-byte compatibility in one step. It does require: + +- matching the major public event categories users expect, +- supporting the handler types those configurations rely on, +- honoring documented decision-control fields when they are accepted by schema, +- documenting any intentional deltas that remain. + +## Current Codex surface + +Today Codex exposes five public `hooks.json` event groups: + +- `PreToolUse` +- `PostToolUse` +- `SessionStart` +- `UserPromptSubmit` +- `Stop` + +The current engine only executes synchronous command handlers. `prompt`, +`agent`, and `async` configurations are parsed but skipped with warnings. + +The current runtime also has narrower execution coverage than Claude Code: + +- `PreToolUse` and `PostToolUse` are currently wired through the shell path, + with runtime requests using `tool_name: "Bash"`. +- `UserPromptSubmit` and `Stop` ignore matchers. 
+- some wire fields are present in schema but are rejected by the output parser + as unsupported. + +Legacy internal paths still exist for notification-style hooks +(`AfterAgent` / deprecated `AfterToolUse`), but they are not part of the +public `hooks.json` contract. + +## Claude Code parity gap + +Claude Code's current hooks reference documents a larger event surface and more +handler modes than Codex currently supports. + +### Missing event coverage + +Codex does not yet expose public `hooks.json` support for these documented +Claude Code event families: + +- `InstructionsLoaded` +- `PermissionRequest` +- `PostToolUseFailure` +- `Notification` +- `SubagentStart` +- `SubagentStop` +- `StopFailure` +- `TeammateIdle` +- `TaskCompleted` +- `ConfigChange` +- `CwdChanged` +- `FileChanged` +- `WorktreeCreate` +- `WorktreeRemove` +- `PreCompact` +- `PostCompact` +- `SessionEnd` +- `Elicitation` +- `ElicitationResult` + +### Missing handler coverage + +Codex does not yet support these Claude Code hook handler categories in the +public engine: + +- async command hooks, +- HTTP hooks, +- prompt hooks, +- agent hooks. + +### Partial decision-control coverage + +Codex schema already models some advanced fields, but runtime support is still +partial: + +- `PreToolUse.updatedInput` is rejected. +- `PreToolUse.additionalContext` is rejected. +- `PreToolUse.permissionDecision: allow` is rejected. +- `PreToolUse.permissionDecision: ask` is rejected. +- `PostToolUse.updatedMCPToolOutput` is rejected. +- `suppressOutput` is rejected for `PreToolUse` and `PostToolUse`. +- `stopReason` and `continue: false` are rejected for `PreToolUse`. + +This creates a confusing state where the schema shape suggests broader support +than the runtime actually honors. + +### Tool and matcher parity gaps + +- `PreToolUse` and `PostToolUse` should evolve from shell-centric wiring to + a consistent tool-event contract across relevant tool classes. 
+- matcher support should be explicit and consistent across all events that + Claude users expect to filter. +- MCP-aware hook behavior should be designed as first-class runtime behavior, + not as a schema placeholder. + +## Non-goals + +- Reproducing Claude Code internals exactly where Codex architecture differs. +- Preserving every existing partial or deprecated behavior forever. +- Adding public hook types without app-server, TUI, and docs visibility for the + resulting runs. + +## Design principles + +- **Public contract first**: do not expose schema fields that the runtime will + immediately reject unless they are clearly marked unsupported. +- **Event completeness over aliases**: add real lifecycle events before adding + compatibility shims. +- **One event, one payload contract**: every public event needs stable input and + output schema fixtures, runtime execution, and surfaced hook-run reporting. +- **Fail-open unless explicitly blocking**: invalid hook output should not cause + surprising hard failures outside events whose contract is intentionally + blocking. +- **No hidden UI drift**: hook additions must be visible in the TUI and + app-server surfaces anywhere hook runs are rendered today. + +## Implementation plan + +### Phase 1: make the current public surface coherent + +Goal: remove misleading partial support inside the existing five events. + +Required work: + +- align schema and parser behavior for the five existing events, +- either implement or remove unsupported schema fields that are already emitted + in fixtures, +- document matcher behavior explicitly, +- document current shell-centric tool coverage explicitly, +- add a dedicated user-facing reference doc for `hooks.json` behavior if the + main docs site still only mentions legacy notification hooks. 
+ +Acceptance: + +- no schema field is silently accepted but runtime-rejected without explicit + documentation, +- the docs explain exactly which event fields and decisions are live, +- existing five-event behavior is covered by tests and schema fixtures. + +### Phase 2: expand event coverage on the existing command-hook engine + +Goal: add missing lifecycle events before broadening handler types. + +Priority order: + +1. `PermissionRequest` +2. `Notification` +3. `SubagentStart` and `SubagentStop` +4. `PostToolUseFailure` and `StopFailure` +5. `SessionEnd` +6. `ConfigChange`, `CwdChanged`, and `FileChanged` +7. `PreCompact` and `PostCompact` +8. `TaskCompleted` and `TeammateIdle` +9. `InstructionsLoaded` +10. `WorktreeCreate` and `WorktreeRemove` +11. `Elicitation` and `ElicitationResult` + +Acceptance: + +- each event has an input schema fixture, +- each event has runtime dispatch wiring, +- each event emits `HookStarted` and `HookCompleted` consistently, +- each event has an explicit matcher story, +- docs list the event as supported. + +### Phase 3: broaden handler types + +Goal: match the main Claude Code hook execution modes. + +Required work: + +- implement async command hooks, +- add HTTP hook handlers, +- add prompt hook handlers, +- add agent hook handlers, +- surface handler type and execution mode accurately in run summaries. + +Acceptance: + +- discovery no longer skips supported handler types with warnings, +- `HookRunSummary` reports real handler type and execution mode, +- command, HTTP, prompt, and agent handlers have stable input/output contracts, +- async execution semantics are documented, especially ordering and failure + behavior. + +### Phase 4: close decision-control parity gaps + +Goal: implement or explicitly drop advanced output fields. 
+ +Required work: + +- decide whether `PreToolUse.updatedInput` will be supported in Codex, +- decide whether `PreToolUse.permissionDecision: ask` maps to an approval + prompt, a model-visible continuation, or remains unsupported, +- implement `additionalContext` anywhere the contract claims it exists, +- decide whether `PostToolUse.updatedMCPToolOutput` is part of the public + runtime contract, +- review event-specific `continue`, `stopReason`, and `suppressOutput` + semantics for consistency. + +Acceptance: + +- advanced hook output fields are either implemented end-to-end or removed from + public schema, +- runtime behavior matches docs and tests, +- no event-specific decision-control behavior relies on undocumented parser + special cases. + +### Phase 5: tool-class parity for pre/post tool hooks + +Goal: make tool hooks genuinely tool-aware rather than shell-specific. + +Required work: + +- define which Codex tool classes participate in `PreToolUse` and + `PostToolUse`, +- expose stable tool identifiers and input payloads for those classes, +- define MCP-tool matcher behavior explicitly, +- preserve backward compatibility for existing Bash-oriented hooks where + feasible. + +Acceptance: + +- users can target more than the shell path with pre/post tool hooks, +- tool names and payloads are documented and stable, +- MCP tool behavior is implemented rather than placeholder-only. + +## Required cross-cutting work + +- update docs under `docs/` when public behavior changes, +- keep generated schema fixtures in sync, +- extend TUI and app-server visibility for new hook events when needed, +- add focused tests for parser behavior, discovery behavior, and runtime + dispatch, +- decide whether legacy notification hooks remain supported long term or are + explicitly deprecated in docs. + +## Open decisions + +- Should Codex aim for Claude-compatible field names and semantics wherever + possible, or only for event-name parity? 
+- Should prompt and agent hooks be first-class in the initial public contract, + or stay experimental behind feature flags after implementation? +- Should unsupported advanced fields be removed now to reduce confusion, or kept + in schema as forward-compatibility placeholders? +- Which events should be thread-scoped versus turn-scoped in app-server and TUI + reporting? + +## Recommended first implementation slice + +If this work is started incrementally, the highest-leverage first slice is: + +1. publish a real user-facing hooks reference for Codex, +2. make the existing five events internally coherent, +3. add `PermissionRequest`, `Notification`, `SubagentStart`, + `SubagentStop`, and `SessionEnd`, +4. then add async and HTTP handler support. + +That sequence closes the largest user-visible parity gaps without mixing event +expansion, execution-model expansion, and advanced mutation semantics into one +hard-to-review change. From 3539af39303876c5c1a3d2ec3996a44b0f2aae32 Mon Sep 17 00:00:00 2001 From: Eric Juta Date: Thu, 26 Mar 2026 12:59:55 +0000 Subject: [PATCH 2/8] docs: tighten hooks parity handoff\n\nCo-authored-by: Codex --- docs/claude-code-hooks-parity.md | 71 ++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/docs/claude-code-hooks-parity.md b/docs/claude-code-hooks-parity.md index 1094e6cf2..02c3dd7ae 100644 --- a/docs/claude-code-hooks-parity.md +++ b/docs/claude-code-hooks-parity.md @@ -17,6 +17,40 @@ This does not require byte-for-byte compatibility in one step. It does require: - honoring documented decision-control fields when they are accepted by schema, - documenting any intentional deltas that remain. +## Read order + +If you are implementing against this doc, read the current source in this order: + +1. `docs/claude-code-hooks-parity.md` +2. `codex-rs/hooks/src/engine/config.rs` +3. `codex-rs/hooks/src/engine/discovery.rs` +4. `codex-rs/hooks/src/schema.rs` +5. `codex-rs/hooks/src/engine/output_parser.rs` +6. 
`codex-rs/core/src/hook_runtime.rs` +7. `codex-rs/core/src/codex.rs` +8. `codex-rs/core/src/tools/registry.rs` + +This order moves from public contract to discovery, then schema, then parser, +then runtime wiring, then legacy behavior. + +## Current source snapshot + +This doc is based on the current implementation shape in this checkout: + +- public `hooks.json` event groups are defined in + `codex-rs/hooks/src/engine/config.rs`, +- handler discovery and unsupported-handler warnings live in + `codex-rs/hooks/src/engine/discovery.rs`, +- public wire schema lives in `codex-rs/hooks/src/schema.rs`, +- output acceptance and rejection behavior lives in + `codex-rs/hooks/src/engine/output_parser.rs`, +- runtime dispatch for start, prompt-submit, pre-tool, and post-tool hooks + lives in `codex-rs/core/src/hook_runtime.rs`, +- `Stop` hook wiring lives in `codex-rs/core/src/codex.rs`, +- deprecated legacy `AfterToolUse` dispatch still exists in + `codex-rs/core/src/tools/registry.rs`, +- no repository-local `hooks.json` files are checked into this tree today. + ## Current Codex surface Today Codex exposes five public `hooks.json` event groups: @@ -128,8 +162,34 @@ than the runtime actually honors. - **No hidden UI drift**: hook additions must be visible in the TUI and app-server surfaces anywhere hook runs are rendered today. +## Do not do + +- Do not add a new public event without input schema, runtime dispatch, + hook-run reporting, and docs in the same lane. +- Do not keep wire fields in public schema as if they are live when the parser + still rejects them. +- Do not use deprecated `AfterAgent` or legacy `AfterToolUse` internals as + the long-term public parity path. +- Do not widen event coverage while leaving handler type and execution mode + reporting misleading in run summaries. +- Do not make hook support TUI-only; app-server and protocol surfaces must stay + aligned. + ## Implementation plan +### Branch and PR order + +Prefer this implementation order: + +1. 
contract cleanup for the existing five events, +2. runtime event expansion on the command-hook engine, +3. handler-type and execution-mode expansion, +4. advanced decision-control support, +5. pre/post tool-class parity work, +6. final doc consolidation and examples. + +Do not mix all six into one change. Keep each lane reviewable. + ### Phase 1: make the current public surface coherent Goal: remove misleading partial support inside the existing five events. @@ -249,6 +309,17 @@ Acceptance: - decide whether legacy notification hooks remain supported long term or are explicitly deprecated in docs. +## Acceptance gates for any implementation PR + +Every parity PR should satisfy all of these before merge: + +- docs updated for the newly supported behavior, +- generated hook schema fixtures updated if the public schema changed, +- focused tests added or updated for discovery, parser, and runtime behavior, +- hook run summaries still render correctly in TUI and app-server surfaces, +- unsupported behavior is either removed from schema or clearly documented as + unsupported. 
+ ## Open decisions - Should Codex aim for Claude-compatible field names and semantics wherever From 40774ed67d691220a1c02b1fa50ab13d09239ca3 Mon Sep 17 00:00:00 2001 From: Eric Juta Date: Thu, 26 Mar 2026 13:19:14 +0000 Subject: [PATCH 3/8] docs: add agentmemory replacement spec\n\nCo-authored-by: Codex --- ...entmemory-codex-memory-replacement-spec.md | 672 ++++++++++++++++++ 1 file changed, 672 insertions(+) create mode 100644 docs/agentmemory-codex-memory-replacement-spec.md diff --git a/docs/agentmemory-codex-memory-replacement-spec.md b/docs/agentmemory-codex-memory-replacement-spec.md new file mode 100644 index 000000000..72cd811a0 --- /dev/null +++ b/docs/agentmemory-codex-memory-replacement-spec.md @@ -0,0 +1,672 @@ +# agentmemory replacement spec for Codex native memory + +This document evaluates whether a forked Codex should disable the current +first-party memory system and replace it with +`~/Projects/agentmemory` as the primary memory engine. + +It is intended to be the canonical decision and implementation handoff for this +specific question: + +- is `agentmemory` materially more capable than Codex native memory, +- is it likely more token-efficient over time, +- if so, what would be lost by replacing Codex native memory, +- what replacement shape is worth building in a fork. + +This is an architecture and product-integration spec, not a request to +implement the replacement immediately. + +## Executive summary + +`agentmemory` is materially more advanced than Codex native memory as a +capture and retrieval engine. + +The strongest deltas are: + +- broader lifecycle capture through a wider hook surface, +- hybrid retrieval (BM25 + vector + graph), +- pluggable embeddings including Gemini, +- cross-agent MCP and REST exposure, +- retrieval bounded by top-K / token-budgeted context instead of relying on a + prebuilt local memory summary alone. 
+ +Codex native memory is still stronger in first-party runtime integration: + +- startup memory generation is built into the core session lifecycle, +- memory artifacts are deeply integrated into prompt construction, +- memory citations already flow through protocol and app-server surfaces, +- there are explicit local operations for memory refresh and memory removal, +- native memory state includes thread-level `memory_mode` semantics such as + `disabled` and `polluted`. + +Conclusion: + +- `agentmemory` is a material capability superset for memory retrieval and + capture quality. +- It is not a strict end-to-end product superset of Codex native memory. +- A replacement is defensible, but only if the fork rebuilds a thin Codex + integration layer for the native semantics that matter. + +Recommended direction: + +- do not pursue "full Claude parity first", +- do pursue "agentmemory as the primary memory engine with Codex-specific + shims", +- disable Codex native memory generation only after startup injection, + replacement memory ops, and a clear citation story are decided. + +## Target end state + +The target end state is not "agentmemory instead of Codex" in a narrow sense. +The target end state is: + +- `agentmemory` is the primary memory engine, +- Codex-native memory generation and consolidation are disabled, +- Codex retains or rebuilds only the product-level semantics that still add + value, +- the fork presents one coherent memory system to users, +- the resulting system is a functional superset of both: + - `agentmemory` capture and retrieval strengths, + - Codex-native prompt/runtime/protocol integration where it materially helps. + +In other words, the desired architecture is a Venn-diagram merge with one +authoritative engine, not permanent coexistence of two competing memory stacks. 
+ +## Scope + +This spec compares: + +- `/private/tmp/codex` +- `/Users/ericjuta/Projects/agentmemory` + +This spec is based on the current implementation shape in those checkouts, +including user-local plugin and hook configuration files present in the +`agentmemory` repo. + +## Read order + +Read these sources in order if implementing against this spec: + +1. `docs/agentmemory-codex-memory-replacement-spec.md` +2. `docs/claude-code-hooks-parity.md` +3. `codex-rs/core/src/memories/README.md` +4. `codex-rs/core/src/memories/prompts.rs` +5. `codex-rs/core/templates/memories/read_path.md` +6. `codex-rs/core/src/codex.rs` +7. `codex-rs/hooks/src/engine/config.rs` +8. `codex-rs/hooks/src/engine/discovery.rs` +9. `plugin/hooks/hooks.json` in `agentmemory` +10. `src/hooks/*.ts` in `agentmemory` +11. `src/providers/embedding/*.ts` and `src/state/hybrid-search.ts` in + `agentmemory` +12. `README.md` and `benchmark/*.md` in `agentmemory` + +## Current source snapshot + +### Codex + +Codex currently has: + +- a first-party startup memory pipeline in + `codex-rs/core/src/memories/README.md`, +- phase-1 extraction and phase-2 consolidation into `MEMORY.md`, + `memory_summary.md`, and rollout summary artifacts, +- developer-prompt injection of memory read-path instructions built from + `memory_summary.md`, +- protocol-level memory citations, +- memory-management operations such as `UpdateMemories` and + `DropMemories`, +- thread-level memory-mode state such as `disabled` and `polluted`, +- an under-development `codex_hooks` feature with five public hook events. 
+ +### agentmemory + +The `agentmemory` checkout currently contains: + +- a plugin manifest in `plugin/plugin.json`, +- a Claude-oriented hook bundle in `plugin/hooks/hooks.json`, +- TypeScript hook entrypoints under `src/hooks/`, +- multiple embedding providers under `src/providers/embedding/`, +- hybrid retrieval under `src/state/hybrid-search.ts`, +- REST and MCP surfaces, +- benchmarking and retrieval claims in `README.md` and `benchmark/`. + +The local `agentmemory` checkout is currently dirty. This matters only as a +reminder not to treat the local repo state as release-tagged truth; the source +shape is still adequate for architectural comparison. + +## Codex native memory: what it is + +Codex native memory is a core-managed memory pipeline, not just a retrieval +plugin. + +### Pipeline shape + +Codex native memory runs in two phases: + +1. Phase 1 extracts structured memory from eligible rollouts and stores + stage-1 outputs in the state DB. +2. Phase 2 consolidates those stage-1 outputs into durable memory artifacts on + disk and spawns an internal consolidation subagent. + +This is documented in `codex-rs/core/src/memories/README.md`. + +### Prompt integration + +Codex adds memory usage instructions directly into developer instructions when: + +- the memory feature is enabled, +- `config.memories.use_memories` is true, +- memory summary content exists. + +This is wired in `codex-rs/core/src/codex.rs` via +`build_memory_tool_developer_instructions(...)`. + +### Artifact model + +Codex memory produces and maintains: + +- `memory_summary.md` +- `MEMORY.md` +- `raw_memories.md` +- `rollout_summaries/*` +- optional `skills/*` + +These artifacts are not just storage. They are part of how Codex routes future +memory reads and citations. 
+ +### Operational integration + +Codex exposes native memory operations: + +- `UpdateMemories` +- `DropMemories` + +and memory-state controls: + +- `generate_memories` +- `use_memories` +- `no_memories_if_mcp_or_web_search` + +Codex also tracks thread memory-mode transitions such as `polluted`. + +### Citation integration + +Codex has protocol and app-server support for structured memory citations. +Those citations are already part of assistant-message rendering and transport. + +## agentmemory: what it is + +`agentmemory` is not just a memory file or summary generator. It is a +capture, indexing, retrieval, consolidation, MCP, and REST system. + +### Capture model + +The working Claude-oriented setup uses 12 hooks: + +- `SessionStart` +- `UserPromptSubmit` +- `PreToolUse` +- `PostToolUse` +- `PostToolUseFailure` +- `PreCompact` +- `SubagentStart` +- `SubagentStop` +- `Notification` +- `TaskCompleted` +- `Stop` +- `SessionEnd` + +The hook bundle is defined in `plugin/hooks/hooks.json`. + +### Observation flow + +The core runtime pattern is: + +- hooks send observations to REST endpoints, +- observations are deduplicated and privacy-filtered, +- observations are compressed and indexed, +- retrieval returns bounded context back into future sessions. + +The important thing is that capture happens at many lifecycle points, not just +after a Codex-style rollout completes. + +### Retrieval model + +agentmemory uses: + +- BM25, +- vector retrieval, +- graph retrieval, +- Reciprocal Rank Fusion, +- session diversification, +- progressive disclosure. + +This is a genuine retrieval stack, not just a durable handbook. + +### Embeddings + +agentmemory supports multiple embedding providers, including: + +- local embeddings, +- Gemini embeddings, +- OpenAI embeddings, +- Voyage, +- Cohere, +- OpenRouter. + +Gemini embedding support is real in this checkout, not hypothetical. 
+ +### Cross-agent model + +agentmemory is designed as a shared external service: + +- Claude hooks can write to it, +- MCP clients can query it, +- REST clients can integrate with it, +- multiple agent products can share one instance. + +This is a major design difference from Codex native memory. + +## Capability comparison + +### Capture breadth + +Codex native memory: + +- captures memory from rollouts selected by startup pipeline rules, +- is optimized around per-rollout extraction and later consolidation, +- does not expose comparable public lifecycle capture breadth in the current + hook surface. + +agentmemory: + +- captures at many lifecycle points, +- can record prompts, tool usage, failures, compaction moments, and subagent + lifecycle events, +- better matches the event stream of real coding work. + +Verdict: + +- `agentmemory` is materially stronger. + +### Retrieval quality + +Codex native memory: + +- primarily relies on generated memory artifacts, +- injects a read-path and memory summary into the prompt, +- does not show comparable semantic retrieval, vector search, BM25 ranking, or + graph traversal in the native memory path from the current source scan. + +agentmemory: + +- provides hybrid search, +- supports embeddings, +- supports graph-aware retrieval, +- uses token-bounded context assembly. + +Verdict: + +- `agentmemory` is materially stronger. + +### Consolidation sophistication + +Codex native memory: + +- has a robust two-phase extraction and consolidation pipeline, +- uses a dedicated consolidation subagent, +- maintains curated memory artifacts intended for future prompt routing. + +agentmemory: + +- claims 4-tier consolidation and memory evolution, +- versioning, semantic/procedural layers, relation graphs, and cascading + staleness. + +Verdict: + +- `agentmemory` is likely more ambitious and broader, +- Codex native memory is more tightly integrated and operationally simpler + inside Codex. 
+ +### First-party runtime integration + +Codex native memory: + +- is already first-party, +- already has prompt integration, +- already has memory commands, +- already has citations, +- already participates in internal policy/state flows. + +agentmemory: + +- does not automatically provide those Codex-native product behaviors, +- would need a Codex-specific bridge layer to replace them cleanly. + +Verdict: + +- Codex native memory is stronger here. + +### Cross-agent reuse + +Codex native memory: + +- is local to Codex runtime and artifacts. + +agentmemory: + +- is designed for multi-agent reuse through MCP and REST. + +Verdict: + +- `agentmemory` is materially stronger. + +## Is agentmemory a material superset? + +### Yes, in these senses + +agentmemory is a material superset of Codex native memory for: + +- retrieval breadth, +- semantic search, +- embedding-backed lookup, +- graph-backed lookup, +- cross-agent sharing, +- hook-based observation capture. + +### No, in these senses + +agentmemory is not a strict product-level superset of Codex native memory for: + +- first-party startup prompt integration, +- native memory operations (`UpdateMemories`, `DropMemories`), +- native memory citation protocol plumbing, +- thread-level memory-mode semantics such as `polluted`, +- deep alignment with Codex’s state DB and app-server/TUI surfaces. + +The correct judgment is: + +- `agentmemory` is a material capability superset for retrieval and capture, +- not a strict end-to-end replacement unless shims are added. + +The desired fork outcome therefore is: + +- replace Codex native memory internals, +- preserve or rebuild the useful Codex-native user-facing semantics as shims, +- end with a product-level superset even though `agentmemory` alone is not a + strict superset today. + +## Token efficiency + +This is the strongest practical argument in favor of `agentmemory`. 
+ +### Strong evidence in favor of agentmemory + +The `agentmemory` repo explicitly claims and benchmarks token savings: + +- `~1,900` tokens instead of loading all memory into context in + `README.md`, +- `92%` savings in `benchmark/REAL-EMBEDDINGS.md`, +- `86%` savings in `benchmark/QUALITY.md`, +- essentially corpus-size-stable top-K retrieval in `benchmark/SCALE.md`. + +The architectural reason is coherent: + +- retrieval returns top-K results, +- context assembly is bounded, +- compact result-first progressive disclosure reduces unnecessary expansion. + +### Codex native memory token profile + +Codex native memory is not obviously awful on tokens, but it is shaped +differently: + +- `memory_summary.md` injection is truncated to `5,000` tokens in + `codex-rs/core/src/memories/mod.rs`, +- stage-1 rollout processing can consume large inputs because it is an offline + extraction pipeline, not a lightweight query-time retrieval layer, +- the memory read-path instructs the model to query local memory artifacts + rather than receiving a purpose-built top-K retrieval result from a hybrid + search engine. + +### Apples-to-oranges caution + +The token comparison is not perfectly head-to-head. + +agentmemory benchmarks compare against "load everything into context" and +built-in-memory patterns such as monolithic `CLAUDE.md`-style memory files. +Codex native memory is more curated than that: + +- it injects a bounded `memory_summary.md`, +- it exposes a read-path for progressive on-disk lookup, +- it does not appear to simply dump all historical memory into every turn. + +So it would be wrong to claim the benchmark proves "agentmemory is 92% more +token-efficient than Codex native memory" as a verified current fact. 
+ +### Bottom-line token judgment + +Even with that caveat, `agentmemory` is still likely more token-efficient over +the long term than Codex native memory for large corpora because: + +- query-time retrieval is explicitly bounded, +- corpus growth does not force proportional prompt growth, +- embedding + hybrid retrieval reduces the need to over-inject summaries "just + in case", +- progressive disclosure lets the system fetch more only when needed. + +Codex native memory likely remains acceptable for modest corpus sizes, but it +does not appear to have the same query-time retrieval efficiency model. + +## Replacement architecture + +### Option 1: hard replacement + +Disable Codex native memory generation and injection entirely. Make +`agentmemory` the only memory engine. + +Benefits: + +- cleaner mental model, +- no duplicate memory systems, +- retrieval quality and token efficiency become `agentmemory`-driven, +- cross-agent memory reuse becomes first-class. + +Costs: + +- must rebuild startup prompt integration, +- must replace or remove `UpdateMemories` and `DropMemories`, +- must decide what to do about native memory citations, +- must replace or drop `polluted`/thread memory-mode semantics, +- must extend Codex hooks enough to make capture quality acceptable. + +Risk: + +- highest. + +## Native Codex behaviors that replacement must preserve or intentionally drop + +### Must preserve or replace + +- startup injection into developer instructions, +- user-facing operations to refresh or clear memory state, +- some citation strategy if memory provenance is important, +- protocol/app-server awareness of whatever replaces native memory, +- a clear policy for memory invalidation / pollution. 
+ +### Safe to drop if explicitly accepted + +- on-disk `MEMORY.md` / `memory_summary.md` artifact format compatibility, +- the exact current phase-1 / phase-2 internal implementation, +- native Codex consolidation subagent if `agentmemory` becomes authoritative, +- native artifact grooming and rollout summary persistence if the fork no longer + treats those as the canonical memory store. + +## Key risks + +### Duplicate system ambiguity + +If both systems remain partially active, it becomes unclear: + +- which system is authoritative, +- which one should inject context, +- which one should be cited, +- which one should be cleared by a user-facing "drop memories" action. + +Avoid this. + +### Hook-surface insufficiency + +Current Codex hooks are not enough to reproduce Claude-style `agentmemory` +capture quality: + +- only five public events, +- sync command handlers only, +- narrower tool coverage, +- missing public equivalents for several useful lifecycle events. + +If the fork does not extend hooks, the replacement will still leave value on +the table. + +### Protocol and UX regressions + +Dropping native Codex memory without replacing its protocol-level behaviors can +regress: + +- assistant memory citations, +- memory-management commands, +- app-server/TUI expectations around memory-aware behavior. + +### Benchmark over-claiming + +Do not claim: + +- that the `agentmemory` benchmarks directly prove a specific percentage gain + over Codex native memory, +- or that Gemini embeddings alone guarantee better results. + +The right claim is narrower: + +- `agentmemory` has a more scalable retrieval architecture and published token + savings versus all-in-context memory loading approaches, +- and that architecture is likely better long-term than Codex native memory for + large memory corpora. + +## Recommendation + +Target hard replacement as the end state. + +That means: + +1. make `agentmemory` the sole authoritative memory engine, +2. 
disable Codex native memory generation and consolidation in the final + architecture, +3. rebuild only the Codex-native product semantics worth preserving as shims on + top of `agentmemory`, +4. remove or deprecate native Codex memory artifacts and workflows in the fork + once those shims exist. + +This is the recommended path because it matches the explicit desired outcome: + +- one memory authority, +- no split-brain behavior, +- `agentmemory` for the stronger retrieval and capture substrate, +- Codex integration retained only where it improves the product. + +The fork can still phase the work, but every phase should point toward hard +replacement rather than toward permanent coexistence. + +## Recommended implementation phases + +### Phase 1: decision and contract + +- Decide that `agentmemory` is the primary memory authority. +- Freeze which native Codex behaviors will be preserved. +- Define how startup context injection will work in the fork. +- Decide whether native memory citations remain required. +- Define the end-state explicitly as a functional superset, not a partial port. + +### Phase 2: Codex integration adapter + +- Add a Codex-specific `agentmemory` integration layer. +- Replace startup memory prompt generation with `agentmemory` retrieval. +- Add equivalent user-facing operations for refresh and clear. +- Decide whether these call into `agentmemory` REST/MCP or a local adapter. + +### Phase 3: hook expansion + +- Extend Codex hook coverage enough to support useful `agentmemory` capture. +- Minimum likely useful additions: + - `SessionEnd` + - `PostToolUseFailure` + - `SubagentStart` + - `SubagentStop` + - `Notification` + - `PreCompact` + +### Phase 4: native memory deprecation + +- Turn off Codex native memory generation by default in the fork. +- Remove or quarantine old native memory artifacts once the adapter is stable. +- Preserve migration tooling only if existing users need it. 
+ +### Phase 5: superset hardening + +- Verify that every retained Codex-native memory affordance has an + `agentmemory`-backed implementation or an intentional deletion note. +- Verify that token usage remains bounded as corpus size grows. +- Verify that there is only one authoritative memory source in the runtime. +- Remove any remaining code paths that can accidentally re-enable split-brain + behavior. + +### Phase 6: optional advanced alignment + +- Add memory citation mapping from `agentmemory` results into Codex protocol + structures. +- Add richer protocol and app-server visibility if needed. +- Reassess whether any remaining native memory logic should survive. + +## Do not do + +- Do not run Codex native memory injection and `agentmemory` injection as + equal peers long term. +- Do not claim a strict superset without rebuilding missing Codex-native + semantics. +- Do not clone Claude plugin infrastructure into Codex just to make the + replacement work. +- Do not overfit to Claude-specific bridge behavior such as + `~/.claude/projects/*/memory/MEMORY.md` if Codex is becoming the primary + target. +- Do not remove native memory citations or memory operations accidentally; if + they are dropped, document that as an intentional product change. 
+ +## Acceptance criteria for a forked replacement + +The replacement is successful only if all of these are true: + +- `agentmemory` is the authoritative source for retrieved memory context, +- Codex native memory is no longer an independent competing authority, +- Codex startup injection still works reliably, +- memory refresh and memory clearing remain user-visible operations or are + intentionally removed with docs, +- hook/event coverage is sufficient to produce materially useful observations, +- token usage stays bounded as the corpus grows, +- the fork has a clear provenance story for memory-derived output, +- there is no ambiguity about which memory system is active, +- the resulting user-facing behavior is a practical superset of the two source + systems rather than a regression-heavy swap. + +## Final judgment + +If the question is "is `agentmemory` materially more advanced than Codex +native memory?", the answer is yes. + +If the question is "should a fork disable Codex native memory and replace it +with `agentmemory`?", the answer is: + +- yes, +- with the condition that the fork also rebuild the Codex-native integration + semantics that matter, +- and with the explicit goal of a single authoritative memory system rather + than a permanent hybrid. 
From 6c5094f86ed22d91f53fbe053125599c47177e7e Mon Sep 17 00:00:00 2001 From: Eric Juta Date: Thu, 26 Mar 2026 13:24:33 +0000 Subject: [PATCH 4/8] docs: tighten agentmemory replacement target\n\nCo-authored-by: Codex --- ...entmemory-codex-memory-replacement-spec.md | 79 +++++++++++++++++-- 1 file changed, 74 insertions(+), 5 deletions(-) diff --git a/docs/agentmemory-codex-memory-replacement-spec.md b/docs/agentmemory-codex-memory-replacement-spec.md index 72cd811a0..6861cd4a7 100644 --- a/docs/agentmemory-codex-memory-replacement-spec.md +++ b/docs/agentmemory-codex-memory-replacement-spec.md @@ -63,6 +63,12 @@ The target end state is: - Codex-native memory generation and consolidation are disabled, - Codex retains or rebuilds only the product-level semantics that still add value, +- the fork uses the full `agentmemory` retrieval stack in steady state: + BM25 + vector + graph, +- embeddings are enabled by default in steady state; BM25-only mode is a + fallback, not the target architecture, +- lifecycle capture uses the widest useful hook surface rather than the minimum + viable subset, - the fork presents one coherent memory system to users, - the resulting system is a functional superset of both: - \`agentmemory\` capture and retrieval strengths, @@ -71,6 +77,30 @@ The target end state is: In other words, the desired architecture is a Venn-diagram merge with one authoritative engine, not permanent coexistence of two competing memory stacks. +## Maximum-performance policy + +The intended end state should maximize `agentmemory`, not merely adopt it.
+ +That means: + +- use hybrid retrieval as the primary retrieval path, +- enable embeddings by default in the intended production configuration, +- preserve graph retrieval and relation-aware retrieval as first-class + capabilities, +- use progressive disclosure and token budgets instead of large static memory + injections wherever possible, +- implement enough hook coverage that the observation stream is rich rather + than sparse, +- treat BM25-only mode as an acceptable degraded mode, not as the target. + +Provider policy: + +- support all current `agentmemory` embedding providers, +- keep Gemini embeddings available as a first-class provider, +- prefer the best available embedding backend for the environment rather than + hardcoding a low-capability default in the architecture, +- avoid designing the replacement around a no-embeddings baseline. + ## Scope This spec compares: @@ -479,7 +509,8 @@ Costs: - must replace or remove `UpdateMemories` and `DropMemories`, - must decide what to do about native memory citations, - must replace or drop `polluted`/thread memory-mode semantics, -- must extend Codex hooks enough to make capture quality acceptable. +- must extend Codex hooks enough to make capture quality fully competitive with + the `agentmemory` model rather than merely acceptable. Risk: @@ -553,6 +584,20 @@ The right claim is narrower: - and that architecture is likely better long-term than Codex native memory for large memory corpora. +### Performance-oriented token policy + +The intended architecture should optimize for query-time token efficiency, not +artifact compatibility. + +That means: + +- prefer top-K retrieval over broad handbook injection, +- keep startup context bounded and relevance-ranked, +- expand details only on demand, +- avoid recreating a large static `MEMORY.md`-style injection layer on top of + `agentmemory`, +- measure steady-state tokens/query as a first-class success metric.
+ ## Recommendation Target hard replacement as the end state. @@ -593,17 +638,33 @@ replacement rather than toward permanent coexistence. - Replace startup memory prompt generation with `agentmemory` retrieval. - Add equivalent user-facing operations for refresh and clear. - Decide whether these call into `agentmemory` REST/MCP or a local adapter. +- Route startup injection through the bounded `agentmemory` retrieval path + rather than recreating Codex-native memory artifact loading. +- Make token budget, retrieval mode, and expansion behavior explicit parts of + the adapter contract. ### Phase 3: hook expansion -- Extend Codex hook coverage enough to support useful `agentmemory` capture. -- Minimum likely useful additions: - - `SessionEnd` +- Extend Codex hook coverage to support the full useful `agentmemory` + observation model, not just a minimum subset. +- Target the full current `agentmemory` hook set: + - `SessionStart` + - `UserPromptSubmit` + - `PreToolUse` + - `PostToolUse` - `PostToolUseFailure` + - `PreCompact` - `SubagentStart` - `SubagentStop` - `Notification` - - `PreCompact` + - `TaskCompleted` + - `Stop` + - `SessionEnd` +- Broaden `PreToolUse` and `PostToolUse` beyond the current shell-centric + path so file tools, command tools, and other high-signal tool classes are + observed consistently. +- Do not treat hook expansion as optional polish; it is core to achieving the + high-performance end state. ### Phase 4: native memory deprecation @@ -619,6 +680,10 @@ replacement rather than toward permanent coexistence. - Verify that there is only one authoritative memory source in the runtime. - Remove any remaining code paths that can accidentally re-enable split-brain behavior. +- Verify that embeddings, graph retrieval, and progressive disclosure are + active in the intended steady-state configuration. +- Verify that the system is not silently falling back to a lower-capability + retrieval mode in normal operation. 
### Phase 6: optional advanced alignment @@ -652,6 +717,10 @@ The replacement is successful only if all of these are true: intentionally removed with docs, - hook/event coverage is sufficient to produce materially useful observations, - token usage stays bounded as the corpus grows, +- the intended steady state uses embeddings and hybrid retrieval rather than a + degraded BM25-only baseline, +- Gemini or another high-quality embedding provider remains available as a + first-class configuration path, - the fork has a clear provenance story for memory-derived output, - there is no ambiguity about which memory system is active, - the resulting user-facing behavior is a practical superset of the two source From fb371104a008dfed7a850747bd04c03e9c412408 Mon Sep 17 00:00:00 2001 From: Eric Juta Date: Thu, 26 Mar 2026 13:26:18 +0000 Subject: [PATCH 5/8] docs: note agentmemory env alignment\n\nCo-authored-by: Codex --- ...entmemory-codex-memory-replacement-spec.md | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/agentmemory-codex-memory-replacement-spec.md b/docs/agentmemory-codex-memory-replacement-spec.md index 6861cd4a7..7c09b2b69 100644 --- a/docs/agentmemory-codex-memory-replacement-spec.md +++ b/docs/agentmemory-codex-memory-replacement-spec.md @@ -164,6 +164,39 @@ The local `agentmemory` checkout is currently dirty. This matters only as a reminder not to treat the local repo state as release-tagged truth; the source shape is still adequate for architectural comparison. +### Current env alignment + +The live worker configuration is not sourced from +`~/Projects/agentmemory/.env`. 
In this checkout, `docker-compose.yml` points +the worker at: + +- `${HOME}/.agentmemory/.env` + +Current externally loaded env alignment, verified in redacted form: + +- `GEMINI_API_KEY` is present, +- `GEMINI_MODEL` is present, +- `GEMINI_EMBEDDING_MODEL` is present, +- `GEMINI_EMBEDDING_DIMENSIONS` is present, +- `GRAPH_EXTRACTION_ENABLED` is present, +- `CONSOLIDATION_ENABLED` is present. + +Implications: + +- the current live environment already aligns with Gemini-first provider + selection, +- embedding auto-detection should resolve to Gemini unless explicitly + overridden, +- graph extraction and consolidation are already enabled in the current + external env, +- the current external env does not explicitly pin `EMBEDDING_PROVIDER`, + `TOKEN_BUDGET`, `BM25_WEIGHT`, `VECTOR_WEIGHT`, or + `FALLBACK_PROVIDERS`, so those currently rely on code defaults rather than + explicit ops policy. + +For a maximum-performance steady state, that last point should be treated as a +configuration gap, not as the desired final setup. + ## Codex native memory: what it is Codex native memory is a core-managed memory pipeline, not just a retrieval From 0d6d7053402572eebd45384302d91f87a145db3c Mon Sep 17 00:00:00 2001 From: Eric Juta Date: Thu, 26 Mar 2026 16:01:24 +0000 Subject: [PATCH 6/8] docs: add replacement execution plan\n\nCo-authored-by: Codex --- ...entmemory-codex-memory-replacement-spec.md | 238 ++++++++++++++++++ 1 file changed, 238 insertions(+) diff --git a/docs/agentmemory-codex-memory-replacement-spec.md b/docs/agentmemory-codex-memory-replacement-spec.md index 7c09b2b69..17fc2cf37 100644 --- a/docs/agentmemory-codex-memory-replacement-spec.md +++ b/docs/agentmemory-codex-memory-replacement-spec.md @@ -725,6 +725,244 @@ replacement rather than toward permanent coexistence. - Add richer protocol and app-server visibility if needed. - Reassess whether any remaining native memory logic should survive.
+## Execution plan + +This section turns the replacement architecture into a low-rebase execution +plan. + +The key rule is: + +- keep invasive edits concentrated in a few upstream-hot orchestration files, +- keep most new logic in fork-owned modules, +- gate native behavior off before deleting it. + +### Allowed write boundaries + +The preferred fork seam is: + +- small edits in: + - `codex-rs/core/src/codex.rs` + - `codex-rs/core/src/hook_runtime.rs` + - `codex-rs/hooks/src/engine/config.rs` + - `codex-rs/hooks/src/engine/discovery.rs` + - hook event/schema files only when required for new public events +- most new implementation in new fork-owned modules, for example: + - `codex-rs/core/src/agentmemory/` + - `codex-rs/hooks/src/agentmemory/` or equivalent hook-translation module + +### Intentionally untouched until cutover + +Do not broadly rewrite these early: + +- `codex-rs/core/src/memories/*` +- `codex-rs/core/templates/memories/*` +- native memory artifact generation logic +- broad protocol/app-server surfaces unrelated to memory provenance + +Early phases should gate or bypass these paths, not delete or refactor them. + +### Branch order + +Use a short stack of focused branches / PRs. + +#### PR 1: backend selector and fork seam + +Goal: + +- introduce a clear memory backend selector, +- add the new `agentmemory` adapter module skeleton, +- make no user-visible behavior change yet. + +Write scope: + +- config wiring, +- new adapter modules, +- minimal callsite plumbing only where needed. + +Must not do: + +- no native memory deletion, +- no protocol changes, +- no hook expansion yet. + +Merge gate: + +- no behavior regression with native memory still active by default, +- docs updated to describe the seam. + +#### PR 2: startup injection replacement + +Goal: + +- route startup memory injection through the `agentmemory` adapter, +- make bounded retrieval the new startup path, +- stop depending on native memory artifact loading for startup context. 
+ +Write scope: + +- `codex-rs/core/src/codex.rs` +- adapter module +- minimal config/docs updates + +Must not do: + +- do not delete native memories yet, +- do not add broad protocol changes, +- do not expand hook coverage in the same PR. + +Merge gate: + +- startup context is sourced from `agentmemory`, +- token budget and retrieval mode are explicit and tested, +- no static `MEMORY.md`-style reinjection layer is recreated on top. + +#### PR 3: public hook event expansion + +Goal: + +- expand Codex hooks to cover the full useful `agentmemory` hook set: + - `SessionStart` + - `UserPromptSubmit` + - `PreToolUse` + - `PostToolUse` + - `PostToolUseFailure` + - `PreCompact` + - `SubagentStart` + - `SubagentStop` + - `Notification` + - `TaskCompleted` + - `Stop` + - `SessionEnd` + +Write scope: + +- hook config/schema/discovery/runtime files, +- TUI/app-server visibility only where hook runs need surfacing. + +Must not do: + +- do not mix in native memory deletion, +- do not mix in citation replacement. + +Merge gate: + +- each event has runtime dispatch, +- each event is documented, +- hook run visibility remains coherent. + +#### PR 4: tool coverage broadening + +Goal: + +- broaden `PreToolUse` and `PostToolUse` beyond the shell-centric path, +- ensure file tools, command tools, and other high-signal tool classes are + observed consistently for `agentmemory`. + +Write scope: + +- `codex-rs/core/src/hook_runtime.rs` +- tool handler payload plumbing +- hook translation layer + +Must not do: + +- do not mix in memory command replacement, +- do not delete native memory paths yet. + +Merge gate: + +- high-signal tool classes emit useful observation payloads, +- no regression in existing shell-hook flows. + +#### PR 5: memory ops and provenance replacement + +Goal: + +- replace or redefine `UpdateMemories` and `DropMemories`, +- decide and implement provenance behavior, +- define the replacement for native `polluted` semantics. 
+ +Write scope: + +- memory command handlers, +- provenance/citation integration, +- minimal protocol additions if absolutely required. + +Must not do: + +- do not combine this with broad deletion of native memory code. + +Merge gate: + +- user-facing memory refresh/clear actions still exist or are intentionally + documented as removed, +- provenance behavior is explicit, +- no ambiguity remains about memory invalidation rules. + +#### PR 6: hard cutover + +Goal: + +- disable native memory generation and consolidation in normal runtime paths, +- make `agentmemory` the only authoritative memory backend, +- quarantine or deprecate native memory artifacts. + +Write scope: + +- backend selection defaults, +- final cutover gating, +- cleanup of callsites that can still route to native memory. + +Must not do: + +- do not do broad code deletion unless the fork is already stable after cutover, +- do not remove debug/rollback switches until at least one successful rebase + cycle after cutover. + +Merge gate: + +- one memory authority in runtime, +- no split-brain injection, +- no accidental native fallback in standard flows. + +#### PR 7: post-cutover cleanup + +Goal: + +- remove dead native-memory paths only after the hard cutover has stabilized. + +Write scope: + +- native memory code and docs that are no longer reachable, +- migration notes if existing users need them. + +Merge gate: + +- cleanup produces less rebase churn than it creates, +- rollback path is no longer needed. + +### Rebase policy + +- Rebase frequently; do not let this stack drift for long. +- Rebase before opening each PR and after any upstream changes touching: + - `codex-rs/core/src/codex.rs` + - `codex-rs/core/src/hook_runtime.rs` + - hook engine config/discovery/schema/runtime files +- Prefer new modules over editing existing modules repeatedly. +- If a behavior can live in the adapter, keep it out of upstream-hot files. 
+- Do not delete upstream code early; disabling is cheaper to rebase than + removal. + +### Success metrics by PR + +- PR 1: seam exists with no behavior regression. +- PR 2: startup injection is `agentmemory`-backed and token-bounded. +- PR 3: hook surface matches the intended `agentmemory` event model. +- PR 4: observation capture is rich across the important tool classes. +- PR 5: memory ops and provenance no longer depend on native memory internals. +- PR 6: runtime has one authoritative memory backend. +- PR 7: dead code removal does not increase future rebase cost materially. + ## Do not do - Do not run Codex native memory injection and `agentmemory` injection as From 3792d2fb8bb89309020c9b0d9401b3106428d8c4 Mon Sep 17 00:00:00 2001 From: Eric Juta Date: Thu, 26 Mar 2026 16:10:33 +0000 Subject: [PATCH 7/8] docs: add handoff prompts to replacement plan\n\nCo-authored-by: Codex --- ...entmemory-codex-memory-replacement-spec.md | 181 ++++++++++++++++++ 1 file changed, 181 insertions(+) diff --git a/docs/agentmemory-codex-memory-replacement-spec.md b/docs/agentmemory-codex-memory-replacement-spec.md index 17fc2cf37..451fa4a4c 100644 --- a/docs/agentmemory-codex-memory-replacement-spec.md +++ b/docs/agentmemory-codex-memory-replacement-spec.md @@ -963,6 +963,187 @@ Merge gate: - PR 6: runtime has one authoritative memory backend. - PR 7: dead code removal does not increase future rebase cost materially. +### Handoff prompts by PR + +These are intended as copy-paste prompts for future sessions, child agents, or +parallel worker swarms. Each prompt is deliberately scoped to one PR-sized +slice. + +#### PR 1 handoff prompt + +```text +Implement PR 1 from docs/agentmemory-codex-memory-replacement-spec.md. 
+ +Goal: +- introduce a clear memory backend selector +- add the new agentmemory adapter seam +- make no user-visible behavior change yet + +Constraints: +- keep invasive edits concentrated +- do not delete or broadly rewrite codex-rs/core/src/memories/* +- do not change protocol shapes +- do not expand hooks yet + +Write scope: +- config wiring +- new fork-owned adapter modules +- minimal callsite plumbing only where needed + +Acceptance: +- native memory remains default and behaviorally unchanged +- the seam exists and is documented +- code is structured so later PRs can route through the adapter without large rewrites +``` + +#### PR 2 handoff prompt + +```text +Implement PR 2 from docs/agentmemory-codex-memory-replacement-spec.md. + +Goal: +- replace startup memory prompt generation with agentmemory-backed retrieval +- make startup context bounded, relevance-ranked, and token-budgeted + +Constraints: +- do not recreate static MEMORY.md-style loading on top of agentmemory +- do not expand hooks in this PR +- do not delete native memory code yet + +Write scope: +- codex-rs/core/src/codex.rs +- agentmemory adapter module +- small config/docs updates if required + +Acceptance: +- startup injection is sourced through the adapter +- retrieval mode and token budget are explicit +- native memory still exists only as a gated fallback path, not the main path +``` + +#### PR 3 handoff prompt + +```text +Implement PR 3 from docs/agentmemory-codex-memory-replacement-spec.md. 
+ +Goal: +- expand Codex public hooks to support the full useful agentmemory event model + +Target events: +- SessionStart +- UserPromptSubmit +- PreToolUse +- PostToolUse +- PostToolUseFailure +- PreCompact +- SubagentStart +- SubagentStop +- Notification +- TaskCompleted +- Stop +- SessionEnd + +Constraints: +- keep handler semantics coherent +- do not mix in native memory deletion +- do not mix in provenance/citation replacement + +Acceptance: +- each target event is represented in config/discovery/runtime +- documentation and hook-run visibility are updated +- new events do not regress existing hook behavior +``` + +#### PR 4 handoff prompt + +```text +Implement PR 4 from docs/agentmemory-codex-memory-replacement-spec.md. + +Goal: +- broaden PreToolUse and PostToolUse beyond the shell-centric path +- ensure high-signal tool classes produce useful agentmemory observations + +Constraints: +- prioritize file tools, command tools, and other high-signal tool classes +- do not mix in memory command replacement +- do not cut over the backend here + +Acceptance: +- important tool classes emit observation payloads consistently +- shell-hook behavior still works +- capture quality is materially closer to the Claude-side agentmemory model +``` + +#### PR 5 handoff prompt + +```text +Implement PR 5 from docs/agentmemory-codex-memory-replacement-spec.md. + +Goal: +- replace or redefine UpdateMemories and DropMemories +- decide and implement provenance behavior +- define the replacement for native polluted semantics + +Constraints: +- keep protocol churn minimal unless required +- make user-facing behavior explicit +- do not delete native memory paths in this PR + +Acceptance: +- memory refresh/clear actions still exist or are intentionally removed with docs +- provenance behavior is explicit +- invalidation rules are no longer ambiguous +``` + +#### PR 6 handoff prompt + +```text +Implement PR 6 from docs/agentmemory-codex-memory-replacement-spec.md. 
+ +Goal: +- make agentmemory the only authoritative runtime memory backend +- disable native memory generation and consolidation in normal runtime paths + +Constraints: +- do not do broad dead-code deletion yet +- keep rollback/debug switches until cutover is validated + +Acceptance: +- one memory authority remains in runtime +- no split-brain injection is possible in standard flows +- native paths are gated off rather than accidentally still active +``` + +#### PR 7 handoff prompt + +```text +Implement PR 7 from docs/agentmemory-codex-memory-replacement-spec.md. + +Goal: +- perform post-cutover cleanup only after the hard replacement is stable + +Constraints: +- prefer cleanup that reduces future rebase cost +- do not remove rollback/debug tooling prematurely + +Acceptance: +- dead native-memory paths are removed only when safe +- cleanup does not create more rebase drag than it removes +``` + +#### Cross-PR reviewer prompt + +```text +Review the current PR against docs/agentmemory-codex-memory-replacement-spec.md. + +Focus: +- does this PR stay within its assigned write boundary +- does it reduce or increase future rebase drag +- does it preserve the hard-replacement target +- does it accidentally introduce split-brain behavior +- does it move the system toward maximum-performance agentmemory usage rather than a degraded fallback +``` + ## Do not do - Do not run Codex native memory injection and `agentmemory` injection as From 7651a9be717bcd37cd087639f7d0aaf653222e7f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Mar 2026 16:25:37 +0000 Subject: [PATCH 8/8] build(deps): bump pulldown-cmark from 0.10.3 to 0.13.3 in /codex-rs Bumps [pulldown-cmark](https://github.com/raphlinus/pulldown-cmark) from 0.10.3 to 0.13.3. 
- [Release notes](https://github.com/raphlinus/pulldown-cmark/releases) - [Commits](https://github.com/raphlinus/pulldown-cmark/compare/v0.10.3...v0.13.3) --- updated-dependencies: - dependency-name: pulldown-cmark dependency-version: 0.13.3 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- codex-rs/Cargo.lock | 8 ++++---- codex-rs/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 7a554c2bc..f6b2790f1 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -7534,9 +7534,9 @@ checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac" [[package]] name = "pulldown-cmark" -version = "0.10.3" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76979bea66e7875e7509c4ec5300112b316af87fa7a252ca91c448b32dfe3993" +checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad" dependencies = [ "bitflags 2.10.0", "getopts", @@ -7547,9 +7547,9 @@ dependencies = [ [[package]] name = "pulldown-cmark-escape" -version = "0.10.1" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd348ff538bc9caeda7ee8cad2d1d48236a1f443c1fa3913c6a02fe0043b1dd3" +checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" [[package]] name = "pxfm" diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml index 556976fc1..036761c78 100644 --- a/codex-rs/Cargo.toml +++ b/codex-rs/Cargo.toml @@ -261,7 +261,7 @@ pathdiff = "0.2" portable-pty = "0.9.0" predicates = "3" pretty_assertions = "1.4.1" -pulldown-cmark = "0.10" +pulldown-cmark = "0.13" quick-xml = "0.38.4" rand = "0.9" ratatui = "0.29.0"