From 988b868cfeea7981d68c4e42904f1bd7489a7342 Mon Sep 17 00:00:00 2001
From: Eric Juta <ericjohnjuta@gmail.com>
Date: Thu, 26 Mar 2026 12:58:16 +0000
Subject: [PATCH 1/8] docs: add Claude Code hooks parity spec

Co-authored-by: Codex <noreply@openai.com>
---
 docs/claude-code-hooks-parity.md | 275 +++++++++++++++++++++++++++++++
 1 file changed, 275 insertions(+)
 create mode 100644 docs/claude-code-hooks-parity.md

diff --git a/docs/claude-code-hooks-parity.md b/docs/claude-code-hooks-parity.md
new file mode 100644
index 000000000..1094e6cf2
--- /dev/null
+++ b/docs/claude-code-hooks-parity.md
@@ -0,0 +1,275 @@
+# Claude Code hooks parity
+
+This document captures the current Codex hooks surface and the remaining
+feature-parity gap versus Claude Code's documented hooks system. It is intended
+to be the canonical planning doc for expanding `codex_hooks`.
+
+## Goal
+
+Bring Codex's public `hooks.json` lifecycle hooks close enough to Claude
+Code's model that Claude-oriented hook setups can be ported with predictable,
+documented edits rather than custom runtime patches.
+
+This does not require byte-for-byte compatibility in one step. It does require:
+
+- matching the major public event categories users expect,
+- supporting the handler types those configurations rely on,
+- honoring documented decision-control fields when they are accepted by schema,
+- documenting any intentional deltas that remain.
+
+## Current Codex surface
+
+Today Codex exposes five public `hooks.json` event groups:
+
+- `PreToolUse`
+- `PostToolUse`
+- `SessionStart`
+- `UserPromptSubmit`
+- `Stop`
+
+The current engine only executes synchronous command handlers. `prompt`,
+`agent`, and `async` configurations are parsed but skipped with warnings.
+
+The current runtime also has narrower execution coverage than Claude Code:
+
+- `PreToolUse` and `PostToolUse` are currently wired through the shell path,
+  with runtime requests using `tool_name: "Bash"`.
+- `UserPromptSubmit` and `Stop` ignore matchers.
+- some wire fields are present in schema but are rejected by the output parser
+  as unsupported.
+
+Legacy internal paths still exist for notification-style hooks
+(`AfterAgent` / deprecated `AfterToolUse`), but they are not part of the
+public `hooks.json` contract.
+
+## Claude Code parity gap
+
+Claude Code's current hooks reference documents a larger event surface and more
+handler modes than Codex currently supports.
+
+### Missing event coverage
+
+Codex does not yet expose public `hooks.json` support for these documented
+Claude Code event families:
+
+- `InstructionsLoaded`
+- `PermissionRequest`
+- `PostToolUseFailure`
+- `Notification`
+- `SubagentStart`
+- `SubagentStop`
+- `StopFailure`
+- `TeammateIdle`
+- `TaskCompleted`
+- `ConfigChange`
+- `CwdChanged`
+- `FileChanged`
+- `WorktreeCreate`
+- `WorktreeRemove`
+- `PreCompact`
+- `PostCompact`
+- `SessionEnd`
+- `Elicitation`
+- `ElicitationResult`
+
+### Missing handler coverage
+
+Codex does not yet support these Claude Code hook handler categories in the
+public engine:
+
+- async command hooks,
+- HTTP hooks,
+- prompt hooks,
+- agent hooks.
+
+### Partial decision-control coverage
+
+Codex schema already models some advanced fields, but runtime support is still
+partial:
+
+- `PreToolUse.updatedInput` is rejected.
+- `PreToolUse.additionalContext` is rejected.
+- `PreToolUse.permissionDecision: allow` is rejected.
+- `PreToolUse.permissionDecision: ask` is rejected.
+- `PostToolUse.updatedMCPToolOutput` is rejected.
+- `suppressOutput` is rejected for `PreToolUse` and `PostToolUse`.
+- `stopReason` and `continue: false` are rejected for `PreToolUse`.
+
+This creates a confusing state where the schema shape suggests broader support
+than the runtime actually honors.
+
+### Tool and matcher parity gaps
+
+- `PreToolUse` and `PostToolUse` should evolve from shell-centric wiring to
+  a consistent tool-event contract across relevant tool classes.
+- matcher support should be explicit and consistent across all events that
+  Claude users expect to filter.
+- MCP-aware hook behavior should be designed as first-class runtime behavior,
+  not as a schema placeholder.
+
+## Non-goals
+
+- Reproducing Claude Code internals exactly where Codex architecture differs.
+- Preserving every existing partial or deprecated behavior forever.
+- Adding public hook types without app-server, TUI, and docs visibility for the
+  resulting runs.
+
+## Design principles
+
+- **Public contract first**: do not expose schema fields that the runtime will
+  immediately reject unless they are clearly marked unsupported.
+- **Event completeness over aliases**: add real lifecycle events before adding
+  compatibility shims.
+- **One event, one payload contract**: every public event needs stable input and
+  output schema fixtures, runtime execution, and surfaced hook-run reporting.
+- **Fail-open unless explicitly blocking**: invalid hook output should not cause
+  surprising hard failures outside events whose contract is intentionally
+  blocking.
+- **No hidden UI drift**: hook additions must be visible in the TUI and
+  app-server surfaces anywhere hook runs are rendered today.
+
+## Implementation plan
+
+### Phase 1: make the current public surface coherent
+
+Goal: remove misleading partial support inside the existing five events.
+
+Required work:
+
+- align schema and parser behavior for the five existing events,
+- either implement or remove unsupported schema fields that are already emitted
+  in fixtures,
+- document matcher behavior explicitly,
+- document current shell-centric tool coverage explicitly,
+- add a dedicated user-facing reference doc for `hooks.json` behavior if the
+  main docs site still only mentions legacy notification hooks.
+
+Acceptance:
+
+- no schema field is silently accepted but runtime-rejected without explicit
+  documentation,
+- the docs explain exactly which event fields and decisions are live,
+- existing five-event behavior is covered by tests and schema fixtures.
+
+### Phase 2: expand event coverage on the existing command-hook engine
+
+Goal: add missing lifecycle events before broadening handler types.
+
+Priority order:
+
+1. `PermissionRequest`
+2. `Notification`
+3. `SubagentStart` and `SubagentStop`
+4. `PostToolUseFailure` and `StopFailure`
+5. `SessionEnd`
+6. `ConfigChange`, `CwdChanged`, and `FileChanged`
+7. `PreCompact` and `PostCompact`
+8. `TaskCompleted` and `TeammateIdle`
+9. `InstructionsLoaded`
+10. `WorktreeCreate` and `WorktreeRemove`
+11. `Elicitation` and `ElicitationResult`
+
+Acceptance:
+
+- each event has an input schema fixture,
+- each event has runtime dispatch wiring,
+- each event emits `HookStarted` and `HookCompleted` consistently,
+- each event has an explicit matcher story,
+- docs list the event as supported.
+
+### Phase 3: broaden handler types
+
+Goal: match the main Claude Code hook execution modes.
+
+Required work:
+
+- implement async command hooks,
+- add HTTP hook handlers,
+- add prompt hook handlers,
+- add agent hook handlers,
+- surface handler type and execution mode accurately in run summaries.
+
+Acceptance:
+
+- discovery no longer skips supported handler types with warnings,
+- `HookRunSummary` reports real handler type and execution mode,
+- command, HTTP, prompt, and agent handlers have stable input/output contracts,
+- async execution semantics are documented, especially ordering and failure
+  behavior.
+
+### Phase 4: close decision-control parity gaps
+
+Goal: implement or explicitly drop advanced output fields.
+
+Required work:
+
+- decide whether `PreToolUse.updatedInput` will be supported in Codex,
+- decide whether `PreToolUse.permissionDecision: ask` maps to an approval
+  prompt, a model-visible continuation, or remains unsupported,
+- implement `additionalContext` anywhere the contract claims it exists,
+- decide whether `PostToolUse.updatedMCPToolOutput` is part of the public
+  runtime contract,
+- review event-specific `continue`, `stopReason`, and `suppressOutput`
+  semantics for consistency.
+
+Acceptance:
+
+- advanced hook output fields are either implemented end-to-end or removed from
+  public schema,
+- runtime behavior matches docs and tests,
+- no event-specific decision-control behavior relies on undocumented parser
+  special cases.
+
+### Phase 5: tool-class parity for pre/post tool hooks
+
+Goal: make tool hooks genuinely tool-aware rather than shell-specific.
+
+Required work:
+
+- define which Codex tool classes participate in `PreToolUse` and
+  `PostToolUse`,
+- expose stable tool identifiers and input payloads for those classes,
+- define MCP-tool matcher behavior explicitly,
+- preserve backward compatibility for existing Bash-oriented hooks where
+  feasible.
+
+Acceptance:
+
+- users can target more than the shell path with pre/post tool hooks,
+- tool names and payloads are documented and stable,
+- MCP tool behavior is implemented rather than placeholder-only.
+
+## Required cross-cutting work
+
+- update docs under `docs/` when public behavior changes,
+- keep generated schema fixtures in sync,
+- extend TUI and app-server visibility for new hook events when needed,
+- add focused tests for parser behavior, discovery behavior, and runtime
+  dispatch,
+- decide whether legacy notification hooks remain supported long term or are
+  explicitly deprecated in docs.
+
+## Open decisions
+
+- Should Codex aim for Claude-compatible field names and semantics wherever
+  possible, or only for event-name parity?
+- Should prompt and agent hooks be first-class in the initial public contract,
+  or stay experimental behind feature flags after implementation?
+- Should unsupported advanced fields be removed now to reduce confusion, or kept
+  in schema as forward-compatibility placeholders?
+- Which events should be thread-scoped versus turn-scoped in app-server and TUI
+  reporting?
+
+## Recommended first implementation slice
+
+If this work is started incrementally, the highest-leverage first slice is:
+
+1. publish a real user-facing hooks reference for Codex,
+2. make the existing five events internally coherent,
+3. add `PermissionRequest`, `Notification`, `SubagentStart`,
+   `SubagentStop`, and `SessionEnd`,
+4. then add async and HTTP handler support.
+
+That sequence closes the largest user-visible parity gaps without mixing event
+expansion, execution-model expansion, and advanced mutation semantics into one
+hard-to-review change.

From 3539af39303876c5c1a3d2ec3996a44b0f2aae32 Mon Sep 17 00:00:00 2001
From: Eric Juta <ericjohnjuta@gmail.com>
Date: Thu, 26 Mar 2026 12:59:55 +0000
Subject: [PATCH 2/8] docs: tighten hooks parity handoff

Co-authored-by: Codex <noreply@openai.com>
---
 docs/claude-code-hooks-parity.md | 71 ++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/docs/claude-code-hooks-parity.md b/docs/claude-code-hooks-parity.md
index 1094e6cf2..02c3dd7ae 100644
--- a/docs/claude-code-hooks-parity.md
+++ b/docs/claude-code-hooks-parity.md
@@ -17,6 +17,40 @@ This does not require byte-for-byte compatibility in one step. It does require:
 - honoring documented decision-control fields when they are accepted by schema,
 - documenting any intentional deltas that remain.
 
+## Read order
+
+If you are implementing against this doc, read the current source in this order:
+
+1. `docs/claude-code-hooks-parity.md`
+2. `codex-rs/hooks/src/engine/config.rs`
+3. `codex-rs/hooks/src/engine/discovery.rs`
+4. `codex-rs/hooks/src/schema.rs`
+5. `codex-rs/hooks/src/engine/output_parser.rs`
+6. `codex-rs/core/src/hook_runtime.rs`
+7. `codex-rs/core/src/codex.rs`
+8. `codex-rs/core/src/tools/registry.rs`
+
+This order moves from public contract to discovery, then schema, then parser,
+then runtime wiring, then legacy behavior.
+
+## Current source snapshot
+
+This doc is based on the current implementation shape in this checkout:
+
+- public `hooks.json` event groups are defined in
+  `codex-rs/hooks/src/engine/config.rs`,
+- handler discovery and unsupported-handler warnings live in
+  `codex-rs/hooks/src/engine/discovery.rs`,
+- public wire schema lives in `codex-rs/hooks/src/schema.rs`,
+- output acceptance and rejection behavior lives in
+  `codex-rs/hooks/src/engine/output_parser.rs`,
+- runtime dispatch for start, prompt-submit, pre-tool, and post-tool hooks
+  lives in `codex-rs/core/src/hook_runtime.rs`,
+- `Stop` hook wiring lives in `codex-rs/core/src/codex.rs`,
+- deprecated legacy `AfterToolUse` dispatch still exists in
+  `codex-rs/core/src/tools/registry.rs`,
+- no repository-local `hooks.json` files are checked into this tree today.
+
 ## Current Codex surface
 
 Today Codex exposes five public `hooks.json` event groups:
@@ -128,8 +162,34 @@ than the runtime actually honors.
 - **No hidden UI drift**: hook additions must be visible in the TUI and
   app-server surfaces anywhere hook runs are rendered today.
 
+## Do not do
+
+- Do not add a new public event without input schema, runtime dispatch,
+  hook-run reporting, and docs in the same lane.
+- Do not keep wire fields in public schema as if they are live when the parser
+  still rejects them.
+- Do not use deprecated `AfterAgent` or legacy `AfterToolUse` internals as
+  the long-term public parity path.
+- Do not widen event coverage while leaving handler type and execution mode
+  reporting misleading in run summaries.
+- Do not make hook support TUI-only; app-server and protocol surfaces must stay
+  aligned.
+
 ## Implementation plan
 
+### Branch and PR order
+
+Prefer this implementation order:
+
+1. contract cleanup for the existing five events,
+2. runtime event expansion on the command-hook engine,
+3. handler-type and execution-mode expansion,
+4. advanced decision-control support,
+5. pre/post tool-class parity work,
+6. final doc consolidation and examples.
+
+Do not mix all six into one change. Keep each lane reviewable.
+
 ### Phase 1: make the current public surface coherent
 
 Goal: remove misleading partial support inside the existing five events.
@@ -249,6 +309,17 @@ Acceptance:
 - decide whether legacy notification hooks remain supported long term or are
   explicitly deprecated in docs.
 
+## Acceptance gates for any implementation PR
+
+Every parity PR should satisfy all of these before merge:
+
+- docs updated for the newly supported behavior,
+- generated hook schema fixtures updated if the public schema changed,
+- focused tests added or updated for discovery, parser, and runtime behavior,
+- hook run summaries still render correctly in TUI and app-server surfaces,
+- unsupported behavior is either removed from schema or clearly documented as
+  unsupported.
+
 ## Open decisions
 
 - Should Codex aim for Claude-compatible field names and semantics wherever

From 40774ed67d691220a1c02b1fa50ab13d09239ca3 Mon Sep 17 00:00:00 2001
From: Eric Juta <ericjohnjuta@gmail.com>
Date: Thu, 26 Mar 2026 13:19:14 +0000
Subject: [PATCH 3/8] docs: add agentmemory replacement spec

Co-authored-by: Codex <noreply@openai.com>
---
 ...entmemory-codex-memory-replacement-spec.md | 672 ++++++++++++++++++
 1 file changed, 672 insertions(+)
 create mode 100644 docs/agentmemory-codex-memory-replacement-spec.md

diff --git a/docs/agentmemory-codex-memory-replacement-spec.md b/docs/agentmemory-codex-memory-replacement-spec.md
new file mode 100644
index 000000000..72cd811a0
--- /dev/null
+++ b/docs/agentmemory-codex-memory-replacement-spec.md
@@ -0,0 +1,672 @@
+# agentmemory replacement spec for Codex native memory
+
+This document evaluates whether a forked Codex should disable the current
+first-party memory system and replace it with
+`~/Projects/agentmemory` as the primary memory engine.
+
+It is intended to be the canonical decision and implementation handoff for this
+specific question:
+
+- is `agentmemory` materially more capable than Codex native memory,
+- is it likely more token-efficient over time,
+- if so, what would be lost by replacing Codex native memory,
+- what replacement shape is worth building in a fork.
+
+This is an architecture and product-integration spec, not a request to
+implement the replacement immediately.
+
+## Executive summary
+
+`agentmemory` is materially more advanced than Codex native memory as a
+capture and retrieval engine.
+
+The strongest deltas are:
+
+- broader lifecycle capture through a wider hook surface,
+- hybrid retrieval (BM25 + vector + graph),
+- pluggable embeddings including Gemini,
+- cross-agent MCP and REST exposure,
+- retrieval bounded by top-K / token-budgeted context instead of relying on a
+  prebuilt local memory summary alone.
+
+Codex native memory is still stronger in first-party runtime integration:
+
+- startup memory generation is built into the core session lifecycle,
+- memory artifacts are deeply integrated into prompt construction,
+- memory citations already flow through protocol and app-server surfaces,
+- there are explicit local operations for memory refresh and memory removal,
+- native memory state includes thread-level `memory_mode` semantics such as
+  `disabled` and `polluted`.
+
+Conclusion:
+
+- `agentmemory` is a material capability superset for memory retrieval and
+  capture quality.
+- It is not a strict end-to-end product superset of Codex native memory.
+- A replacement is defensible, but only if the fork rebuilds a thin Codex
+  integration layer for the native semantics that matter.
+
+Recommended direction:
+
+- do not pursue "full Claude parity first",
+- do pursue "agentmemory as the primary memory engine with Codex-specific
+  shims",
+- disable Codex native memory generation only after startup injection,
+  replacement memory ops, and a clear citation story are decided.
+
+## Target end state
+
+The target end state is not "agentmemory instead of Codex" in a narrow sense.
+The target end state is:
+
+- `agentmemory` is the primary memory engine,
+- Codex-native memory generation and consolidation are disabled,
+- Codex retains or rebuilds only the product-level semantics that still add
+  value,
+- the fork presents one coherent memory system to users,
+- the resulting system is a functional superset of both:
+  - `agentmemory` capture and retrieval strengths,
+  - Codex-native prompt/runtime/protocol integration where it materially helps.
+
+In other words, the desired architecture is a Venn-diagram merge with one
+authoritative engine, not permanent coexistence of two competing memory stacks.
+
+## Scope
+
+This spec compares:
+
+- `/private/tmp/codex`
+- `/Users/ericjuta/Projects/agentmemory`
+
+This spec is based on the current implementation shape in those checkouts,
+including user-local plugin and hook configuration files present in the
+`agentmemory` repo.
+
+## Read order
+
+Read these sources in order if implementing against this spec:
+
+1. `docs/agentmemory-codex-memory-replacement-spec.md`
+2. `docs/claude-code-hooks-parity.md`
+3. `codex-rs/core/src/memories/README.md`
+4. `codex-rs/core/src/memories/prompts.rs`
+5. `codex-rs/core/templates/memories/read_path.md`
+6. `codex-rs/core/src/codex.rs`
+7. `codex-rs/hooks/src/engine/config.rs`
+8. `codex-rs/hooks/src/engine/discovery.rs`
+9. `plugin/hooks/hooks.json` in `agentmemory`
+10. `src/hooks/*.ts` in `agentmemory`
+11. `src/providers/embedding/*.ts` and `src/state/hybrid-search.ts` in
+    `agentmemory`
+12. `README.md` and `benchmark/*.md` in `agentmemory`
+
+## Current source snapshot
+
+### Codex
+
+Codex currently has:
+
+- a first-party startup memory pipeline in
+  `codex-rs/core/src/memories/README.md`,
+- phase-1 extraction and phase-2 consolidation into `MEMORY.md`,
+  `memory_summary.md`, and rollout summary artifacts,
+- developer-prompt injection of memory read-path instructions built from
+  `memory_summary.md`,
+- protocol-level memory citations,
+- memory-management operations such as `UpdateMemories` and
+  `DropMemories`,
+- thread-level memory-mode state such as `disabled` and `polluted`,
+- an under-development `codex_hooks` feature with five public hook events.
+
+### agentmemory
+
+The `agentmemory` checkout currently contains:
+
+- a plugin manifest in `plugin/plugin.json`,
+- a Claude-oriented hook bundle in `plugin/hooks/hooks.json`,
+- TypeScript hook entrypoints under `src/hooks/`,
+- multiple embedding providers under `src/providers/embedding/`,
+- hybrid retrieval under `src/state/hybrid-search.ts`,
+- REST and MCP surfaces,
+- benchmarking and retrieval claims in `README.md` and `benchmark/`.
+
+The local `agentmemory` checkout has uncommitted changes. Treat its state as
+a working snapshot rather than release-tagged truth; the source shape is still
+adequate for architectural comparison.
+
+## Codex native memory: what it is
+
+Codex native memory is a core-managed memory pipeline, not just a retrieval
+plugin.
+
+### Pipeline shape
+
+Codex native memory runs in two phases:
+
+1. Phase 1 extracts structured memory from eligible rollouts and stores
+   stage-1 outputs in the state DB.
+2. Phase 2 consolidates those stage-1 outputs into durable memory artifacts on
+   disk and spawns an internal consolidation subagent.
+
+This is documented in `codex-rs/core/src/memories/README.md`.
+
+### Prompt integration
+
+Codex adds memory usage instructions directly into developer instructions when:
+
+- the memory feature is enabled,
+- `config.memories.use_memories` is true,
+- memory summary content exists.
+
+This is wired in `codex-rs/core/src/codex.rs` via
+`build_memory_tool_developer_instructions(...)`.
+
+### Artifact model
+
+Codex memory produces and maintains:
+
+- `memory_summary.md`
+- `MEMORY.md`
+- `raw_memories.md`
+- `rollout_summaries/*`
+- optional `skills/*`
+
+These artifacts are not just storage. They are part of how Codex routes future
+memory reads and citations.
+
+### Operational integration
+
+Codex exposes native memory operations:
+
+- `UpdateMemories`
+- `DropMemories`
+
+and memory-state controls:
+
+- `generate_memories`
+- `use_memories`
+- `no_memories_if_mcp_or_web_search`
+
+Codex also tracks thread memory-mode transitions such as `polluted`.
+
+### Citation integration
+
+Codex has protocol and app-server support for structured memory citations.
+Those citations are already part of assistant-message rendering and transport.
+
+## agentmemory: what it is
+
+`agentmemory` is not just a memory file or summary generator. It is a
+capture, indexing, retrieval, consolidation, MCP, and REST system.
+
+### Capture model
+
+The working Claude-oriented setup uses 12 hooks:
+
+- `SessionStart`
+- `UserPromptSubmit`
+- `PreToolUse`
+- `PostToolUse`
+- `PostToolUseFailure`
+- `PreCompact`
+- `SubagentStart`
+- `SubagentStop`
+- `Notification`
+- `TaskCompleted`
+- `Stop`
+- `SessionEnd`
+
+The hook bundle is defined in `plugin/hooks/hooks.json`.
+
+### Observation flow
+
+The core runtime pattern is:
+
+- hooks send observations to REST endpoints,
+- observations are deduplicated and privacy-filtered,
+- observations are compressed and indexed,
+- retrieval returns bounded context back into future sessions.
+
+The important thing is that capture happens at many lifecycle points, not just
+after a Codex-style rollout completes.
+
+### Retrieval model
+
+agentmemory uses:
+
+- BM25,
+- vector retrieval,
+- graph retrieval,
+- Reciprocal Rank Fusion,
+- session diversification,
+- progressive disclosure.
+
+This is a genuine retrieval stack, not just a durable handbook.
+
+### Embeddings
+
+agentmemory supports multiple embedding providers, including:
+
+- local embeddings,
+- Gemini embeddings,
+- OpenAI embeddings,
+- Voyage,
+- Cohere,
+- OpenRouter.
+
+Gemini embedding support is real in this checkout, not hypothetical.
+
+### Cross-agent model
+
+agentmemory is designed as a shared external service:
+
+- Claude hooks can write to it,
+- MCP clients can query it,
+- REST clients can integrate with it,
+- multiple agent products can share one instance.
+
+This is a major design difference from Codex native memory.
+
+## Capability comparison
+
+### Capture breadth
+
+Codex native memory:
+
+- captures memory from rollouts selected by startup pipeline rules,
+- is optimized around per-rollout extraction and later consolidation,
+- does not expose comparable public lifecycle capture breadth in the current
+  hook surface.
+
+agentmemory:
+
+- captures at many lifecycle points,
+- can record prompts, tool usage, failures, compaction moments, and subagent
+  lifecycle events,
+- better matches the event stream of real coding work.
+
+Verdict:
+
+- `agentmemory` is materially stronger.
+
+### Retrieval quality
+
+Codex native memory:
+
+- primarily relies on generated memory artifacts,
+- injects a read-path and memory summary into the prompt,
+- does not show comparable semantic retrieval, vector search, BM25 ranking, or
+  graph traversal in the native memory path from the current source scan.
+
+agentmemory:
+
+- provides hybrid search,
+- supports embeddings,
+- supports graph-aware retrieval,
+- uses token-bounded context assembly.
+
+Verdict:
+
+- `agentmemory` is materially stronger.
+
+### Consolidation sophistication
+
+Codex native memory:
+
+- has a robust two-phase extraction and consolidation pipeline,
+- uses a dedicated consolidation subagent,
+- maintains curated memory artifacts intended for future prompt routing.
+
+agentmemory:
+
+- claims 4-tier consolidation and memory evolution,
+- claims versioning, semantic/procedural layers, relation graphs, and
+  cascading staleness.
+
+Verdict:
+
+- `agentmemory` is likely more ambitious and broader,
+- Codex native memory is more tightly integrated and operationally simpler
+  inside Codex.
+
+### First-party runtime integration
+
+Codex native memory:
+
+- is already first-party,
+- already has prompt integration,
+- already has memory commands,
+- already has citations,
+- already participates in internal policy/state flows.
+
+agentmemory:
+
+- does not automatically provide those Codex-native product behaviors,
+- would need a Codex-specific bridge layer to replace them cleanly.
+
+Verdict:
+
+- Codex native memory is stronger here.
+
+### Cross-agent reuse
+
+Codex native memory:
+
+- is local to Codex runtime and artifacts.
+
+agentmemory:
+
+- is designed for multi-agent reuse through MCP and REST.
+
+Verdict:
+
+- `agentmemory` is materially stronger.
+
+## Is agentmemory a material superset?
+
+### Yes, in these senses
+
+agentmemory is a material superset of Codex native memory for:
+
+- retrieval breadth,
+- semantic search,
+- embedding-backed lookup,
+- graph-backed lookup,
+- cross-agent sharing,
+- hook-based observation capture.
+
+### No, in these senses
+
+agentmemory is not a strict product-level superset of Codex native memory for:
+
+- first-party startup prompt integration,
+- native memory operations (`UpdateMemories`, `DropMemories`),
+- native memory citation protocol plumbing,
+- thread-level memory-mode semantics such as `polluted`,
+- deep alignment with Codex’s state DB and app-server/TUI surfaces.
+
+The correct judgment is:
+
+- `agentmemory` is a material capability superset for retrieval and capture,
+- it is not a strict end-to-end replacement unless shims are added.
+
+The desired fork outcome therefore is:
+
+- replace Codex native memory internals,
+- preserve or rebuild the useful Codex-native user-facing semantics as shims,
+- end with a product-level superset even though `agentmemory` alone is not a
+  strict superset today.
+
+## Token efficiency
+
+This is the strongest practical argument in favor of `agentmemory`.
+
+### Strong evidence in favor of agentmemory
+
+The `agentmemory` repo explicitly claims and benchmarks token savings:
+
+- `~1,900` tokens instead of loading all memory into context (claimed in
+  `README.md`),
+- `92%` savings in `benchmark/REAL-EMBEDDINGS.md`,
+- `86%` savings in `benchmark/QUALITY.md`,
+- essentially corpus-size-stable top-K retrieval in `benchmark/SCALE.md`.
+
+The architectural reason is coherent:
+
+- retrieval returns top-K results,
+- context assembly is bounded,
+- compact result-first progressive disclosure reduces unnecessary expansion.
+
+### Codex native memory token profile
+
+Codex native memory is not obviously wasteful with tokens, but it is shaped
+differently:
+
+- `memory_summary.md` injection is truncated to `5,000` tokens in
+  `codex-rs/core/src/memories/mod.rs`,
+- stage-1 rollout processing can consume large inputs because it is an offline
+  extraction pipeline, not a lightweight query-time retrieval layer,
+- the memory read-path instructs the model to query local memory artifacts
+  rather than receiving a purpose-built top-K retrieval result from a hybrid
+  search engine.
+
+### Apples-to-oranges caution
+
+The token comparison is not perfectly head-to-head.
+
+agentmemory benchmarks compare against "load everything into context" and
+built-in-memory patterns such as monolithic `CLAUDE.md`-style memory files.
+Codex native memory is more curated than that:
+
+- it injects a bounded `memory_summary.md`,
+- it exposes a read-path for progressive on-disk lookup,
+- it does not appear to simply dump all historical memory into every turn.
+
+So it would be wrong to claim the benchmark proves "agentmemory is 92% more
+token-efficient than Codex native memory" as a verified current fact.
+
+### Bottom-line token judgment
+
+Even with that caveat, `agentmemory` is still likely more token-efficient over
+the long term than Codex native memory for large corpora because:
+
+- query-time retrieval is explicitly bounded,
+- corpus growth does not force proportional prompt growth,
+- embedding + hybrid retrieval reduces the need to over-inject summaries "just
+  in case",
+- progressive disclosure lets the system fetch more only when needed.
+
+Codex native memory likely remains acceptable for modest corpus sizes, but it
+does not appear to have the same query-time retrieval efficiency model.
+
+## Replacement architecture
+
+### Option 1: hard replacement
+
+Disable Codex native memory generation and injection entirely. Make
+`agentmemory` the only memory engine.
+
+Benefits:
+
+- cleaner mental model,
+- no duplicate memory systems,
+- retrieval quality and token efficiency become `agentmemory`-driven,
+- cross-agent memory reuse becomes first-class.
+
+Costs:
+
+- must rebuild startup prompt integration,
+- must replace or remove `UpdateMemories` and `DropMemories`,
+- must decide what to do about native memory citations,
+- must replace or drop `polluted`/thread memory-mode semantics,
+- must extend Codex hooks enough to make capture quality acceptable.
+
+Risk:
+
+- highest.
+
+## Native Codex behaviors that replacement must preserve or intentionally drop
+
+### Must preserve or replace
+
+- startup injection into developer instructions,
+- user-facing operations to refresh or clear memory state,
+- some citation strategy if memory provenance is important,
+- protocol/app-server awareness of whatever replaces native memory,
+- a clear policy for memory invalidation / pollution.
+
+### Safe to drop if explicitly accepted
+
+- on-disk `MEMORY.md` / `memory_summary.md` artifact format compatibility,
+- the exact current phase-1 / phase-2 internal implementation,
+- native Codex consolidation subagent if `agentmemory` becomes authoritative,
+- native artifact grooming and rollout summary persistence if the fork no longer
+  treats those as the canonical memory store.
+
+## Key risks
+
+### Duplicate system ambiguity
+
+If both systems remain partially active, it becomes unclear:
+
+- which system is authoritative,
+- which one should inject context,
+- which one should be cited,
+- which one should be cleared by a user-facing "drop memories" action.
+
+Avoid this.
+
+### Hook-surface insufficiency
+
+Current Codex hooks are not enough to reproduce Claude-style `agentmemory`
+capture quality:
+
+- only five public events,
+- sync command handlers only,
+- narrower tool coverage,
+- missing public equivalents for several useful lifecycle events.
+
+If the fork does not extend hooks, the replacement will still leave value on
+the table.
+
+### Protocol and UX regressions
+
+Dropping native Codex memory without replacing its protocol-level behaviors can
+regress:
+
+- assistant memory citations,
+- memory-management commands,
+- app-server/TUI expectations around memory-aware behavior.
+
+### Benchmark over-claiming
+
+Do not claim:
+
+- that the `agentmemory` benchmarks directly prove a specific percentage gain
+  over Codex native memory,
+- or that Gemini embeddings alone guarantee better results.
+
+The right claim is narrower:
+
+- `agentmemory` has a more scalable retrieval architecture and published token
+  savings versus all-in-context memory loading approaches,
+- and that architecture is likely better long-term than Codex native memory for
+  large memory corpora.
+
+## Recommendation
+
+Target hard replacement as the end state.
+
+That means:
+
+1. make `agentmemory` the sole authoritative memory engine,
+2. disable Codex native memory generation and consolidation in the final
+   architecture,
+3. rebuild only the Codex-native product semantics worth preserving as shims on
+   top of `agentmemory`,
+4. remove or deprecate native Codex memory artifacts and workflows in the fork
+   once those shims exist.
+
+This is the recommended path because it matches the explicit desired outcome:
+
+- one memory authority,
+- no split-brain behavior,
+- `agentmemory` as the stronger retrieval and capture substrate,
+- Codex integration retained only where it improves the product.
+
+The fork can still phase the work, but every phase should point toward hard
+replacement rather than toward permanent coexistence.
+
+## Recommended implementation phases
+
+### Phase 1: decision and contract
+
+- Decide that `agentmemory` is the primary memory authority.
+- Freeze which native Codex behaviors will be preserved.
+- Define how startup context injection will work in the fork.
+- Decide whether native memory citations remain required.
+- Define the end state explicitly as a functional superset, not a partial port.
+
+### Phase 2: Codex integration adapter
+
+- Add a Codex-specific `agentmemory` integration layer.
+- Replace startup memory prompt generation with `agentmemory` retrieval.
+- Add equivalent user-facing operations for refresh and clear.
+- Decide whether these call into `agentmemory` REST/MCP or a local adapter.
+
+### Phase 3: hook expansion
+
+- Extend Codex hook coverage enough to support useful `agentmemory` capture.
+- Minimum likely useful additions:
+  - `SessionEnd`
+  - `PostToolUseFailure`
+  - `SubagentStart`
+  - `SubagentStop`
+  - `Notification`
+  - `PreCompact`
+
+### Phase 4: native memory deprecation
+
+- Turn off Codex native memory generation by default in the fork.
+- Remove or quarantine old native memory artifacts once the adapter is stable.
+- Preserve migration tooling only if existing users need it.
+
+### Phase 5: superset hardening
+
+- Verify that every retained Codex-native memory affordance has an
+  `agentmemory`-backed implementation or an intentional deletion note.
+- Verify that token usage remains bounded as corpus size grows.
+- Verify that there is only one authoritative memory source in the runtime.
+- Remove any remaining code paths that can accidentally re-enable split-brain
+  behavior.
+
+### Phase 6: optional advanced alignment
+
+- Add memory citation mapping from `agentmemory` results into Codex protocol
+  structures.
+- Add richer protocol and app-server visibility if needed.
+- Reassess whether any remaining native memory logic should survive.
+
+## Do not do
+
+- Do not run Codex native memory injection and `agentmemory` injection as
+  equal peers long term.
+- Do not claim a strict superset without rebuilding missing Codex-native
+  semantics.
+- Do not clone Claude plugin infrastructure into Codex just to make the
+  replacement work.
+- Do not overfit to Claude-specific bridge behavior such as
+  `~/.claude/projects/*/memory/MEMORY.md` if Codex is becoming the primary
+  target.
+- Do not remove native memory citations or memory operations accidentally; if
+  they are dropped, document that as an intentional product change.
+
+## Acceptance criteria for a forked replacement
+
+The replacement is successful only if all of these are true:
+
+- `agentmemory` is the authoritative source for retrieved memory context,
+- Codex native memory is no longer an independent competing authority,
+- Codex startup injection still works reliably,
+- memory refresh and memory clearing remain user-visible operations or are
+  intentionally removed with docs,
+- hook/event coverage is sufficient to produce materially useful observations,
+- token usage stays bounded as the corpus grows,
+- the fork has a clear provenance story for memory-derived output,
+- there is no ambiguity about which memory system is active,
+- the resulting user-facing behavior is a practical superset of the two source
+  systems rather than a regression-heavy swap.
+
+## Final judgment
+
+If the question is "is `agentmemory` materially more advanced than Codex
+native memory?", the answer is yes.
+
+If the question is "should a fork disable Codex native memory and replace it
+with `agentmemory`?", the answer is:
+
+- yes,
+- with the condition that the fork also rebuild the Codex-native integration
+  semantics that matter,
+- and with the explicit goal of a single authoritative memory system rather
+  than a permanent hybrid.

From 6c5094f86ed22d91f53fbe053125599c47177e7e Mon Sep 17 00:00:00 2001
From: Eric Juta <ericjohnjuta@gmail.com>
Date: Thu, 26 Mar 2026 13:24:33 +0000
Subject: [PATCH 4/8] docs: tighten agentmemory replacement target

Co-authored-by: Codex <noreply@openai.com>

---
 ...entmemory-codex-memory-replacement-spec.md | 79 +++++++++++++++++--
 1 file changed, 74 insertions(+), 5 deletions(-)

diff --git a/docs/agentmemory-codex-memory-replacement-spec.md b/docs/agentmemory-codex-memory-replacement-spec.md
index 72cd811a0..6861cd4a7 100644
--- a/docs/agentmemory-codex-memory-replacement-spec.md
+++ b/docs/agentmemory-codex-memory-replacement-spec.md
@@ -63,6 +63,12 @@ The target end state is:
 - Codex-native memory generation and consolidation are disabled,
 - Codex retains or rebuilds only the product-level semantics that still add
   value,
+- the fork uses the full `agentmemory` retrieval stack in steady state:
+  BM25 + vector + graph,
+- embeddings are enabled by default in steady state; BM25-only mode is a
+  fallback, not the target architecture,
+- lifecycle capture uses the widest useful hook surface rather than the minimum
+  viable subset,
 - the fork presents one coherent memory system to users,
 - the resulting system is a functional superset of both:
   - \`agentmemory\` capture and retrieval strengths,
@@ -71,6 +77,30 @@ The target end state is:
 In other words, the desired architecture is a Venn-diagram merge with one
 authoritative engine, not permanent coexistence of two competing memory stacks.
 
+## Maximum-performance policy
+
+The intended end state should maximize `agentmemory`, not merely adopt it.
+
+That means:
+
+- use hybrid retrieval as the primary retrieval path,
+- enable embeddings by default in the intended production configuration,
+- preserve graph retrieval and relation-aware retrieval as first-class
+  capabilities,
+- use progressive disclosure and token budgets instead of large static memory
+  injections wherever possible,
+- implement enough hook coverage that the observation stream is rich rather
+  than sparse,
+- treat BM25-only mode as an acceptable degraded mode, not as the target.
+
+Provider policy:
+
+- support all current `agentmemory` embedding providers,
+- keep Gemini embeddings available as a first-class provider,
+- prefer the best available embedding backend for the environment rather than
+  hardcoding a low-capability default in the architecture,
+- avoid designing the replacement around a no-embeddings baseline.
+
 ## Scope
 
 This spec compares:
@@ -479,7 +509,8 @@ Costs:
 - must replace or remove `UpdateMemories` and `DropMemories`,
 - must decide what to do about native memory citations,
 - must replace or drop `polluted`/thread memory-mode semantics,
-- must extend Codex hooks enough to make capture quality acceptable.
+- must extend Codex hooks enough to make capture quality fully competitive with
+  the `agentmemory` model rather than merely acceptable.
 
 Risk:
 
@@ -553,6 +584,20 @@ The right claim is narrower:
 - and that architecture is likely better long-term than Codex native memory for
   large memory corpora.
 
+### Performance-oriented token policy
+
+The intended architecture should optimize for query-time token efficiency, not
+artifact compatibility.
+
+That means:
+
+- prefer top-K retrieval over broad handbook injection,
+- keep startup context bounded and relevance-ranked,
+- expand details only on demand,
+- avoid recreating a large static `MEMORY.md`-style injection layer on top of
+  `agentmemory`,
+- measure steady-state tokens/query as a first-class success metric.
+
 ## Recommendation
 
 Target hard replacement as the end state.
@@ -593,17 +638,33 @@ replacement rather than toward permanent coexistence.
 - Replace startup memory prompt generation with `agentmemory` retrieval.
 - Add equivalent user-facing operations for refresh and clear.
 - Decide whether these call into `agentmemory` REST/MCP or a local adapter.
+- Route startup injection through the bounded `agentmemory` retrieval path
+  rather than recreating Codex-native memory artifact loading.
+- Make token budget, retrieval mode, and expansion behavior explicit parts of
+  the adapter contract.
 
 ### Phase 3: hook expansion
 
-- Extend Codex hook coverage enough to support useful `agentmemory` capture.
-- Minimum likely useful additions:
-  - `SessionEnd`
+- Extend Codex hook coverage to support the full useful `agentmemory`
+  observation model, not just a minimum subset.
+- Target the full current `agentmemory` hook set:
+  - `SessionStart`
+  - `UserPromptSubmit`
+  - `PreToolUse`
+  - `PostToolUse`
   - `PostToolUseFailure`
+  - `PreCompact`
   - `SubagentStart`
   - `SubagentStop`
   - `Notification`
-  - `PreCompact`
+  - `TaskCompleted`
+  - `Stop`
+  - `SessionEnd`
+- Broaden `PreToolUse` and `PostToolUse` beyond the current shell-centric
+  path so file tools, command tools, and other high-signal tool classes are
+  observed consistently.
+- Do not treat hook expansion as optional polish; it is core to achieving the
+  high-performance end state.
 
 ### Phase 4: native memory deprecation
 
@@ -619,6 +680,10 @@ replacement rather than toward permanent coexistence.
 - Verify that there is only one authoritative memory source in the runtime.
 - Remove any remaining code paths that can accidentally re-enable split-brain
   behavior.
+- Verify that embeddings, graph retrieval, and progressive disclosure are
+  active in the intended steady-state configuration.
+- Verify that the system is not silently falling back to a lower-capability
+  retrieval mode in normal operation.
 
 ### Phase 6: optional advanced alignment
 
@@ -652,6 +717,10 @@ The replacement is successful only if all of these are true:
   intentionally removed with docs,
 - hook/event coverage is sufficient to produce materially useful observations,
 - token usage stays bounded as the corpus grows,
+- the intended steady state uses embeddings and hybrid retrieval rather than a
+  degraded BM25-only baseline,
+- Gemini or another high-quality embedding provider remains available as a
+  first-class configuration path,
 - the fork has a clear provenance story for memory-derived output,
 - there is no ambiguity about which memory system is active,
 - the resulting user-facing behavior is a practical superset of the two source

From fb371104a008dfed7a850747bd04c03e9c412408 Mon Sep 17 00:00:00 2001
From: Eric Juta <ericjohnjuta@gmail.com>
Date: Thu, 26 Mar 2026 13:26:18 +0000
Subject: [PATCH 5/8] docs: note agentmemory env alignment

Co-authored-by: Codex <noreply@openai.com>

---
 ...entmemory-codex-memory-replacement-spec.md | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/docs/agentmemory-codex-memory-replacement-spec.md b/docs/agentmemory-codex-memory-replacement-spec.md
index 6861cd4a7..7c09b2b69 100644
--- a/docs/agentmemory-codex-memory-replacement-spec.md
+++ b/docs/agentmemory-codex-memory-replacement-spec.md
@@ -164,6 +164,39 @@ The local `agentmemory` checkout is currently dirty. This matters only as a
 reminder not to treat the local repo state as release-tagged truth; the source
 shape is still adequate for architectural comparison.
 
+### Current env alignment
+
+The live worker configuration is not sourced from
+`~/Projects/agentmemory/.env`. In this checkout, `docker-compose.yml` points
+the worker at:
+
+- `${HOME}/.agentmemory/.env`
+
+Current state of that externally loaded env file, verified with values redacted:
+
+- `GEMINI_API_KEY` is present,
+- `GEMINI_MODEL` is present,
+- `GEMINI_EMBEDDING_MODEL` is present,
+- `GEMINI_EMBEDDING_DIMENSIONS` is present,
+- `GRAPH_EXTRACTION_ENABLED` is present,
+- `CONSOLIDATION_ENABLED` is present.
+
+Implications:
+
+- the current live environment already aligns with Gemini-first provider
+  selection,
+- embedding auto-detection should resolve to Gemini unless explicitly
+  overridden,
+- graph extraction and consolidation are already enabled in the current
+  external env,
+- the current external env does not explicitly pin `EMBEDDING_PROVIDER`,
+  `TOKEN_BUDGET`, `BM25_WEIGHT`, `VECTOR_WEIGHT`, or
+  `FALLBACK_PROVIDERS`, so those currently rely on code defaults rather than
+  explicit ops policy.
+
+For a maximum-performance steady state, that last point should be treated as a
+configuration gap, not as the desired final setup.
+
 ## Codex native memory: what it is
 
 Codex native memory is a core-managed memory pipeline, not just a retrieval

From 0d6d7053402572eebd45384302d91f87a145db3c Mon Sep 17 00:00:00 2001
From: Eric Juta <ericjohnjuta@gmail.com>
Date: Thu, 26 Mar 2026 16:01:24 +0000
Subject: [PATCH 6/8] docs: add replacement execution plan

Co-authored-by: Codex <noreply@openai.com>

---
 ...entmemory-codex-memory-replacement-spec.md | 238 ++++++++++++++++++
 1 file changed, 238 insertions(+)

diff --git a/docs/agentmemory-codex-memory-replacement-spec.md b/docs/agentmemory-codex-memory-replacement-spec.md
index 7c09b2b69..17fc2cf37 100644
--- a/docs/agentmemory-codex-memory-replacement-spec.md
+++ b/docs/agentmemory-codex-memory-replacement-spec.md
@@ -725,6 +725,244 @@ replacement rather than toward permanent coexistence.
 - Add richer protocol and app-server visibility if needed.
 - Reassess whether any remaining native memory logic should survive.
 
+## Execution plan
+
+This section turns the replacement architecture into a low-rebase execution
+plan.
+
+The key rule is:
+
+- keep invasive edits concentrated in a few upstream-hot orchestration files,
+- keep most new logic in fork-owned modules,
+- gate native behavior off before deleting it.
+
+### Allowed write boundaries
+
+The preferred fork seam is:
+
+- small edits in:
+  - `codex-rs/core/src/codex.rs`
+  - `codex-rs/core/src/hook_runtime.rs`
+  - `codex-rs/hooks/src/engine/config.rs`
+  - `codex-rs/hooks/src/engine/discovery.rs`
+  - hook event/schema files only when required for new public events
+- most new implementation in new fork-owned modules, for example:
+  - `codex-rs/core/src/agentmemory/`
+  - `codex-rs/hooks/src/agentmemory/` or equivalent hook-translation module
+
+### Intentionally untouched until cutover
+
+Do not broadly rewrite these early:
+
+- `codex-rs/core/src/memories/*`
+- `codex-rs/core/templates/memories/*`
+- native memory artifact generation logic
+- broad protocol/app-server surfaces unrelated to memory provenance
+
+Early phases should gate or bypass these paths, not delete or refactor them.
+
+### Branch order
+
+Use a short stack of focused branches / PRs.
+
+#### PR 1: backend selector and fork seam
+
+Goal:
+
+- introduce a clear memory backend selector,
+- add the new `agentmemory` adapter module skeleton,
+- make no user-visible behavior change yet.
+
+Write scope:
+
+- config wiring,
+- new adapter modules,
+- minimal callsite plumbing only where needed.
+
+Must not do:
+
+- do not delete native memory,
+- do not change protocol shapes,
+- do not expand hooks yet.
+
+Merge gate:
+
+- no behavior regression with native memory still active by default,
+- docs updated to describe the seam.
+
+#### PR 2: startup injection replacement
+
+Goal:
+
+- route startup memory injection through the `agentmemory` adapter,
+- make bounded retrieval the new startup path,
+- stop depending on native memory artifact loading for startup context.
+
+Write scope:
+
+- `codex-rs/core/src/codex.rs`
+- adapter module
+- minimal config/docs updates
+
+Must not do:
+
+- do not delete native memories yet,
+- do not add broad protocol changes,
+- do not expand hook coverage in the same PR.
+
+Merge gate:
+
+- startup context is sourced from `agentmemory`,
+- token budget and retrieval mode are explicit and tested,
+- no static `MEMORY.md`-style reinjection layer is recreated on top.
+
+#### PR 3: public hook event expansion
+
+Goal:
+
+- expand Codex hooks to cover the full useful `agentmemory` hook set:
+  - `SessionStart`
+  - `UserPromptSubmit`
+  - `PreToolUse`
+  - `PostToolUse`
+  - `PostToolUseFailure`
+  - `PreCompact`
+  - `SubagentStart`
+  - `SubagentStop`
+  - `Notification`
+  - `TaskCompleted`
+  - `Stop`
+  - `SessionEnd`
+
+Write scope:
+
+- hook config/schema/discovery/runtime files,
+- TUI/app-server visibility only where hook runs need surfacing.
+
+Must not do:
+
+- do not mix in native memory deletion,
+- do not mix in citation replacement.
+
+Merge gate:
+
+- each event has runtime dispatch,
+- each event is documented,
+- hook run visibility remains coherent.
+
+#### PR 4: tool coverage broadening
+
+Goal:
+
+- broaden `PreToolUse` and `PostToolUse` beyond the shell-centric path,
+- ensure file tools, command tools, and other high-signal tool classes are
+  observed consistently for `agentmemory`.
+
+Write scope:
+
+- `codex-rs/core/src/hook_runtime.rs`
+- tool handler payload plumbing
+- hook translation layer
+
+Must not do:
+
+- do not mix in memory command replacement,
+- do not delete native memory paths yet.
+
+Merge gate:
+
+- high-signal tool classes emit useful observation payloads,
+- no regression in existing shell-hook flows.
+
+#### PR 5: memory ops and provenance replacement
+
+Goal:
+
+- replace or redefine `UpdateMemories` and `DropMemories`,
+- decide and implement provenance behavior,
+- define the replacement for native `polluted` semantics.
+
+Write scope:
+
+- memory command handlers,
+- provenance/citation integration,
+- minimal protocol additions if absolutely required.
+
+Must not do:
+
+- do not combine this with broad deletion of native memory code.
+
+Merge gate:
+
+- user-facing memory refresh/clear actions still exist or are intentionally
+  documented as removed,
+- provenance behavior is explicit,
+- no ambiguity remains about memory invalidation rules.
+
+#### PR 6: hard cutover
+
+Goal:
+
+- disable native memory generation and consolidation in normal runtime paths,
+- make `agentmemory` the only authoritative memory backend,
+- quarantine or deprecate native memory artifacts.
+
+Write scope:
+
+- backend selection defaults,
+- final cutover gating,
+- cleanup of callsites that can still route to native memory.
+
+Must not do:
+
+- do not do broad code deletion unless the fork is already stable after cutover,
+- do not remove debug/rollback switches until at least one successful rebase
+  cycle after cutover.
+
+Merge gate:
+
+- one memory authority in runtime,
+- no split-brain injection,
+- no accidental native fallback in standard flows.
+
+#### PR 7: post-cutover cleanup
+
+Goal:
+
+- remove dead native-memory paths only after the hard cutover has stabilized.
+
+Write scope:
+
+- native memory code and docs that are no longer reachable,
+- migration notes if existing users need them.
+
+Merge gate:
+
+- cleanup removes more rebase churn than it creates,
+- rollback path is no longer needed.
+
+### Rebase policy
+
+- Rebase frequently; do not let this stack drift for long.
+- Rebase before opening each PR and after any upstream changes touching:
+  - `codex-rs/core/src/codex.rs`
+  - `codex-rs/core/src/hook_runtime.rs`
+  - hook engine config/discovery/schema/runtime files
+- Prefer new modules over editing existing modules repeatedly.
+- If a behavior can live in the adapter, keep it out of upstream-hot files.
+- Do not delete upstream code early; disabling is cheaper to rebase than
+  removal.
+
+### Success metrics by PR
+
+- PR 1: seam exists with no behavior regression.
+- PR 2: startup injection is `agentmemory`-backed and token-bounded.
+- PR 3: hook surface matches the intended `agentmemory` event model.
+- PR 4: observation capture is rich across the important tool classes.
+- PR 5: memory ops and provenance no longer depend on native memory internals.
+- PR 6: runtime has one authoritative memory backend.
+- PR 7: dead code removal does not increase future rebase cost materially.
+
 ## Do not do
 
 - Do not run Codex native memory injection and `agentmemory` injection as

From 3792d2fb8bb89309020c9b0d9401b3106428d8c4 Mon Sep 17 00:00:00 2001
From: Eric Juta <ericjohnjuta@gmail.com>
Date: Thu, 26 Mar 2026 16:10:33 +0000
Subject: [PATCH 7/8] docs: add handoff prompts to replacement plan

Co-authored-by: Codex <noreply@openai.com>

---
 ...entmemory-codex-memory-replacement-spec.md | 181 ++++++++++++++++++
 1 file changed, 181 insertions(+)

diff --git a/docs/agentmemory-codex-memory-replacement-spec.md b/docs/agentmemory-codex-memory-replacement-spec.md
index 17fc2cf37..451fa4a4c 100644
--- a/docs/agentmemory-codex-memory-replacement-spec.md
+++ b/docs/agentmemory-codex-memory-replacement-spec.md
@@ -963,6 +963,187 @@ Merge gate:
 - PR 6: runtime has one authoritative memory backend.
 - PR 7: dead code removal does not increase future rebase cost materially.
 
+### Handoff prompts by PR
+
+These are intended as copy-paste prompts for future sessions, child agents, or
+parallel worker swarms. Each prompt is deliberately scoped to one PR-sized
+slice.
+
+#### PR 1 handoff prompt
+
+```text
+Implement PR 1 from docs/agentmemory-codex-memory-replacement-spec.md.
+
+Goal:
+- introduce a clear memory backend selector
+- add the new agentmemory adapter seam
+- make no user-visible behavior change yet
+
+Constraints:
+- keep invasive edits concentrated
+- do not delete or broadly rewrite codex-rs/core/src/memories/*
+- do not change protocol shapes
+- do not expand hooks yet
+
+Write scope:
+- config wiring
+- new fork-owned adapter modules
+- minimal callsite plumbing only where needed
+
+Acceptance:
+- native memory remains default and behaviorally unchanged
+- the seam exists and is documented
+- code is structured so later PRs can route through the adapter without large rewrites
+```
+
+#### PR 2 handoff prompt
+
+```text
+Implement PR 2 from docs/agentmemory-codex-memory-replacement-spec.md.
+
+Goal:
+- replace startup memory prompt generation with agentmemory-backed retrieval
+- make startup context bounded, relevance-ranked, and token-budgeted
+
+Constraints:
+- do not recreate static MEMORY.md-style loading on top of agentmemory
+- do not expand hooks in this PR
+- do not delete native memory code yet
+
+Write scope:
+- codex-rs/core/src/codex.rs
+- agentmemory adapter module
+- small config/docs updates if required
+
+Acceptance:
+- startup injection is sourced through the adapter
+- retrieval mode and token budget are explicit
+- native memory still exists only as a gated fallback path, not the main path
+```
+
+#### PR 3 handoff prompt
+
+```text
+Implement PR 3 from docs/agentmemory-codex-memory-replacement-spec.md.
+
+Goal:
+- expand Codex public hooks to support the full useful agentmemory event model
+
+Target events:
+- SessionStart
+- UserPromptSubmit
+- PreToolUse
+- PostToolUse
+- PostToolUseFailure
+- PreCompact
+- SubagentStart
+- SubagentStop
+- Notification
+- TaskCompleted
+- Stop
+- SessionEnd
+
+Constraints:
+- keep handler semantics coherent
+- do not mix in native memory deletion
+- do not mix in provenance/citation replacement
+
+Acceptance:
+- each target event is represented in config/discovery/runtime
+- documentation and hook-run visibility are updated
+- new events do not regress existing hook behavior
+```
+
+#### PR 4 handoff prompt
+
+```text
+Implement PR 4 from docs/agentmemory-codex-memory-replacement-spec.md.
+
+Goal:
+- broaden PreToolUse and PostToolUse beyond the shell-centric path
+- ensure high-signal tool classes produce useful agentmemory observations
+
+Constraints:
+- prioritize file tools, command tools, and other high-signal tool classes
+- do not mix in memory command replacement
+- do not cut over the backend here
+
+Acceptance:
+- important tool classes emit observation payloads consistently
+- shell-hook behavior still works
+- capture quality is materially closer to the Claude-side agentmemory model
+```
+
+#### PR 5 handoff prompt
+
+```text
+Implement PR 5 from docs/agentmemory-codex-memory-replacement-spec.md.
+
+Goal:
+- replace or redefine UpdateMemories and DropMemories
+- decide and implement provenance behavior
+- define the replacement for native polluted semantics
+
+Constraints:
+- keep protocol churn minimal unless required
+- make user-facing behavior explicit
+- do not delete native memory paths in this PR
+
+Acceptance:
+- memory refresh/clear actions still exist or are intentionally removed with docs
+- provenance behavior is explicit
+- invalidation rules are no longer ambiguous
+```
+
+#### PR 6 handoff prompt
+
+```text
+Implement PR 6 from docs/agentmemory-codex-memory-replacement-spec.md.
+
+Goal:
+- make agentmemory the only authoritative runtime memory backend
+- disable native memory generation and consolidation in normal runtime paths
+
+Constraints:
+- do not do broad dead-code deletion yet
+- keep rollback/debug switches until cutover is validated
+
+Acceptance:
+- one memory authority remains in runtime
+- no split-brain injection is possible in standard flows
+- native paths are gated off rather than accidentally still active
+```
+
+#### PR 7 handoff prompt
+
+```text
+Implement PR 7 from docs/agentmemory-codex-memory-replacement-spec.md.
+
+Goal:
+- perform post-cutover cleanup only after the hard replacement is stable
+
+Constraints:
+- prefer cleanup that reduces future rebase cost
+- do not remove rollback/debug tooling prematurely
+
+Acceptance:
+- dead native-memory paths are removed only when safe
+- cleanup does not create more rebase drag than it removes
+```
+
+#### Cross-PR reviewer prompt
+
+```text
+Review the current PR against docs/agentmemory-codex-memory-replacement-spec.md.
+
+Focus:
+- does this PR stay within its assigned write boundary
+- does it reduce or increase future rebase drag
+- does it preserve the hard-replacement target
+- does it accidentally introduce split-brain behavior
+- does it move the system toward maximum-performance agentmemory usage rather than a degraded fallback
+```
+
 ## Do not do
 
 - Do not run Codex native memory injection and `agentmemory` injection as

From 7651a9be717bcd37cd087639f7d0aaf653222e7f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 26 Mar 2026 16:25:37 +0000
Subject: [PATCH 8/8] build(deps): bump pulldown-cmark from 0.10.3 to 0.13.3 in
 /codex-rs

Bumps [pulldown-cmark](https://github.com/raphlinus/pulldown-cmark) from 0.10.3 to 0.13.3.
- [Release notes](https://github.com/raphlinus/pulldown-cmark/releases)
- [Commits](https://github.com/raphlinus/pulldown-cmark/compare/v0.10.3...v0.13.3)

---
updated-dependencies:
- dependency-name: pulldown-cmark
  dependency-version: 0.13.3
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 codex-rs/Cargo.lock | 8 ++++----
 codex-rs/Cargo.toml | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index 7a554c2bc..f6b2790f1 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -7534,9 +7534,9 @@ checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac"
 
 [[package]]
 name = "pulldown-cmark"
-version = "0.10.3"
+version = "0.13.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76979bea66e7875e7509c4ec5300112b316af87fa7a252ca91c448b32dfe3993"
+checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad"
 dependencies = [
  "bitflags 2.10.0",
  "getopts",
@@ -7547,9 +7547,9 @@ dependencies = [
 
 [[package]]
 name = "pulldown-cmark-escape"
-version = "0.10.1"
+version = "0.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd348ff538bc9caeda7ee8cad2d1d48236a1f443c1fa3913c6a02fe0043b1dd3"
+checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae"
 
 [[package]]
 name = "pxfm"
diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml
index 556976fc1..036761c78 100644
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -261,7 +261,7 @@ pathdiff = "0.2"
 portable-pty = "0.9.0"
 predicates = "3"
 pretty_assertions = "1.4.1"
-pulldown-cmark = "0.10"
+pulldown-cmark = "0.13"
 quick-xml = "0.38.4"
 rand = "0.9"
 ratatui = "0.29.0"