From e3513b184c1163b27935470ba1cdfb328b12df72 Mon Sep 17 00:00:00 2001
From: Hanwen Cheng <heawen.cheng@gmail.com>
Date: Wed, 15 Apr 2026 11:34:44 +0800
Subject: [PATCH 1/3] "Claude PR Assistant workflow"

---
 .github/workflows/claude.yml | 50 ++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 .github/workflows/claude.yml

diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
new file mode 100644
index 0000000..d300267
--- /dev/null
+++ b/.github/workflows/claude.yml
@@ -0,0 +1,50 @@
+name: Claude Code
+
+on:
+  issue_comment:
+    types: [created]
+  pull_request_review_comment:
+    types: [created]
+  issues:
+    types: [opened, assigned]
+  pull_request_review:
+    types: [submitted]
+
+jobs:
+  claude:
+    if: |
+      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
+      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+      issues: read
+      id-token: write
+      actions: read # Required for Claude to read CI results on PRs
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Run Claude Code
+        id: claude
+        uses: anthropics/claude-code-action@v1
+        with:
+          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+
+          # This is an optional setting that allows Claude to read CI results on PRs
+          additional_permissions: |
+            actions: read
+
+          # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
+          # prompt: 'Update the pull request description to include a summary of changes.'
+
+          # Optional: Add claude_args to customize behavior and configuration
+          # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+          # or https://code.claude.com/docs/en/cli-reference for available options
+          # claude_args: '--allowed-tools Bash(gh pr:*)'
+

From c15d5e13b900169c05dcbf1ab51c47012810f182 Mon Sep 17 00:00:00 2001
From: Hanwen Cheng <heawen.cheng@gmail.com>
Date: Wed, 15 Apr 2026 11:34:46 +0800
Subject: [PATCH 2/3] "Claude Code Review workflow"

---
 .github/workflows/claude-code-review.yml | 44 ++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 .github/workflows/claude-code-review.yml

diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml
new file mode 100644
index 0000000..b5e8cfd
--- /dev/null
+++ b/.github/workflows/claude-code-review.yml
@@ -0,0 +1,44 @@
+name: Claude Code Review
+
+on:
+  pull_request:
+    types: [opened, synchronize, ready_for_review, reopened]
+    # Optional: Only run on specific file changes
+    # paths:
+    #   - "src/**/*.ts"
+    #   - "src/**/*.tsx"
+    #   - "src/**/*.js"
+    #   - "src/**/*.jsx"
+
+jobs:
+  claude-review:
+    # Optional: Filter by PR author
+    # if: |
+    #   github.event.pull_request.user.login == 'external-contributor' ||
+    #   github.event.pull_request.user.login == 'new-developer' ||
+    #   github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
+
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+      issues: read
+      id-token: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Run Claude Code Review
+        id: claude-review
+        uses: anthropics/claude-code-action@v1
+        with:
+          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+          plugin_marketplaces: 'https://github.com/anthropics/claude-code.git'
+          plugins: 'code-review@claude-code-plugins'
+          prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}'
+          # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+          # or https://code.claude.com/docs/en/cli-reference for available options
+

From 537ab462630074d9e7953c68f362ca6eb4a757bf Mon Sep 17 00:00:00 2001
From: Hanwen Cheng <heawen.cheng@gmail.com>
Date: Wed, 15 Apr 2026 11:42:00 +0800
Subject: [PATCH 3/3] ci: wire agentkeys-specific context into Claude Code
 workflows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Augments the auto-generated Claude Code + Claude Code Review workflows
with context from 15+ PR review cycles in this repo so the Action
produces findings consistent with recent codex iterations instead of
generic Rust advice.

## claude-code-review.yml

- Scope `on.pull_request.paths` to `crates/**`, `docs/**`, `wiki/**`,
  workflows, Cargo.toml, CLAUDE.md, and harness/. Skips cheap Cargo.lock
  churn.
- `fetch-depth: 0` so Claude can inspect `git log` / `git blame` during
  review (useful for "this finding predates the PR" arguments).
- `dtolnay/rust-toolchain@stable` + `Swatinem/rust-cache@v2` so every
  `cargo check` / `cargo test -p <crate>` in-session runs fast.
- Custom prompt injects:
  - crate names (agentkeys-types, agentkeys-core, etc)
  - pointer to CLAUDE.md for architecture + mock-server design principles
  - pointer to the new .github/REVIEW_GUIDELINES.md for agentkeys-specific
    review patterns
  - `--test-threads=1` requirement (tests mutate shared HOME/keyring)
  - the 8-pattern checklist (audit-log DENIED rows, URL-encoding via
    reqwest .query(), session-token redaction, case-insensitive wallet
    comparison, 30-day TTL, synchronous keychain ops, path-traversal
    guards, cross-wallet credential safety)
- `claude_args --allowed-tools` whitelist for cargo/git/gh so the
  Action can actually run the cargo commands the prompt tells it to.

## claude.yml (@claude mentions)

- Same Rust toolchain + cache setup so `@claude run tests` /
  `@claude check clippy` requests don't pay cold-compile cost.
- `fetch-depth: 0` for git-history tools.
- Same `claude_args --allowed-tools` whitelist plus `gh pr comment:*` /
  `gh pr edit:*` so @claude can update PR bodies and comment back with
  findings.

## .github/REVIEW_GUIDELINES.md (new)

Single source of truth for agentkeys review patterns, extracted from
PRs #18-#38 (fix/issue-10 through fix/issue-37). Documents:

- Test constraints (`--test-threads=1`, per-crate targeting)
- 10 canonical bug patterns that codex has flagged repeatedly
- Architectural invariants (master→agent single-hop; no users yet)
- Scope-control guidance (no speculative refactors, no backwards-compat
  shims pre-launch)
- Policy for codex-vs-claude disagreements

Each pattern has a PR/issue reference so reviewers (and future Claude
runs) can trace why the rule exists.
---
 .github/REVIEW_GUIDELINES.md             | 177 +++++++++++++++++++++++
 .github/workflows/claude-code-review.yml |  77 ++++++++--
 .github/workflows/claude.yml             |  33 +++--
 3 files changed, 259 insertions(+), 28 deletions(-)
 create mode 100644 .github/REVIEW_GUIDELINES.md

diff --git a/.github/REVIEW_GUIDELINES.md b/.github/REVIEW_GUIDELINES.md
new file mode 100644
index 0000000..8f4a4f9
--- /dev/null
+++ b/.github/REVIEW_GUIDELINES.md
@@ -0,0 +1,177 @@
+# Review Guidelines — agentkeys
+
+This is the single source of truth for code review patterns in this repo. The
+`claude-code-review.yml` workflow points Claude at this file; human reviewers
+should also use it as a checklist.
+
+Background: these patterns were distilled from 15+ PR review cycles in
+March-April 2026 where codex repeatedly surfaced the same classes of bug. Each
+numbered item below has been a real P1 or P2 finding at least once.
+
+## Test constraints
+
+- **ALWAYS use `cargo test -p <crate> -- --test-threads=1`.** Tests mutate
+  shared process env (HOME, keyring accounts, AGENTKEYS_SESSION_STORE) and
+  parallel runs race. This is not negotiable.
+- Target only affected crates. `cargo test` on the whole workspace takes 5+
+  minutes and masks which crate actually regressed.
+- `cargo clippy -p <crate> -- -D warnings` should stay clean.
+- When adding FFI code (macOS LAContext, keychain), add both L2 FFI-boundary
+  tests behind `#[cfg(target_os = "macos")]` AND a `MockBackend` so the
+  non-FFI behavior stays testable on Linux CI.
+
+## Canonical bug patterns
+
+### 1. Cross-wallet credential leak on namespace collisions
+
+**Don't:** use raw user input as a filesystem directory or keyring account
+name.
+
+**Do:** sanitize via `sanitize_for_keyring(session_id)` in `session_store.rs`
+— ASCII alnum + `-_.` preserved, anything else replaced with `_`, rewrites
+suffixed with a SHA-256-derived 8-char hash under the reserved `__agk_`
+prefix. `DefaultHasher` is NOT stable across Rust versions; use `sha2`
+crate.
+
+Reference: PR #24 v6 P1 + v9, issue #37.
+
+### 2. Nondeterministic daemon session selection
+
+**Don't:** rely on `std::fs::read_dir` order when picking among multiple
+`daemon-*` sessions.
+
+**Do:** sort alphabetically via `list_fallback_session_ids` before selecting.
+If more than one loadable session exists and no `--session-id` is passed,
+error with the candidate list rather than picking arbitrarily.
+
+Reference: PR #24 v1 P1.
+
+### 3. URL encoding via reqwest `.query()`, never raw interpolation
+
+**Don't:**
+```rust
+let url = format!("/identity/resolve?identity_type=alias&identity_value={}", raw);
+```
+
+**Do:**
+```rust
+let resp = client.get(format!("{backend_url}/identity/resolve"))
+    .query(&[("identity_type", "alias"), ("identity_value", raw)])
+    .send()
+    .await?;
+```
+
+Aliases with `+`, `&`, `%`, spaces, plus-addressed emails (`bot+prod@…`)
+break under raw interpolation.
+
+Reference: PR #20, PR #22 v1 P2, PR #29.
+
+### 4. Session-token redaction in prompt / log strings
+
+**Don't:** echo `cmd_revoke`'s raw argument into error messages, biometric
+prompts, or stderr. The argument may be a live bearer token.
+
+**Do:** pipe user-supplied strings through `redact_prompt_reason()` (in
+`biometric/logic.rs`) — strips anything over 40 characters opaque while
+preserving short `0x…` wallet addresses.
+
+Reference: PR #27 P2 (session-token leak via biometric prompt), PR #38.
+
+### 5. Wallet comparison must be case-insensitive OR normalize before compare
+
+**Don't:** `session.wallet.0 == target_wallet_str` when `target_wallet_str`
+came from user input. EIP-55 checksummed addresses collide with the
+backend's lowercase storage.
+
+**Do:** either `eq_ignore_ascii_case` (revoke self-detection pattern in
+`cmd_revoke`) OR lowercase before sending the value to `/identity/resolve`
+(pattern in PR #22 v3 `resolve_parent_if_set`).
+
+Reference: PR #18 P2, PR #22 v2 P2.
+
+### 6. Session TTL is 30 days uniformly
+
+Master, agent, sandbox — all sessions are 30 days per `wiki/session-token.md`.
+Don't introduce per-type TTL splits; they were tried and reverted.
+
+Reference: PR #23.
+
+### 7. Keychain operations must be synchronous
+
+**Don't:** spawn a detached thread to delete a keyring entry. Callers use
+the return value to decide what to tell the user, and the CLI often exits
+before the background thread runs.
+
+**Do:** use the `try_keyring_save`/`try_keyring_load` pattern — spawn a
+thread, wait up to 2 seconds via `recv_timeout`, return a bool. Propagate
+failures as `anyhow::Error` from `clear_session`.
+
+Reference: PR #24 v8 P1 (fire-and-forget delete regression).
+
+### 8. Path traversal guards on user-supplied session_id / filename
+
+**Don't:** join user input directly into a filesystem path.
+
+**Do:** sanitize with the pattern from `crates/agentkeys-core/src/session_store.rs::sanitize_for_keyring`.
+Reject reserved path components (`""`, `"."`, `".."`) and anything starting
+with the reserved `__agk_` rewrite prefix — those are forced through the
+hash-suffix path so user inputs can't collide with rewrite outputs.
+
+Reference: PR #24 v2 P2, v6 P1.
+
+### 9. Audit log: DENIED rows for every cross-agent probing path
+
+New `/credential/*` endpoints that gate on `is_owner_of` must insert a
+`'denied'` audit row BEFORE returning 403. This keeps cross-agent probing
+visible.
+
+Check: `read_credential`, `store_credential`, `list_credentials`,
+`teardown_agent` all insert DENIED rows. New endpoints must match.
+
+Reference: PR #19 P2.
+
+### 10. Mock server design principles (from CLAUDE.md)
+
+- **Typed parameters**: every endpoint takes explicit typed inputs (e.g.,
+  `identity_type` + `identity_value`). No opaque JSON-blob parsing at
+  runtime. Blockchain extrinsics require typed inputs; the mock mirrors
+  that.
+- **Shared `resolve_identity`**: one utility in `handlers/identity.rs` —
+  never inline `if let Alias => ... else if Email => ...` chains per
+  handler.
+- **Modular handlers**: request-type-specific logic goes in separate
+  functions (`mint_pair_session`, `mint_recover_session`). `approve_auth_request`
+  is pure dispatch.
+
+Reference: CLAUDE.md → "Mock Server Design Principles".
+
+## Architectural facts to apply
+
+- **Master → agent is strictly one hop.** No grandchildren. `is_owner_of`
+  checks a single `parent_token` level and that's sufficient for the
+  current model. Reviewers should NOT file tickets about transitive
+  ownership unless the session model has grown recursive pairing, which
+  it has not. See closed issue #35.
+- **No users in production yet.** Pre-launch state. "Preserve pre-#N
+  legacy data on upgrade" findings get dismissed because there is no
+  legacy field state. When we ship, we'll add dedicated migration commits
+  with CHANGELOG entries.
+- **Commit message style**: `fix(<crate>): #<issue> — <subject>` or
+  `docs(<scope>): <subject>`. Version suffixes `v2`, `v3` etc for
+  iterative codex-review fixes on the same PR.
+
+## Scope control
+
+- `bug fix` PRs should NOT include refactors beyond the fix. If the diff
+  starts growing new abstractions, flag it as scope creep.
+- Don't add feature flags or backwards-compatibility shims unless the
+  project has users (see above — it does not).
+- Don't write tests that only re-verify framework guarantees (e.g.,
+  "tokio spawn returns a JoinHandle"). Test user-visible behavior.
+
+## When codex and Claude disagree
+
+If this repo's automated review (claude-code-action) and a manual codex
+review flag contradictory issues on the same line, prefer the pattern in
+this doc. If the doc is silent, surface the disagreement in the PR thread
+rather than picking a side silently — the author makes the call.
diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml
index b5e8cfd..1a4be70 100644
--- a/.github/workflows/claude-code-review.yml
+++ b/.github/workflows/claude-code-review.yml
@@ -3,33 +3,44 @@ name: Claude Code Review
 on:
   pull_request:
     types: [opened, synchronize, ready_for_review, reopened]
-    # Optional: Only run on specific file changes
-    # paths:
-    #   - "src/**/*.ts"
-    #   - "src/**/*.tsx"
-    #   - "src/**/*.js"
-    #   - "src/**/*.jsx"
+    # Only run on code / test / doc paths we actually want reviewed.
+    # Skips cheap churn like Cargo.lock-only updates.
+    paths:
+      - "crates/**"
+      - "docs/**"
+      - "wiki/**"
+      - ".github/workflows/**"
+      - "Cargo.toml"
+      - "CLAUDE.md"
+      - "harness/**"
 
 jobs:
   claude-review:
-    # Optional: Filter by PR author
-    # if: |
-    #   github.event.pull_request.user.login == 'external-contributor' ||
-    #   github.event.pull_request.user.login == 'new-developer' ||
-    #   github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
-
     runs-on: ubuntu-latest
     permissions:
       contents: read
       pull-requests: read
       issues: read
       id-token: write
+      actions: read
 
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
         with:
-          fetch-depth: 1
+          fetch-depth: 0   # full history so Claude can inspect git log / blame during review
+
+      # Rust toolchain + cache so `cargo check` / `cargo test -p <crate>` runs fast.
+      # Every reviewer-invoked cargo command benefits.
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          components: clippy, rustfmt
+
+      - name: Cache cargo registry + target
+        uses: Swatinem/rust-cache@v2
+        with:
+          shared-key: "agentkeys-review"
 
       - name: Run Claude Code Review
         id: claude-review
@@ -38,7 +49,41 @@ jobs:
           claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
           plugin_marketplaces: 'https://github.com/anthropics/claude-code.git'
           plugins: 'code-review@claude-code-plugins'
-          prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}'
-          # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
-          # or https://code.claude.com/docs/en/cli-reference for available options
+          # Custom review prompt tailored to agentkeys' Rust workspace + known
+          # review patterns from past codex iterations. See
+          # .github/REVIEW_GUIDELINES.md for the full pattern catalog.
+          prompt: |
+            /code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}
+
+            REPO CONTEXT:
+            - Rust workspace. Workspace-member crates: agentkeys-types, agentkeys-core,
+              agentkeys-cli, agentkeys-daemon, agentkeys-mock-server, agentkeys-mcp,
+              agentkeys-provisioner.
+            - READ `CLAUDE.md` at repo root for architecture, mock-server design
+              principles, and test commands.
+            - READ `.github/REVIEW_GUIDELINES.md` for agentkeys-specific review
+              patterns (audit-log contract, session-token redaction, URL encoding
+              via reqwest `.query()`, `--test-threads=1` requirement, etc).
+            - Related specs: `docs/spec/architecture.md`,
+              `docs/spec/credential-backend-interface.md`,
+              `wiki/session-token.md` (30-day TTL policy).
+
+            TEST CONSTRAINTS:
+            - Tests mutate shared process state (HOME, keyring accounts) so
+              ALWAYS use `cargo test -p <crate> -- --test-threads=1`.
+            - Target only affected crates, not the whole workspace.
+            - `cargo clippy -p <crate> -- -D warnings` is expected clean.
+
+            KNOWN REVIEW PATTERNS (apply these before accepting a PR):
+            1. Cross-wallet credential safety (namespacing, sanitization).
+            2. Audit log DENIED rows for all cross-agent probing paths.
+            3. URL encoding via reqwest `.query()` — NEVER raw `format!()`
+               interpolation into query strings.
+            4. Token / session-token redaction in prompts and log lines.
+            5. Case-insensitive wallet comparison (EIP-55 vs backend lowercase).
+            6. Session TTL uniformly 30 days per `wiki/session-token.md`.
+            7. Synchronous keychain ops — no fire-and-forget delete.
+            8. Path traversal guards on any user-supplied session_id / filename.
 
+          claude_args: |
+            --allowed-tools Bash(cargo check:*),Bash(cargo test:*),Bash(cargo clippy:*),Bash(cargo fmt:*),Bash(git diff:*),Bash(git log:*),Bash(git blame:*),Bash(gh pr:*),Bash(gh issue view:*)
diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
index d300267..5b7c0f2 100644
--- a/.github/workflows/claude.yml
+++ b/.github/workflows/claude.yml
@@ -23,28 +23,37 @@ jobs:
       pull-requests: read
       issues: read
       id-token: write
-      actions: read # Required for Claude to read CI results on PRs
+      actions: read   # required for Claude to read CI results on PRs
+
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
         with:
-          fetch-depth: 1
+          fetch-depth: 0   # full history so `git log` / `git blame` work in-session
+
+      # Rust toolchain + cache so `@claude test` / `@claude check clippy`
+      # style requests don't need to pay cold-compile cost.
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          components: clippy, rustfmt
+
+      - name: Cache cargo registry + target
+        uses: Swatinem/rust-cache@v2
+        with:
+          shared-key: "agentkeys-review"
 
       - name: Run Claude Code
         id: claude
         uses: anthropics/claude-code-action@v1
         with:
           claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
-
-          # This is an optional setting that allows Claude to read CI results on PRs
           additional_permissions: |
             actions: read
 
-          # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
-          # prompt: 'Update the pull request description to include a summary of changes.'
-
-          # Optional: Add claude_args to customize behavior and configuration
-          # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
-          # or https://code.claude.com/docs/en/cli-reference for available options
-          # claude_args: '--allowed-tools Bash(gh pr:*)'
-
+          # Repo-specific allow-list so @claude requests can actually run
+          # tests / lint / diff without hitting permission prompts.
+          # `--test-threads=1` is required because our test suites mutate
+          # shared process env (HOME, keyring accounts).
+          claude_args: |
+            --allowed-tools Bash(cargo check:*),Bash(cargo test:*),Bash(cargo clippy:*),Bash(cargo fmt:*),Bash(git diff:*),Bash(git log:*),Bash(git blame:*),Bash(gh pr:*),Bash(gh issue view:*),Bash(gh pr comment:*),Bash(gh pr edit:*)