From e3513b184c1163b27935470ba1cdfb328b12df72 Mon Sep 17 00:00:00 2001 From: Hanwen Cheng Date: Wed, 15 Apr 2026 11:34:44 +0800 Subject: [PATCH 1/3] "Claude PR Assistant workflow" --- .github/workflows/claude.yml | 50 ++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 .github/workflows/claude.yml diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml new file mode 100644 index 0000000..d300267 --- /dev/null +++ b/.github/workflows/claude.yml @@ -0,0 +1,50 @@ +name: Claude Code + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + issues: + types: [opened, assigned] + pull_request_review: + types: [submitted] + +jobs: + claude: + if: | + (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || + (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + actions: read # Required for Claude to read CI results on PRs + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + + # This is an optional setting that allows Claude to read CI results on PRs + additional_permissions: | + actions: read + + # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it. + # prompt: 'Update the pull request description to include a summary of changes.' + + # Optional: Add claude_args to customize behavior and configuration + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://code.claude.com/docs/en/cli-reference for available options + # claude_args: '--allowed-tools Bash(gh pr:*)' + From c15d5e13b900169c05dcbf1ab51c47012810f182 Mon Sep 17 00:00:00 2001 From: Hanwen Cheng Date: Wed, 15 Apr 2026 11:34:46 +0800 Subject: [PATCH 2/3] "Claude Code Review workflow" --- .github/workflows/claude-code-review.yml | 44 ++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 .github/workflows/claude-code-review.yml diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml new file mode 100644 index 0000000..b5e8cfd --- /dev/null +++ b/.github/workflows/claude-code-review.yml @@ -0,0 +1,44 @@ +name: Claude Code Review + +on: + pull_request: + types: [opened, synchronize, ready_for_review, reopened] + # Optional: Only run on specific file changes + # paths: + # - "src/**/*.ts" + # - "src/**/*.tsx" + # - "src/**/*.js" + # - "src/**/*.jsx" + +jobs: + claude-review: + # Optional: Filter by PR author + # if: | + # github.event.pull_request.user.login == 'external-contributor' || + # github.event.pull_request.user.login == 'new-developer' || + # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' + + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code Review + id: claude-review + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + plugin_marketplaces: 'https://github.com/anthropics/claude-code.git' + plugins: 'code-review@claude-code-plugins' + prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}' + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://code.claude.com/docs/en/cli-reference for available options + From 537ab462630074d9e7953c68f362ca6eb4a757bf Mon Sep 17 00:00:00 2001 From: Hanwen Cheng Date: Wed, 15 Apr 2026 11:42:00 +0800 Subject: [PATCH 3/3] ci: wire agentkeys-specific context into Claude Code workflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Augments the auto-generated Claude Code + Claude Code Review workflows with context from 15+ PR review cycles in this repo so the Action produces findings consistent with recent codex iterations instead of generic Rust advice. ## claude-code-review.yml - Scope `on.pull_request.paths` to `crates/**`, `docs/**`, `wiki/**`, workflows, Cargo.toml, CLAUDE.md, and harness/. Skips cheap Cargo.lock churn. - `fetch-depth: 0` so Claude can inspect `git log` / `git blame` during review (useful for "this finding predates the PR" arguments). - `dtolnay/rust-toolchain@stable` + `Swatinem/rust-cache@v2` so every `cargo check` / `cargo test -p ` in-session runs fast. - Custom prompt injects: - crate names (agentkeys-types, agentkeys-core, etc) - pointer to CLAUDE.md for architecture + mock-server design principles - pointer to the new .github/REVIEW_GUIDELINES.md for agentkeys-specific review patterns - `--test-threads=1` requirement (tests mutate shared HOME/keyring) - the 8-pattern checklist (audit-log DENIED rows, URL-encoding via reqwest .query(), session-token redaction, case-insensitive wallet comparison, 30-day TTL, synchronous keychain ops, path-traversal guards, cross-wallet credential safety) - `claude_args --allowed-tools` whitelist for cargo/git/gh so the Action can actually run the cargo commands the prompt tells it to. ## claude.yml (@claude mentions) - Same Rust toolchain + cache setup so `@claude run tests` / `@claude check clippy` requests don't pay cold-compile cost. - `fetch-depth: 0` for git-history tools. - Same `claude_args --allowed-tools` whitelist plus `gh pr comment:*` / `gh pr edit:*` so @claude can update PR bodies and comment back with findings. ## .github/REVIEW_GUIDELINES.md (new) Single source of truth for agentkeys review patterns, extracted from PRs #18-#38 (fix/issue-10 through fix/issue-37). Documents: - Test constraints (`--test-threads=1`, per-crate targeting) - 10 canonical bug patterns that codex has flagged repeatedly - Architectural invariants (master→agent single-hop; no users yet) - Scope-control guidance (no speculative refactors, no backwards-compat shims pre-launch) - Policy for codex-vs-claude disagreements Each pattern has a PR/issue reference so reviewers (and future Claude runs) can trace why the rule exists. --- .github/REVIEW_GUIDELINES.md | 177 +++++++++++++++++++++++ .github/workflows/claude-code-review.yml | 77 ++++++++-- .github/workflows/claude.yml | 33 +++-- 3 files changed, 259 insertions(+), 28 deletions(-) create mode 100644 .github/REVIEW_GUIDELINES.md diff --git a/.github/REVIEW_GUIDELINES.md b/.github/REVIEW_GUIDELINES.md new file mode 100644 index 0000000..8f4a4f9 --- /dev/null +++ b/.github/REVIEW_GUIDELINES.md @@ -0,0 +1,177 @@ +# Review Guidelines — agentkeys + +This is the single source of truth for code review patterns in this repo. The +`claude-code-review.yml` workflow points Claude at this file; human reviewers +should also use it as a checklist. + +Background: these patterns were distilled from 15+ PR review cycles in +March-April 2026 where codex repeatedly surfaced the same classes of bug. Each +numbered item below has been a real P1 or P2 finding at least once. + +## Test constraints + +- **ALWAYS use `cargo test -p -- --test-threads=1`.** Tests mutate + shared process env (HOME, keyring accounts, AGENTKEYS_SESSION_STORE) and + parallel runs race. This is not negotiable. +- Target only affected crates. `cargo test` on the whole workspace takes 5+ + minutes and masks which crate actually regressed. +- `cargo clippy -p -- -D warnings` should stay clean. +- When adding FFI code (macOS LAContext, keychain), add both L2 FFI-boundary + tests behind `#[cfg(target_os = "macos")]` AND a `MockBackend` so the + non-FFI behavior stays testable on Linux CI. + +## Canonical bug patterns + +### 1. Cross-wallet credential leak on namespace collisions + +**Don't:** use raw user input as a filesystem directory or keyring account +name. + +**Do:** sanitize via `sanitize_for_keyring(session_id)` in `session_store.rs` +— ASCII alnum + `-_.` preserved, anything else replaced with `_`, rewrites +suffixed with a SHA-256-derived 8-char hash under the reserved `__agk_` +prefix. `DefaultHasher` is NOT stable across Rust versions; use `sha2` +crate. + +Reference: PR #24 v6 P1 + v9, issue #37. + +### 2. Nondeterministic daemon session selection + +**Don't:** rely on `std::fs::read_dir` order when picking among multiple +`daemon-*` sessions. + +**Do:** sort alphabetically via `list_fallback_session_ids` before selecting. +If more than one loadable session exists and no `--session-id` is passed, +error with the candidate list rather than picking arbitrarily. + +Reference: PR #24 v1 P1. + +### 3. URL encoding via reqwest `.query()`, never raw interpolation + +**Don't:** +```rust +let url = format!("/identity/resolve?identity_type=alias&identity_value={}", raw); +``` + +**Do:** +```rust +let resp = client.get(format!("{backend_url}/identity/resolve")) + .query(&[("identity_type", "alias"), ("identity_value", raw)]) + .send() + .await?; +``` + +Aliases with `+`, `&`, `%`, spaces, plus-addressed emails (`bot+prod@…`) +break under raw interpolation. + +Reference: PR #20, PR #22 v1 P2, PR #29. + +### 4. Session-token redaction in prompt / log strings + +**Don't:** echo `cmd_revoke`'s raw argument into error messages, biometric +prompts, or stderr. The argument may be a live bearer token. + +**Do:** pipe user-supplied strings through `redact_prompt_reason()` (in +`biometric/logic.rs`) — strips anything over 40 characters opaque while +preserving short `0x…` wallet addresses. + +Reference: PR #27 P2 (session-token leak via biometric prompt), PR #38. + +### 5. Wallet comparison must be case-insensitive OR normalize before compare + +**Don't:** `session.wallet.0 == target_wallet_str` when `target_wallet_str` +came from user input. EIP-55 checksummed addresses collide with the +backend's lowercase storage. + +**Do:** either `eq_ignore_ascii_case` (revoke self-detection pattern in +`cmd_revoke`) OR lowercase before sending the value to `/identity/resolve` +(pattern in PR #22 v3 `resolve_parent_if_set`). + +Reference: PR #18 P2, PR #22 v2 P2. + +### 6. Session TTL is 30 days uniformly + +Master, agent, sandbox — all sessions are 30 days per `wiki/session-token.md`. +Don't introduce per-type TTL splits; they were tried and reverted. + +Reference: PR #23. + +### 7. Keychain operations must be synchronous + +**Don't:** spawn a detached thread to delete a keyring entry. Callers use +the return value to decide what to tell the user, and the CLI often exits +before the background thread runs. + +**Do:** use the `try_keyring_save`/`try_keyring_load` pattern — spawn a +thread, wait up to 2 seconds via `recv_timeout`, return a bool. Propagate +failures as `anyhow::Error` from `clear_session`. + +Reference: PR #24 v8 P1 (fire-and-forget delete regression). + +### 8. Path traversal guards on user-supplied session_id / filename + +**Don't:** join user input directly into a filesystem path. + +**Do:** sanitize with the pattern from `crates/agentkeys-core/src/session_store.rs::sanitize_for_keyring`. +Reject reserved path components (`""`, `"."`, `".."`) and anything starting +with the reserved `__agk_` rewrite prefix — those are forced through the +hash-suffix path so user inputs can't collide with rewrite outputs. + +Reference: PR #24 v2 P2, v6 P1. + +### 9. Audit log: DENIED rows for every cross-agent probing path + +New `/credential/*` endpoints that gate on `is_owner_of` must insert a +`'denied'` audit row BEFORE returning 403. This keeps cross-agent probing +visible. + +Check: `read_credential`, `store_credential`, `list_credentials`, +`teardown_agent` all insert DENIED rows. New endpoints must match. + +Reference: PR #19 P2. + +### 10. Mock server design principles (from CLAUDE.md) + +- **Typed parameters**: every endpoint takes explicit typed inputs (e.g., + `identity_type` + `identity_value`). No opaque JSON-blob parsing at + runtime. Blockchain extrinsics require typed inputs; the mock mirrors + that. +- **Shared `resolve_identity`**: one utility in `handlers/identity.rs` — + never inline `if let Alias => ... else if Email => ...` chains per + handler. +- **Modular handlers**: request-type-specific logic goes in separate + functions (`mint_pair_session`, `mint_recover_session`). `approve_auth_request` + is pure dispatch. + +Reference: CLAUDE.md → "Mock Server Design Principles". + +## Architectural facts to apply + +- **Master → agent is strictly one hop.** No grandchildren. `is_owner_of` + checks a single `parent_token` level and that's sufficient for the + current model. Reviewers should NOT file tickets about transitive + ownership unless the session model has grown recursive pairing, which + it has not. See closed issue #35. +- **No users in production yet.** Pre-launch state. "Preserve pre-#N + legacy data on upgrade" findings get dismissed because there is no + legacy field state. When we ship, we'll add dedicated migration commits + with CHANGELOG entries. +- **Commit message style**: `fix(): #` or + `docs(): `. Version suffixes `v2`, `v3` etc for + iterative codex-review fixes on the same PR. + +## Scope control + +- `bug fix` PRs should NOT include refactors beyond the fix. If the diff + starts growing new abstractions, flag it as scope creep. +- Don't add feature flags or backwards-compatibility shims unless the + project has users (see above — it does not). +- Don't write tests that only re-verify framework guarantees (e.g., + "tokio spawn returns a JoinHandle"). Test user-visible behavior. + +## When codex and Claude disagree + +If this repo's automated review (claude-code-action) and a manual codex +review flag contradictory issues on the same line, prefer the pattern in +this doc. If the doc is silent, surface the disagreement in the PR thread +rather than picking a side silently — the author makes the call. diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml index b5e8cfd..1a4be70 100644 --- a/.github/workflows/claude-code-review.yml +++ b/.github/workflows/claude-code-review.yml @@ -3,33 +3,44 @@ name: Claude Code Review on: pull_request: types: [opened, synchronize, ready_for_review, reopened] - # Optional: Only run on specific file changes - # paths: - # - "src/**/*.ts" - # - "src/**/*.tsx" - # - "src/**/*.js" - # - "src/**/*.jsx" + # Only run on code / test / doc paths we actually want reviewed. + # Skips cheap churn like Cargo.lock-only updates. + paths: + - "crates/**" + - "docs/**" + - "wiki/**" + - ".github/workflows/**" + - "Cargo.toml" + - "CLAUDE.md" + - "harness/**" jobs: claude-review: - # Optional: Filter by PR author - # if: | - # github.event.pull_request.user.login == 'external-contributor' || - # github.event.pull_request.user.login == 'new-developer' || - # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' - runs-on: ubuntu-latest permissions: contents: read pull-requests: read issues: read id-token: write + actions: read steps: - name: Checkout repository uses: actions/checkout@v4 with: - fetch-depth: 1 + fetch-depth: 0 # full history so Claude can inspect git log / blame during review + + # Rust toolchain + cache so `cargo check` / `cargo test -p ` runs fast. + # Every reviewer-invoked cargo command benefits. + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + components: clippy, rustfmt + + - name: Cache cargo registry + target + uses: Swatinem/rust-cache@v2 + with: + shared-key: "agentkeys-review" - name: Run Claude Code Review id: claude-review @@ -38,7 +49,41 @@ jobs: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} plugin_marketplaces: 'https://github.com/anthropics/claude-code.git' plugins: 'code-review@claude-code-plugins' - prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}' - # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md - # or https://code.claude.com/docs/en/cli-reference for available options + # Custom review prompt tailored to agentkeys' Rust workspace + known + # review patterns from past codex iterations. See + # .github/REVIEW_GUIDELINES.md for the full pattern catalog. + prompt: | + /code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }} + + REPO CONTEXT: + - Rust workspace. Workspace-member crates: agentkeys-types, agentkeys-core, + agentkeys-cli, agentkeys-daemon, agentkeys-mock-server, agentkeys-mcp, + agentkeys-provisioner. + - READ `CLAUDE.md` at repo root for architecture, mock-server design + principles, and test commands. + - READ `.github/REVIEW_GUIDELINES.md` for agentkeys-specific review + patterns (audit-log contract, session-token redaction, URL encoding + via reqwest `.query()`, `--test-threads=1` requirement, etc). + - Related specs: `docs/spec/architecture.md`, + `docs/spec/credential-backend-interface.md`, + `wiki/session-token.md` (30-day TTL policy). + + TEST CONSTRAINTS: + - Tests mutate shared process state (HOME, keyring accounts) so + ALWAYS use `cargo test -p -- --test-threads=1`. + - Target only affected crates, not the whole workspace. + - `cargo clippy -p -- -D warnings` is expected clean. + + KNOWN REVIEW PATTERNS (apply these before accepting a PR): + 1. Cross-wallet credential safety (namespacing, sanitization). + 2. Audit log DENIED rows for all cross-agent probing paths. + 3. URL encoding via reqwest `.query()` — NEVER raw `format!()` + interpolation into query strings. + 4. Token / session-token redaction in prompts and log lines. + 5. Case-insensitive wallet comparison (EIP-55 vs backend lowercase). + 6. Session TTL uniformly 30 days per `wiki/session-token.md`. + 7. Synchronous keychain ops — no fire-and-forget delete. + 8. Path traversal guards on any user-supplied session_id / filename. + claude_args: | + --allowed-tools Bash(cargo check:*),Bash(cargo test:*),Bash(cargo clippy:*),Bash(cargo fmt:*),Bash(git diff:*),Bash(git log:*),Bash(git blame:*),Bash(gh pr:*),Bash(gh issue view:*) diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml index d300267..5b7c0f2 100644 --- a/.github/workflows/claude.yml +++ b/.github/workflows/claude.yml @@ -23,28 +23,37 @@ jobs: pull-requests: read issues: read id-token: write - actions: read # Required for Claude to read CI results on PRs + actions: read # required for Claude to read CI results on PRs + steps: - name: Checkout repository uses: actions/checkout@v4 with: - fetch-depth: 1 + fetch-depth: 0 # full history so `git log` / `git blame` work in-session + + # Rust toolchain + cache so `@claude test` / `@claude check clippy` + # style requests don't need to pay cold-compile cost. + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + components: clippy, rustfmt + + - name: Cache cargo registry + target + uses: Swatinem/rust-cache@v2 + with: + shared-key: "agentkeys-review" - name: Run Claude Code id: claude uses: anthropics/claude-code-action@v1 with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} - - # This is an optional setting that allows Claude to read CI results on PRs additional_permissions: | actions: read - # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it. - # prompt: 'Update the pull request description to include a summary of changes.' - - # Optional: Add claude_args to customize behavior and configuration - # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md - # or https://code.claude.com/docs/en/cli-reference for available options - # claude_args: '--allowed-tools Bash(gh pr:*)' - + # Repo-specific allow-list so @claude requests can actually run + # tests / lint / diff without hitting permission prompts. + # `--test-threads=1` is required because our test suites mutate + # shared process env (HOME, keyring accounts). + claude_args: | + --allowed-tools Bash(cargo check:*),Bash(cargo test:*),Bash(cargo clippy:*),Bash(cargo fmt:*),Bash(git diff:*),Bash(git log:*),Bash(git blame:*),Bash(gh pr:*),Bash(gh issue view:*),Bash(gh pr comment:*),Bash(gh pr edit:*)