litentry · hanwencheng · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,4 @@
 .omc
 .obsidian
 /docs/test-screenshots/
+.gstack/
diff --git a/crates/agentkeys-provisioner/src/orchestrator.rs b/crates/agentkeys-provisioner/src/orchestrator.rs
@@ -1,14 +1,14 @@
 use std::collections::HashMap;
-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::sync::{Arc, Mutex};
-use std::time::Instant;
+use std::time::{Instant, SystemTime, UNIX_EPOCH};
 
 use agentkeys_core::backend::CredentialBackend;
 use agentkeys_types::{ProvisionEvent, ServiceName, Session, TripwireKind, WalletAddress};
 
 use crate::error::{ProvisionError, ProvisionResult};
 use crate::metrics::{self, ProvisionMetric, VerificationResultLabel};
-use crate::subprocess::{spawn_and_collect, SubprocessConfig};
+use crate::subprocess::{spawn_and_collect, SubprocessConfig, SubprocessOutcome};
 
 #[derive(Debug, Clone)]
 pub struct ActiveProvision {
@@ -86,6 +86,37 @@ impl Drop for ProvisionGuard {
     }
 }
 
+/// Persists a failed provision's subprocess output under `$HOME/.agentkeys/logs/` as
+/// `provision-<service>-<unix-secs>.log` and hands back the written path. Strictly best-effort:
+/// every failure yields `None` so a logging problem never masks the provision failure itself.
+fn write_provision_log(service: &str, outcome: &SubprocessOutcome) -> Option<PathBuf> {
+    let log_dir = PathBuf::from(std::env::var("HOME").ok()?).join(".agentkeys").join("logs");
+    std::fs::create_dir_all(&log_dir).ok()?;
+    let epoch_secs = SystemTime::now().duration_since(UNIX_EPOCH).ok()?.as_secs();
+
+    // Anything outside [A-Za-z0-9_-] becomes '_' so the service name is a safe file-name part.
+    let mut file_stem = String::with_capacity(service.len());
+    for ch in service.chars() {
+        file_stem.push(if matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_') { ch } else { '_' });
+    }
+    let log_path = log_dir.join(format!("provision-{}-{}.log", file_stem, epoch_secs));
+
+    let header = format!(
+        "service: {}\nexit_code: {:?}\nevents_emitted: {}\n\n=== subprocess stdout events ===\n",
+        service,
+        outcome.exit_code,
+        outcome.events.len()
+    );
+    let event_dump: String = outcome.events.iter().map(|ev| format!("{:?}\n", ev)).collect();
+    let stderr_text: &str = &outcome.stderr;
+    let contents = [header.as_str(), event_dump.as_str(), "\n=== subprocess stderr ===\n", stderr_text].concat();
+
+    match std::fs::write(&log_path, contents) {
+        Ok(()) => Some(log_path),
+        Err(_) => None,
+    }
+}
+
 /// Returns first 8 chars + `****...` + last 4. For keys shorter than 12 chars returns `****`.
 pub fn mask_key(key: &str) -> String {
     if key.len() < 12 {
@@ -190,7 +221,26 @@ pub async fn run_provision(
     }
 
     let raw_key = api_key.ok_or_else(|| {
-        ProvisionError::Internal("subprocess ended without terminal event".to_string())
+        let stderr_tail: String = outcome
+            .stderr
+            .lines()
+            .rev()
+            .take(20)
+            .collect::<Vec<_>>()
+            .into_iter()
+            .rev()
+            .collect::<Vec<_>>()
+            .join("\n");
+        let log_hint = match write_provision_log(service, &outcome) {
+            Some(path) => format!("full log: {}", path.display()),
+            None => "full log: (unable to write ~/.agentkeys/logs — check HOME + permissions)".to_string(),
+        };
+        ProvisionError::Internal(format!(
+            "subprocess ended without terminal event (exit {:?}). {}. stderr tail:\n{}",
+            outcome.exit_code,
+            log_hint,
+            if stderr_tail.is_empty() { "(empty)" } else { stderr_tail.as_str() }
+        ))
     })?;
 
     let masked = mask_key(&raw_key);

diff --git a/docs/manual-test-stage5.md b/docs/manual-test-stage5.md
diff --git a/docs/manual-test-stage6.md b/docs/manual-test-stage6.md
@@ -0,0 +1,177 @@
+# Stage 6 Manual Test Guide
+
+**Prerequisite:** Stage 6 SES stack deployed OR the Stage 6 mock equivalent running locally (see [`docs/spec/plans/development-stages.md`](./spec/plans/development-stages.md) §Stage 6). Rust toolchain installed, Node.js 20+.
+
+> **Scope.** This guide covers Stage 6 — **Federated Own Email on `@agentkeys-email.io`**. It does NOT cover Stage 5b's CDP-scraper retest beyond the handoff section at the end — that's done per [`docs/manual-test-stage5.md`](./manual-test-stage5.md) §3 once the Stage 6 pieces this document covers are in place.
+
+Stage 6 has two tests that matter:
+
+1. **Throwaway inbox provisioning** — `agentkeys inbox provision` mints a fresh `<id>@agentkeys-email.io`, mail sent to it lands in the right S3 prefix, and the agent can fetch it with correctly-tagged creds. This is what unblocks Stage 5b's live-demo re-run.
+2. **Per-user isolation** — agent A cannot read agent B's mail. Enforced by `aws:PrincipalTag/agentkeys_user_wallet` on the shared `agentkeys-mail` bucket.
+
+---
+
+## 1. Preflight
+
+```bash
+# Rust + Node + provisioner deps built
+cd ~/Projects/agentkeys
+cargo build --workspace --release
+npm install --prefix provisioner-scripts
+
+# Backend (mock or real TEE endpoint)
+BACKEND="${BACKEND:-http://127.0.0.1:8090}"
+curl -sf "$BACKEND/health" >/dev/null || {
+  echo "backend not up — run: cargo run --release -p agentkeys-mock-server -- --port 8090 &"
+  exit 1
+}
+
+BIN=$(pwd)/target/release/agentkeys
+$BIN --backend $BACKEND init --mock-token stage6-demo
+```
+
+**For the real SES path (not mock):** you also need:
+- DNS for `agentkeys-email.io` published (MX, Ed25519 DKIM CNAMEs, SPF, DMARC) per `docs/spec/ses-email-architecture.md`
+- S3 bucket `agentkeys-mail` with PrincipalTag-conditioned bucket policy
+- IAM OIDC provider `oidc.agentkeys.dev` registered
+- IAM role `agentkeys-agent` with trust policy on the OIDC provider + MRSIGNER pinning
+- TEE reachable + `derive("dkim/agentkeys-email.io/v1")` and `derive("oidc/issuer/v1")` subkeys present
+
+The mock equivalent stubs all of that to SQLite + a local HTTP server; both paths honor the same CLI surface.
+
+---
+
+## 2. The demo — provision a throwaway inbox + verify mail delivery
+
+```bash
+# Provision a fresh inbox (returns { address, agent_wallet })
+$BIN --backend $BACKEND inbox provision --agent my-agent
+# → e.g.: { "address": "bot-ax7kq2@agentkeys-email.io", "agent_wallet": "0x..." }
+```
+
+Save the address:
+
+```bash
+INBOX="<address-from-above>"   # paste the .address value printed by the previous command
+echo "provisioned: $INBOX"
+```
+
+**Send a test message** from any external mail source (use your own Gmail, Mailgun sandbox, or a `curl` to your own SMTP test relay):
+
+```bash
+# Example via `mail` on macOS (requires a local SMTP configured)
+echo "stage-6 test body" | mail -s "stage-6-$RANDOM" "$INBOX"
+```
+
+Or send from a real service's signup form if you want to validate end-to-end (e.g. paste `$INBOX` into the signup form at a simpler service and trigger a verification email).
+
+**Read the mail** — Claude drives the daemon:
+
+```bash
+$BIN --backend $BACKEND run my-agent -- \
+  claude-mcp-client email.list 2>&1 | jq
+# → shows the test message + its body
+
+$BIN --backend $BACKEND run my-agent -- \
+  claude-mcp-client email.get --id <msg-id-from-above>
+# → full MIME + parsed body
+```
+
+### Expected behavior
+
+1. `inbox provision` exits 0 and returns a JSON object with a `.address` matching `^bot-[a-z0-9]{6}@agentkeys-email\.io$` (or whatever shape §Stage 6 commits to; acceptance is "distinct local-part per call, no plus-aliases").
+2. `email.list` from the agent returns a non-empty array within ≤30s of the test message being sent. The item's `to` field matches `$INBOX`.
+3. The raw MIME in `s3://agentkeys-mail/<agent_wallet>/<inbox_address>/*.eml` is present — verify with `aws s3 ls --profile agentkeys` or whatever read mechanism your deployment uses.
+4. An on-chain audit extrinsic `CredentialMinted` was emitted for the `s3.read` mint that serviced `email.list`.
+
+### Failure modes to watch for
+
+- **`inbox provision` 400 / "domain not verified"** — SES identity for `agentkeys-email.io` is not verified in this AWS account. Run `aws sesv2 get-email-identity --email-identity agentkeys-email.io`; if the DKIM / identity verification status is still `PENDING` rather than `SUCCESS`, publish the DNS records or re-trigger verification.
+- **Mail never appears in S3** — DNS MX record not pointing at `inbound-smtp.us-east-1.amazonaws.com`, OR the SES receipt rule isn't active, OR the S3 bucket's bucket policy denies SES write. `aws logs tail /aws/ses/<rule-set>` to debug.
+- **`email.list` returns AccessDenied** — the agent's minted JWT didn't carry `agentkeys_user_wallet` as a session tag, OR bucket policy's `${aws:PrincipalTag/agentkeys_user_wallet}` condition didn't match. Inspect the temp-cred claims via `aws sts get-caller-identity --profile <temp>`.
+- **DKIM fails at recipient** — the outbound-path test. Send a message from your agent to a real Gmail inbox; if it lands in Spam with "DKIM: fail", either (a) the Ed25519 DKIM key at `derive("dkim/agentkeys-email.io/v1")` doesn't match what's published in DNS, or (b) the DKIM header isn't being added before SES hands the MIME to the outbound MTA.
+
+---
+
+## 3. Per-user isolation test
+
+```bash
+# Provision a second agent + inbox
+$BIN --backend $BACKEND inbox provision --agent other-agent
+OTHER_WALLET="<agent_wallet-from-above>"   # paste the .agent_wallet value printed by the previous command
+
+# First agent attempts to read second agent's inbox prefix — must fail
+$BIN --backend $BACKEND run my-agent -- \
+  claude-mcp-client email.list --agent "$OTHER_WALLET" 2>&1
+# → AccessDenied OR "DENIED session does not own agent"
+```
+
+**Expected:** the cross-agent read is refused at either our backend (ownership check) or at AWS's bucket policy (`${aws:PrincipalTag/agentkeys_user_wallet}` mismatch). Either layer is sufficient; both firing is belt-and-suspenders.
+
+---
+
+## 4. Stage 5b live-demo re-run (the payoff)
+
+With Stage 6's throwaway-inbox API in place, Stage 5b's blocker from [`docs/manual-test-stage5.md`](./manual-test-stage5.md) §4 is resolved.
+
+Procedure:
+
+```bash
+# 1. Provision a throwaway inbox for this signup
+INBOX=$($BIN --backend $BACKEND inbox provision --agent stage5b-retest --json | jq -r .address)
+echo "signup email: $INBOX"
+
+# 2. Export it as the signup target for the CDP scraper
+export AGENTKEYS_SIGNUP_EMAIL="$INBOX"
+export AGENTKEYS_SIGNUP_PASSWORD="Stage5b-$(date +%s)-xZq9!okFg"
+
+# 3. Point the OTP fetcher at Stage 6's SES-S3 reader (not Gmail IMAP)
+export AGENTKEYS_EMAIL_BACKEND=ses-s3   # new value, see provisioner-scripts/src/lib/email.ts
+# (ses-s3 backend infers wallet + address from AGENTKEYS_SIGNUP_EMAIL and reads S3 directly)
+
+# 4. Launch real Chrome with CDP (same as Stage 5b)
+/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \
+  --remote-debugging-port=9222 \
+  --user-data-dir=/tmp/agentkeys-chrome-profile &
+
+# 5. Run the CDP scraper
+cd provisioner-scripts
+node --import tsx/esm src/scrapers/openrouter-cdp.ts
+
+# 6. Store + verify
+$BIN --backend $BACKEND store openrouter "<KEY-FROM-STDOUT>"
+$BIN --backend $BACKEND read openrouter
+curl -sS -H "Authorization: Bearer $($BIN --backend $BACKEND read openrouter)" \
+  https://openrouter.ai/api/v1/models | head -c 40
+# → expect "{\"data\":["
+```
+
+All four Stage 5a live-demo acceptance criteria should pass. If any step fails, check [`docs/manual-test-stage5.md`](./manual-test-stage5.md) §Failure modes first.
+
+---
+
+## 5. What this does NOT cover (post-Stage 6)
+
+- **BYO custom domain** — `bots.theircompany.com` for enterprise users. Deferred per Stage 7+.
+- **BYO Workspace DWD** — the advanced `docs/stage5-workspace-email-setup.md` runbook remains valid for Workspace customers but is never the default path.
+- **Email drafts as HITL primitive** — daemon-side, per the broker-not-proxy thesis. Not Stage 6 scope.
+- **Labels / threads / search** — implemented daemon-side (MCP); not server features. Per [`wiki/email-system.md`](../wiki/email-system.md).
+
+---
+
+## Summary checklist
+
+- [ ] §2 demo passes: throwaway inbox provisioned, inbound mail received, agent reads it
+- [ ] §3 isolation test passes: cross-agent read denied
+- [ ] §4 Stage 5b re-run passes: `sk-or-v1-*` minted end-to-end with a Stage-6-provisioned inbox
+- [ ] DKIM verified by a real recipient (Gmail / Outlook / Fastmail)
+- [ ] Audit trail on chain: `agentkeys usage <agent> --filter email` shows the expected mint events
+
+## References
+
+- [`docs/spec/plans/development-stages.md`](./spec/plans/development-stages.md) §Stage 6 — deliverables + tests
+- [`docs/spec/ses-email-architecture.md`](./spec/ses-email-architecture.md) — SES architecture spec
+- [`wiki/email-system.md`](../wiki/email-system.md) — high-level email system overview
+- [`wiki/hosted-first.md`](../wiki/hosted-first.md) — why `@agentkeys-email.io` is the default
+- [`wiki/tag-based-access.md`](../wiki/tag-based-access.md) — PrincipalTag mechanism
+- [`docs/manual-test-stage5.md`](./manual-test-stage5.md) — Stage 5a + 5b guide (live demo unblocked by this stage)
diff --git a/docs/spec/plans/development-stages.md b/docs/spec/plans/development-stages.md
@@ -896,6 +896,8 @@ See `docs/spec/ses-email-architecture.md` for the full spec. High-level:
 - [ ] Chain extrinsic pallet for `CredentialMinted` audit events
 - [ ] Daemon MCP tools wired to real minted creds
 - [ ] Stage 5's `provisioner-scripts` updated to read OTPs from the hosted inbox
+- [ ] **Throwaway inbox provisioning API** — on-demand mint of a fresh `<id>@agentkeys-email.io` address per caller. Acceptance: `POST /inbox/provision` (or `agentkeys inbox provision`) returns `{address, agent_wallet}` where `address` is a new locally-unique local-part under our hosted domain, Clerk-normalization-proof (distinct local-part, not plus-alias suffixes). Readable via the same `fetchVerificationCode`-style API as Stage 5, backed by the SES→S3→TEE-decrypt chain. Auto-cleanup or audit-logged revocation after the inbox is done serving signups.
+- [ ] **Stage 5b live-demo re-run against throwaway inbox** — once throwaway-inbox provisioning lands, the Stage 5b CDP scraper (`provisioner-scripts/src/scrapers/openrouter-cdp.ts`) is re-tested end-to-end: provision a throwaway `bot-N@agentkeys-email.io`, run the scraper with that as `AGENTKEYS_SIGNUP_EMAIL`, get a verified `sk-or-v1-*` key back. This closes the [`docs/manual-test-stage5.md`](../../../docs/manual-test-stage5.md) §3 pickup item. (Clerk rejects the Workspace-plus-alias flow as duplicate; only distinct local-parts work.)
 
 ### Tests
 
@@ -909,6 +911,8 @@ See `docs/spec/ses-email-architecture.md` for the full spec. High-level:
 | `email::jwt_without_wallet_claim_denied` | JWT missing `agentkeys_user_wallet` → `sts:AssumeRoleWithWebIdentity` fails per role trust policy |
 | `email::audit_emitted_on_mint` | Every SES/S3 credential mint emits a chain extrinsic with `(child, scope, operation, timestamp)` |
 | `email::grant_revocation_propagates` | Revoke user's email grant → next mint attempt fails within ≤6s |
+| `email::throwaway_inbox_provisioning` | `agentkeys inbox provision` returns a unique `<id>@agentkeys-email.io` address (distinct local-part per call, not plus-alias). A message sent to that address lands in `s3://agentkeys-mail/<wallet>/<address>/*.eml` and is readable via `fetchVerificationCode`. |
+| `email::stage5b_live_demo_rerun` | After a throwaway inbox is provisioned and set as `AGENTKEYS_SIGNUP_EMAIL`, the Stage 5b CDP scraper completes end-to-end: signup → Turnstile passes → OTP fetched → key minted → `agentkeys read openrouter` returns a `sk-or-v1-*` string → `curl /api/v1/models` returns HTTP 200. |
 
 ### Reviewer E2E Checklist
 

diff --git a/harness/stage-5a-live-demo-handoff.sh b/harness/stage-5a-live-demo-handoff.sh
@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+# Stage 5a live-demo one-shot handoff.
+# Preconditions checked up front; failures are loud; prints SUCCESS when
+# all four acceptance criteria pass.
+#
+# Usage (with AGENTKEYS_EMAIL_{BACKEND,USER,PASSWORD,HOST,PORT} exported;
+# AGENTKEYS_SIGNUP_EMAIL is auto-minted below if unset):
+#   cd ~/Projects/agentkeys
+#   bash harness/stage-5a-live-demo-handoff.sh
+set -uo pipefail
+
+REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$REPO_ROOT"
+BIN="$REPO_ROOT/target/release/agentkeys"
+BACKEND="${BACKEND:-http://127.0.0.1:8090}"
+
+say()  { printf '\n\033[1;34m==>\033[0m %s\n' "$*"; }
+fail() { printf '\033[1;31mFAIL:\033[0m %s\n' "$*" >&2; exit 1; }
+pass() { printf '\033[1;32mPASS:\033[0m %s\n' "$*"; }
+
+say "Preflight — required env"
+: "${AGENTKEYS_EMAIL_BACKEND:?AGENTKEYS_EMAIL_BACKEND must be set (e.g. gmail)}"
+: "${AGENTKEYS_EMAIL_USER:?AGENTKEYS_EMAIL_USER must be set to the CANONICAL Gmail address (NOT a plus-alias; IMAP login only accepts canonical)}"
+: "${AGENTKEYS_EMAIL_PASSWORD:?AGENTKEYS_EMAIL_PASSWORD must be set (Gmail app password; NOT your normal Google password)}"
+: "${AGENTKEYS_EMAIL_HOST:?AGENTKEYS_EMAIL_HOST must be set (imap.gmail.com)}"
+: "${AGENTKEYS_EMAIL_PORT:?AGENTKEYS_EMAIL_PORT must be set (993)}"
+
+# Auto-mint a fresh single-plus alias for THIS run so OpenRouter never sees
+# a repeat email. Strip any existing +suffix on AGENTKEYS_EMAIL_USER first:
+# some email validators (including OpenRouter's) reject double-plus addresses
+# like agent+2026042001+or-...@wildmeta.ai and silently drop the signup. The
+# inbox delivery path doesn't care, but the signup form does.
+if [ -z "${AGENTKEYS_SIGNUP_EMAIL:-}" ]; then
+  RAW_LOCAL="${AGENTKEYS_EMAIL_USER%@*}"
+  CANONICAL_LOCAL="${RAW_LOCAL%%+*}"   # strip first + and everything after
+  DOMAIN="${AGENTKEYS_EMAIL_USER#*@}"
+  export AGENTKEYS_SIGNUP_EMAIL="${CANONICAL_LOCAL}+or-$(date +%s)@${DOMAIN}"
+  say "Auto-minted AGENTKEYS_SIGNUP_EMAIL=$AGENTKEYS_SIGNUP_EMAIL (stripped existing plus-alias before appending)"
+fi
+
+say "Preflight — binary exists"
+[ -x "$BIN" ] || fail "$BIN not found. Run: cargo build --release -p agentkeys-cli"
+
+say "Preflight — mock-server at $BACKEND is up"
+curl -sf "$BACKEND/health" >/dev/null 2>&1 \
+  || curl -sf "$BACKEND" >/dev/null 2>&1 \
+  || fail "mock-server not reachable at $BACKEND. Run: cargo run --release -p agentkeys-mock-server -- --port 8090 &"
+
+say "Preflight — node + playwright deps + chromium browser"
+command -v node >/dev/null || fail "node not on PATH"
+command -v npx  >/dev/null || fail "npx not on PATH"
+[ -d provisioner-scripts/node_modules ] \
+  || fail "provisioner-scripts deps missing. Run: npm install --prefix provisioner-scripts"
+# Playwright caches browsers under $HOME/Library/Caches/ms-playwright on macOS
+# (and $HOME/.cache/ms-playwright on Linux); a provision run under an unusual HOME
+# fails with "browserType.launch: Executable doesn't exist" unless they are installed there.
+if ! ls "${HOME}/Library/Caches/ms-playwright/chromium_headless_shell-"* >/dev/null 2>&1 \
+  && ! ls "${HOME}/.cache/ms-playwright/chromium_headless_shell-"* >/dev/null 2>&1; then
+  fail "Playwright chromium not installed under \$HOME=$HOME. Run: npx playwright install chromium --with-deps"
+fi
+
+say "1. Initialize master session"
+"$BIN" --backend "$BACKEND" init --mock-token stage5-live-demo || fail "init"  # quoted: repo path / URL must not word-split
+
+say "2. Env snapshot (masking secrets)"
+env | grep -E 'AGENTKEYS_(EMAIL|SIGNUP)_' | sed 's/\(PASSWORD=\).*/\1***REDACTED***/'
+
+say "3. agentkeys provision openrouter"
+"$BIN" --backend "$BACKEND" provision openrouter || {
+  EC=$?  # real exit status; inside `if ! cmd; then`, $? is the negated status (always 0)
+  echo "---exit=$EC---"
+  LOG=$(ls -t "$HOME"/.agentkeys/logs/provision-openrouter-*.log 2>/dev/null | head -1)
+  if [ -n "$LOG" ]; then
+    echo "=== most recent provision log: $LOG ==="
+    cat "$LOG"
+  else
+    echo "(no provision log written — orchestrator path unreachable)"
+  fi
+  fail "provision failed; inspect log above"
+}
+
+say "4. AC#1-#3 — read full key back (exit 0 + masked-key form already checked above)"
+KEY=$("$BIN" --backend "$BACKEND" read openrouter) || fail "read openrouter"  # quoted so a repo path with spaces works
+case "$KEY" in
+  sk-or-v1-*) pass "read returned key of correct prefix" ;;
+  *) fail "read returned unexpected prefix: $(printf '%s' "$KEY" | head -c 12)..." ;;
+esac
+
+say "5. AC#4 — curl OpenRouter /api/v1/models"
+HTTP_CODE=$(curl -sS -o /tmp/or-models.json -w '%{http_code}' \
+  -H "Authorization: Bearer $KEY" \
+  https://openrouter.ai/api/v1/models)
+if [ "$HTTP_CODE" != "200" ]; then
+  echo "unexpected HTTP $HTTP_CODE"
+  head -c 500 /tmp/or-models.json
+  fail "OpenRouter /api/v1/models did not return 200"
+fi
+head -c 40 /tmp/or-models.json
+echo ''
+pass "OpenRouter /api/v1/models returned 200"
+
+say "ALL FOUR ACCEPTANCE CRITERIA PASS"
+echo "SUCCESS"