Skip to content
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
.omc
.obsidian
/docs/test-screenshots/
.gstack/
58 changes: 54 additions & 4 deletions crates/agentkeys-provisioner/src/orchestrator.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use std::collections::HashMap;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::time::Instant;
use std::time::{Instant, SystemTime, UNIX_EPOCH};

use agentkeys_core::backend::CredentialBackend;
use agentkeys_types::{ProvisionEvent, ServiceName, Session, TripwireKind, WalletAddress};

use crate::error::{ProvisionError, ProvisionResult};
use crate::metrics::{self, ProvisionMetric, VerificationResultLabel};
use crate::subprocess::{spawn_and_collect, SubprocessConfig};
use crate::subprocess::{spawn_and_collect, SubprocessConfig, SubprocessOutcome};

#[derive(Debug, Clone)]
pub struct ActiveProvision {
Expand Down Expand Up @@ -86,6 +86,37 @@ impl Drop for ProvisionGuard {
}
}

/// Persists a diagnostic dump of one provision subprocess run to
/// `$HOME/.agentkeys/logs/provision-<service>-<unix-seconds>.log`.
///
/// The file contains the service name, the subprocess exit code, the number of
/// events, the `Debug` rendering of every event (one per line), and the captured
/// stderr text.
///
/// Returns `Some(path)` when the file was written. Every failure along the way
/// (`HOME` unset, log directory cannot be created, system clock before the Unix
/// epoch, write error) collapses to `None`, so a logging problem can never hide
/// the provision failure that is being reported.
fn write_provision_log(service: &str, outcome: &SubprocessOutcome) -> Option<PathBuf> {
    let log_dir = PathBuf::from(std::env::var("HOME").ok()?)
        .join(".agentkeys")
        .join("logs");
    std::fs::create_dir_all(&log_dir).ok()?;

    let epoch_secs = SystemTime::now().duration_since(UNIX_EPOCH).ok()?.as_secs();

    // Every character outside [A-Za-z0-9_-] is replaced by '_' so the service
    // name cannot introduce path separators into the file name.
    let mut file_stem = String::with_capacity(service.len());
    for ch in service.chars() {
        let keep = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_');
        file_stem.push(if keep { ch } else { '_' });
    }
    let log_path = log_dir.join(format!("provision-{}-{}.log", file_stem, epoch_secs));

    let header = format!(
        "service: {}\nexit_code: {:?}\nevents_emitted: {}\n\n=== subprocess stdout events ===\n",
        service,
        outcome.exit_code,
        outcome.events.len()
    );
    let event_lines: String = outcome
        .events
        .iter()
        .map(|event| format!("{:?}\n", event))
        .collect();
    let stderr_text: &str = &outcome.stderr;
    let contents = [
        header.as_str(),
        event_lines.as_str(),
        "\n=== subprocess stderr ===\n",
        stderr_text,
    ]
    .concat();

    std::fs::write(&log_path, contents).ok()?;
    Some(log_path)
}

/// Returns first 8 chars + `****...` + last 4. For keys shorter than 12 chars returns `****`.
pub fn mask_key(key: &str) -> String {
if key.len() < 12 {
Expand Down Expand Up @@ -190,7 +221,26 @@ pub async fn run_provision(
}

let raw_key = api_key.ok_or_else(|| {
ProvisionError::Internal("subprocess ended without terminal event".to_string())
let stderr_tail: String = outcome
.stderr
.lines()
.rev()
.take(20)
.collect::<Vec<_>>()
.into_iter()
.rev()
.collect::<Vec<_>>()
.join("\n");
let log_hint = match write_provision_log(service, &outcome) {
Some(path) => format!("full log: {}", path.display()),
None => "full log: (unable to write ~/.agentkeys/logs — check HOME + permissions)".to_string(),
};
ProvisionError::Internal(format!(
"subprocess ended without terminal event (exit {:?}). {}. stderr tail:\n{}",
outcome.exit_code,
log_hint,
if stderr_tail.is_empty() { "(empty)" } else { stderr_tail.as_str() }
))
})?;

let masked = mask_key(&raw_key);
Expand Down
218 changes: 177 additions & 41 deletions docs/manual-test-stage5.md

Large diffs are not rendered by default.

177 changes: 177 additions & 0 deletions docs/manual-test-stage6.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
# Stage 6 Manual Test Guide

**Prerequisite:** Stage 6 SES stack deployed OR the Stage 6 mock equivalent running locally (see [`docs/spec/plans/development-stages.md`](./spec/plans/development-stages.md) §Stage 6). Rust toolchain installed, Node.js 20+.

> **Scope.** This guide covers Stage 6 — **Federated Own Email on `@agentkeys-email.io`**. It does NOT cover Stage 5b's CDP-scraper retest beyond the handoff section at the end — that's done per [`docs/manual-test-stage5.md`](./manual-test-stage5.md) §3 once the Stage 6 pieces this document covers are in place.

Stage 6 has two tests that matter:

1. **Throwaway inbox provisioning** — `agentkeys inbox provision` mints a fresh `<id>@agentkeys-email.io`, mail sent to it lands in the right S3 prefix, fetchable by the agent with correctly-tagged creds. This is what makes Stage 5b's live-demo re-run unblockable.
2. **Per-user isolation** — agent A cannot read agent B's mail. Enforced by `aws:PrincipalTag/agentkeys_user_wallet` on the shared `agentkeys-mail` bucket.

---

## 1. Preflight

```bash
# Rust + Node + provisioner deps built
cd ~/Projects/agentkeys
cargo build --workspace --release
npm install --prefix provisioner-scripts

# Backend (mock or real TEE endpoint)
BACKEND="${BACKEND:-http://127.0.0.1:8090}"
curl -sf "$BACKEND/health" >/dev/null || {
echo "backend not up — run: cargo run --release -p agentkeys-mock-server -- --port 8090 &"
exit 1
}

BIN=$(pwd)/target/release/agentkeys
$BIN --backend $BACKEND init --mock-token stage6-demo
```

**For the real SES path (not mock):** you also need:
- DNS for `agentkeys-email.io` published (MX, Ed25519 DKIM CNAMEs, SPF, DMARC) per `docs/spec/ses-email-architecture.md`
- S3 bucket `agentkeys-mail` with PrincipalTag-conditioned bucket policy
- IAM OIDC provider `oidc.agentkeys.dev` registered
- IAM role `agentkeys-agent` with trust policy on the OIDC provider + MRSIGNER pinning
- TEE reachable + `derive("dkim/agentkeys-email.io/v1")` and `derive("oidc/issuer/v1")` subkeys present

The mock equivalent stubs all of that to SQLite + a local HTTP server; both paths honor the same CLI surface.

---

## 2. The demo — provision a throwaway inbox + verify mail delivery

```bash
# Provision a fresh inbox (returns { address, agent_wallet })
$BIN --backend $BACKEND inbox provision --agent my-agent
# → e.g.: { "address": "bot-ax7kq2@agentkeys-email.io", "agent_wallet": "0x..." }
```

Save the address:

```bash
INBOX="<address-from-above>"   # paste the .address value from the JSON output
echo "provisioned: $INBOX"
```

**Send a test message** from any external mail source (use your own Gmail, Mailgun sandbox, or a `curl` to your own SMTP test relay):

```bash
# Example via `mail` on macOS (requires a local SMTP configured)
echo "stage-6 test body" | mail -s "stage-6-$RANDOM" "$INBOX"
```

Or send from a real service's signup form if you want to validate end-to-end (e.g. paste `$INBOX` into the signup form at a simpler service and trigger a verification email).

**Read the mail** — Claude drives the daemon:

```bash
$BIN --backend $BACKEND run my-agent -- \
claude-mcp-client email.list 2>&1 | jq
# → shows the test message + its body

$BIN --backend $BACKEND run my-agent -- \
claude-mcp-client email.get --id <msg-id-from-above>
# → full MIME + parsed body
```

### Expected behavior

1. `inbox provision` exits 0 and returns a JSON object with a `.address` matching `^bot-[a-z0-9]{6}@agentkeys-email\.io$` (or whatever shape §Stage 6 commits to; acceptance is "distinct local-part per call, no plus-aliases").
2. `email.list` from the agent returns a non-empty array within ≤30s of the test message being sent. The item's `to` field matches `$INBOX`.
3. The raw MIME in `s3://agentkeys-mail/<agent_wallet>/<inbox_address>/*.eml` is present — verify with `aws s3 ls --profile agentkeys` or whatever read mechanism your deployment uses.
4. An on-chain audit extrinsic `CredentialMinted` was emitted for the `s3.read` mint that serviced `email.list`.

### Failure modes to watch for

- **`inbox provision` 400 / "domain not verified"** — the SES identity for `agentkeys-email.io` is not verified in this AWS account. Run `aws sesv2 get-email-identity --email-identity agentkeys-email.io`; if the DKIM / identity verification status has not moved from `PENDING` to `SUCCESS`, publish the DNS records or re-trigger verification.
- **Mail never appears in S3** — DNS MX record not pointing at `inbound-smtp.us-east-1.amazonaws.com`, OR the SES receipt rule isn't active, OR the S3 bucket's bucket policy denies SES write. `aws logs tail /aws/ses/<rule-set>` to debug.
- **`email.list` returns AccessDenied** — the agent's minted JWT didn't carry `agentkeys_user_wallet` as a session tag, OR bucket policy's `${aws:PrincipalTag/agentkeys_user_wallet}` condition didn't match. Inspect the temp-cred claims via `aws sts get-caller-identity --profile <temp>`.
- **DKIM fails at recipient** — the outbound-path test. Send a message from your agent to a real Gmail inbox; if it lands in Spam with "DKIM: fail", either (a) the Ed25519 DKIM key at `derive("dkim/agentkeys-email.io/v1")` doesn't match what's published in DNS, or (b) the DKIM header isn't being added before SES hands the MIME to the outbound MTA.

---

## 3. Per-user isolation test

```bash
# Provision a second agent + inbox
$BIN --backend $BACKEND inbox provision --agent other-agent
OTHER_WALLET=$(... extract the wallet from the previous output)

# First agent attempts to read second agent's inbox prefix — must fail
$BIN --backend $BACKEND run my-agent -- \
claude-mcp-client email.list --agent "$OTHER_WALLET" 2>&1
# → AccessDenied OR "DENIED session does not own agent"
```

**Expected:** the cross-agent read is refused at either our backend (ownership check) or at AWS's bucket policy (`${aws:PrincipalTag/agentkeys_user_wallet}` mismatch). Either layer is sufficient; both firing is belt-and-suspenders.

---

## 4. Stage 5b live-demo re-run (the payoff)

With Stage 6's throwaway-inbox API in place, Stage 5b's blocker from [`docs/manual-test-stage5.md`](./manual-test-stage5.md) §4 is resolved.

Procedure:

```bash
# 1. Provision a throwaway inbox for this signup
INBOX=$($BIN --backend $BACKEND inbox provision --agent stage5b-retest --json | jq -r .address)
echo "signup email: $INBOX"

# 2. Export it as the signup target for the CDP scraper
export AGENTKEYS_SIGNUP_EMAIL="$INBOX"
export AGENTKEYS_SIGNUP_PASSWORD="Stage5b-$(date +%s)-xZq9!okFg"

# 3. Point the OTP fetcher at Stage 6's SES-S3 reader (not Gmail IMAP)
export AGENTKEYS_EMAIL_BACKEND=ses-s3 # new value, see provisioner-scripts/src/lib/email.ts
# (ses-s3 backend infers wallet + address from AGENTKEYS_SIGNUP_EMAIL and reads S3 directly)

# 4. Launch real Chrome with CDP (same as Stage 5b)
/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \
--remote-debugging-port=9222 \
--user-data-dir=/tmp/agentkeys-chrome-profile &

# 5. Run the CDP scraper
cd provisioner-scripts
node --import tsx/esm src/scrapers/openrouter-cdp.ts

# 6. Store + verify
$BIN --backend $BACKEND store openrouter "<KEY-FROM-STDOUT>"
$BIN --backend $BACKEND read openrouter
curl -sS -H "Authorization: Bearer $($BIN --backend $BACKEND read openrouter)" \
https://openrouter.ai/api/v1/models | head -c 40
# → expect "{\"data\":["
```

All four Stage 5a live-demo acceptance criteria should pass. If any step fails, check [`docs/manual-test-stage5.md`](./manual-test-stage5.md) §Failure modes first.

---

## 5. What this does NOT cover (post-Stage 6)

- **BYO custom domain** — `bots.theircompany.com` for enterprise users. Deferred to Stage 7+.
- **BYO Workspace DWD** — the advanced `docs/stage5-workspace-email-setup.md` runbook remains valid for Workspace customers but is never the default path.
- **Email drafts as HITL primitive** — daemon-side, per the broker-not-proxy thesis. Not Stage 6 scope.
- **Labels / threads / search** — implemented daemon-side (MCP); not server features. Per [`wiki/email-system.md`](../wiki/email-system.md).

---

## Summary checklist

- [ ] §2 demo passes: throwaway inbox provisioned, inbound mail received, agent reads it
- [ ] §3 isolation test passes: cross-agent read denied
- [ ] §4 Stage 5b re-run passes: `sk-or-v1-*` minted end-to-end with a Stage-6-provisioned inbox
- [ ] DKIM verified by a real recipient (Gmail / Outlook / Fastmail)
- [ ] Audit trail on chain: `agentkeys usage <agent> --filter email` shows the expected mint events

## References

- [`docs/spec/plans/development-stages.md`](./spec/plans/development-stages.md) §Stage 6 — deliverables + tests
- [`docs/spec/ses-email-architecture.md`](./spec/ses-email-architecture.md) — SES architecture spec
- [`wiki/email-system.md`](../wiki/email-system.md) — high-level email system overview
- [`wiki/hosted-first.md`](../wiki/hosted-first.md) — why `@agentkeys-email.io` is the default
- [`wiki/tag-based-access.md`](../wiki/tag-based-access.md) — PrincipalTag mechanism
- [`docs/manual-test-stage5.md`](./manual-test-stage5.md) — Stage 5a + 5b guide (live demo unblocked by this stage)
4 changes: 4 additions & 0 deletions docs/spec/plans/development-stages.md
Original file line number Diff line number Diff line change
Expand Up @@ -896,6 +896,8 @@ See `docs/spec/ses-email-architecture.md` for the full spec. High-level:
- [ ] Chain extrinsic pallet for `CredentialMinted` audit events
- [ ] Daemon MCP tools wired to real minted creds
- [ ] Stage 5's `provisioner-scripts` updated to read OTPs from the hosted inbox
- [ ] **Throwaway inbox provisioning API** — on-demand mint of a fresh `<id>@agentkeys-email.io` address per caller. Acceptance: `POST /inbox/provision` (or `agentkeys inbox provision`) returns `{address, agent_wallet}` where `address` is a new locally-unique local-part under our hosted domain, Clerk-normalization-proof (distinct local-part, not plus-alias suffixes). Readable via the same `fetchVerificationCode`-style API as Stage 5, backed by the SES→S3→TEE-decrypt chain. Auto-cleanup or audit-logged revocation after the inbox is done serving signups.
- [ ] **Stage 5b live-demo re-run against throwaway inbox** — once throwaway-inbox provisioning lands, the Stage 5b CDP scraper (`provisioner-scripts/src/scrapers/openrouter-cdp.ts`) is re-tested end-to-end: provision a throwaway `bot-N@agentkeys-email.io`, run the scraper with that as `AGENTKEYS_SIGNUP_EMAIL`, get a verified `sk-or-v1-*` key back. This closes the [`docs/manual-test-stage5.md`](../../../docs/manual-test-stage5.md) §3 pickup item. (Clerk rejects the Workspace-plus-alias flow as duplicate; only distinct local-parts work.)

### Tests

Expand All @@ -909,6 +911,8 @@ See `docs/spec/ses-email-architecture.md` for the full spec. High-level:
| `email::jwt_without_wallet_claim_denied` | JWT missing `agentkeys_user_wallet` → `sts:AssumeRoleWithWebIdentity` fails per role trust policy |
| `email::audit_emitted_on_mint` | Every SES/S3 credential mint emits a chain extrinsic with `(child, scope, operation, timestamp)` |
| `email::grant_revocation_propagates` | Revoke user's email grant → next mint attempt fails within ≤6s |
| `email::throwaway_inbox_provisioning` | `agentkeys inbox provision` returns a unique `<id>@agentkeys-email.io` address (distinct local-part per call, not plus-alias). A message sent to that address lands in `s3://agentkeys-mail/<wallet>/<address>/*.eml` and is readable via `fetchVerificationCode`. |
| `email::stage5b_live_demo_rerun` | After a throwaway inbox is provisioned and set as `AGENTKEYS_SIGNUP_EMAIL`, the Stage 5b CDP scraper completes end-to-end: signup → Turnstile passes → OTP fetched → key minted → `agentkeys read openrouter` returns a `sk-or-v1-*` string → `curl /api/v1/models` returns HTTP 200. |

### Reviewer E2E Checklist

Expand Down
103 changes: 103 additions & 0 deletions harness/stage-5a-live-demo-handoff.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/usr/bin/env bash
# Stage 5a live-demo one-shot handoff.
# Preconditions checked up front; failures are loud; prints SUCCESS when
# all four acceptance criteria pass.
#
# Usage (with AGENTKEYS_EMAIL_{BACKEND,USER,PASSWORD,HOST,PORT} exported;
# AGENTKEYS_SIGNUP_EMAIL is auto-minted below if unset):
#   cd ~/Projects/agentkeys
#   bash harness/stage-5a-live-demo-handoff.sh
#
# Optional env:
#   BACKEND   base URL of the backend (default: http://127.0.0.1:8090)
#
# Note: `-e` is deliberately not set; every step that must succeed is
# followed by an explicit `|| fail ...` so the failure message is specific.
set -uo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$REPO_ROOT" || { echo "FAIL: cannot cd to repo root: $REPO_ROOT" >&2; exit 1; }
BIN="$REPO_ROOT/target/release/agentkeys"
BACKEND="${BACKEND:-http://127.0.0.1:8090}"

# Output helpers: section header, fatal error (exits 1), and success line.
say()  { printf '\n\033[1;34m==>\033[0m %s\n' "$*"; }
fail() { printf '\033[1;31mFAIL:\033[0m %s\n' "$*" >&2; exit 1; }
pass() { printf '\033[1;32mPASS:\033[0m %s\n' "$*"; }

say "Preflight — required env"
: "${AGENTKEYS_EMAIL_BACKEND:?AGENTKEYS_EMAIL_BACKEND must be set (e.g. gmail)}"
: "${AGENTKEYS_EMAIL_USER:?AGENTKEYS_EMAIL_USER must be set to the CANONICAL Gmail address (NOT a plus-alias; IMAP login only accepts canonical)}"
: "${AGENTKEYS_EMAIL_PASSWORD:?AGENTKEYS_EMAIL_PASSWORD must be set (Gmail app password; NOT your normal Google password)}"
: "${AGENTKEYS_EMAIL_HOST:?AGENTKEYS_EMAIL_HOST must be set (imap.gmail.com)}"
: "${AGENTKEYS_EMAIL_PORT:?AGENTKEYS_EMAIL_PORT must be set (993)}"

# Auto-mint a fresh single-plus alias for THIS run so OpenRouter never sees
# a repeat email. Strip any existing +suffix on AGENTKEYS_EMAIL_USER first:
# some email validators (including OpenRouter's) reject double-plus addresses
# like agent+2026042001+or-...@wildmeta.ai and silently drop the signup. The
# inbox delivery path doesn't care, but the signup form does.
if [ -z "${AGENTKEYS_SIGNUP_EMAIL:-}" ]; then
  # Without an '@' the local-part/domain split below would yield a bogus address.
  case "$AGENTKEYS_EMAIL_USER" in
    *@*) ;;
    *) fail "AGENTKEYS_EMAIL_USER must be a full email address (got: $AGENTKEYS_EMAIL_USER)" ;;
  esac
  RAW_LOCAL="${AGENTKEYS_EMAIL_USER%@*}"
  CANONICAL_LOCAL="${RAW_LOCAL%%+*}"   # strip first + and everything after
  DOMAIN="${AGENTKEYS_EMAIL_USER#*@}"
  export AGENTKEYS_SIGNUP_EMAIL="${CANONICAL_LOCAL}+or-$(date +%s)@${DOMAIN}"
  say "Auto-minted AGENTKEYS_SIGNUP_EMAIL=$AGENTKEYS_SIGNUP_EMAIL (stripped existing plus-alias before appending)"
fi

say "Preflight — binary exists"
[ -x "$BIN" ] || fail "$BIN not found. Run: cargo build --release -p agentkeys-cli"

say "Preflight — mock-server at $BACKEND is up"
curl -sf "$BACKEND/health" >/dev/null 2>&1 \
  || curl -sf "$BACKEND" >/dev/null 2>&1 \
  || fail "mock-server not reachable at $BACKEND. Run: cargo run --release -p agentkeys-mock-server -- --port 8090 &"

say "Preflight — node + playwright deps + chromium browser"
command -v node >/dev/null || fail "node not on PATH"
command -v npx >/dev/null || fail "npx not on PATH"
[ -d provisioner-scripts/node_modules ] \
  || fail "provisioner-scripts deps missing. Run: npm install --prefix provisioner-scripts"
# Playwright caches browsers under $HOME/Library/Caches/ms-playwright on macOS
# (checked first) or $HOME/.cache/ms-playwright (checked second);
# a run-in-unusual-HOME provision will hit "browserType.launch: Executable
# doesn't exist" unless they are installed under THIS $HOME.
if ! ls "${HOME}/Library/Caches/ms-playwright/chromium_headless_shell-"* >/dev/null 2>&1 \
  && ! ls "${HOME}/.cache/ms-playwright/chromium_headless_shell-"* >/dev/null 2>&1; then
  fail "Playwright chromium not installed under \$HOME=$HOME. Run: npx playwright install chromium --with-deps"
fi

say "1. Initialize master session"
"$BIN" --backend "$BACKEND" init --mock-token stage5-live-demo || fail "init"

say "2. Env snapshot (masking secrets)"
env | grep -E 'AGENTKEYS_(EMAIL|SIGNUP)_' | sed 's/\(PASSWORD=\).*/\1***REDACTED***/'

say "3. agentkeys provision openrouter"
# Capture the exit status directly: inside `if ! cmd; then`, $? is the status
# of the negated pipeline (always 0), not the command's real exit code.
"$BIN" --backend "$BACKEND" provision openrouter
EC=$?
if [ "$EC" -ne 0 ]; then
  echo "---exit=$EC---"
  # Newest log first; the glob stays outside the quotes so it still expands.
  LOG=$(ls -t "$HOME"/.agentkeys/logs/provision-openrouter-*.log 2>/dev/null | head -1)
  if [ -n "$LOG" ]; then
    echo "=== most recent provision log: $LOG ==="
    cat "$LOG"
  else
    echo "(no provision log written — orchestrator path unreachable)"
  fi
  fail "provision failed; inspect log above"
fi

say "4. AC#1-#3 — read full key back (exit 0 + masked-key form already checked above)"
KEY=$("$BIN" --backend "$BACKEND" read openrouter) || fail "read openrouter"
case "$KEY" in
  sk-or-v1-*) pass "read returned key of correct prefix" ;;
  # Only the first 12 characters are echoed so a full secret never hits the terminal.
  *) fail "read returned unexpected prefix: ${KEY:0:12}..." ;;
esac

say "5. AC#4 — curl OpenRouter /api/v1/models"
HTTP_CODE=$(curl -sS -o /tmp/or-models.json -w '%{http_code}' \
  -H "Authorization: Bearer $KEY" \
  https://openrouter.ai/api/v1/models)
if [ "$HTTP_CODE" != "200" ]; then
  echo "unexpected HTTP $HTTP_CODE"
  head -c 500 /tmp/or-models.json
  fail "OpenRouter /api/v1/models did not return 200"
fi
head -c 40 /tmp/or-models.json
echo ''
pass "OpenRouter /api/v1/models returned 200"

say "ALL FOUR ACCEPTANCE CRITERIA PASS"
echo "SUCCESS"
Loading
Loading