diff --git a/.gitignore b/.gitignore index 40656fa..cb7e86d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ .omc .obsidian /docs/test-screenshots/ +.gstack/ diff --git a/crates/agentkeys-provisioner/src/orchestrator.rs b/crates/agentkeys-provisioner/src/orchestrator.rs index 972c024..3dff5cc 100644 --- a/crates/agentkeys-provisioner/src/orchestrator.rs +++ b/crates/agentkeys-provisioner/src/orchestrator.rs @@ -1,14 +1,14 @@ use std::collections::HashMap; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; -use std::time::Instant; +use std::time::{Instant, SystemTime, UNIX_EPOCH}; use agentkeys_core::backend::CredentialBackend; use agentkeys_types::{ProvisionEvent, ServiceName, Session, TripwireKind, WalletAddress}; use crate::error::{ProvisionError, ProvisionResult}; use crate::metrics::{self, ProvisionMetric, VerificationResultLabel}; -use crate::subprocess::{spawn_and_collect, SubprocessConfig}; +use crate::subprocess::{spawn_and_collect, SubprocessConfig, SubprocessOutcome}; #[derive(Debug, Clone)] pub struct ActiveProvision { @@ -86,6 +86,37 @@ impl Drop for ProvisionGuard { } } +/// Best-effort dump of subprocess output to `~/.agentkeys/logs/provision-<service>-<ts>.log`. +/// Returns the file path if the write succeeded. Never errors — failure to write the log +/// must not mask the underlying provision failure. 
+fn write_provision_log(service: &str, outcome: &SubprocessOutcome) -> Option<PathBuf> { + let home = std::env::var("HOME").ok().map(PathBuf::from)?; + let dir = home.join(".agentkeys").join("logs"); + std::fs::create_dir_all(&dir).ok()?; + let ts = SystemTime::now().duration_since(UNIX_EPOCH).ok()?.as_secs(); + let safe_service: String = service + .chars() + .map(|c| if c.is_ascii_alphanumeric() || c == '-' || c == '_' { c } else { '_' }) + .collect(); + let path = dir.join(format!("provision-{}-{}.log", safe_service, ts)); + + let mut body = String::new(); + body.push_str(&format!( + "service: {}\nexit_code: {:?}\nevents_emitted: {}\n\n=== subprocess stdout events ===\n", + service, + outcome.exit_code, + outcome.events.len() + )); + for ev in &outcome.events { + body.push_str(&format!("{:?}\n", ev)); + } + body.push_str("\n=== subprocess stderr ===\n"); + body.push_str(&outcome.stderr); + + std::fs::write(&path, body).ok()?; + Some(path) +} + /// Returns first 8 chars + `****...` + last 4. For keys shorter than 12 chars returns `****`. pub fn mask_key(key: &str) -> String { if key.len() < 12 { @@ -190,7 +221,26 @@ pub async fn run_provision( } let raw_key = api_key.ok_or_else(|| { - ProvisionError::Internal("subprocess ended without terminal event".to_string()) + let stderr_tail: String = outcome + .stderr + .lines() + .rev() + .take(20) + .collect::<Vec<_>>() + .into_iter() + .rev() + .collect::<Vec<_>>() + .join("\n"); + let log_hint = match write_provision_log(service, &outcome) { + Some(path) => format!("full log: {}", path.display()), + None => "full log: (unable to write ~/.agentkeys/logs — check HOME + permissions)".to_string(), + }; + ProvisionError::Internal(format!( + "subprocess ended without terminal event (exit {:?}). {}. 
stderr tail:\n{}", + outcome.exit_code, + log_hint, + if stderr_tail.is_empty() { "(empty)" } else { stderr_tail.as_str() } + )) })?; let masked = mask_key(&raw_key); diff --git a/docs/manual-test-stage5.md b/docs/manual-test-stage5.md index 47d8aca..1828615 100644 --- a/docs/manual-test-stage5.md +++ b/docs/manual-test-stage5.md @@ -30,70 +30,143 @@ For the demo-only purpose of Stage 5, the goal is the **shortest path to a runni > **This is a temporary demo solution.** For production (v0.1), the agent mailbox moves to SES-hosted `*@agentkeys-email.io` under the three-layer `TokenAuthority` abstraction. See the [email-system wiki page](../wiki/email-system.md) for the full architecture and why we're running demo-and-production on different backends deliberately. -#### 🚀 Demo path: dedicated personal Gmail + TOTP + app password +#### 🚀 Demo path: your existing Gmail + plus-addressing + app password -Why dedicated (not your personal inbox with plus-addressing): the agent gets a clean inbox it fully controls, no personal mail pollution, cleanup is a single account-delete. +Why plus-addressing as the primary demo path: -**1. Create a fresh Gmail account for the bot.** +- **Unique email per run.** OpenRouter's sign-up/sign-in page is a single URL — if you submit an email that already has an account, you land on a returning-user screen the scraper was not designed to traverse, and the provision fails with a `no terminal event`-style error. `you+or-<timestamp>@gmail.com` is a fresh address to OpenRouter on every run, so every run hits the pristine signup path. +- **Zero account creation.** Uses your existing Gmail — no new Google account, no new phone verification. +- **Single inbox to clean up.** The OpenRouter confirmation mail lands in your real inbox; delete one thread after the demo and you're done. +- **Scales to repeat testing.** Rotate the local-part (`+or-1`, `+or-2`, …) or include a timestamp and you have DWD-equivalent disposable emails without building DWD. 
-Sign up at [accounts.google.com](https://accounts.google.com) with a name like `wildmeta-stage5-demo@gmail.com`. Google will ask for a recovery phone — use your personal phone; you only need it once for step 2. +**1. Generate a Gmail app password for IMAP.** -**2. Enable 2-Step Verification and enroll TOTP as the second factor.** +- Requires 2FA enabled on your Google account. If not already enabled: [myaccount.google.com](https://myaccount.google.com) → Security → turn on 2-Step Verification (TOTP or SMS is fine; enrollment is a one-time cost). +- Visit [myaccount.google.com/apppasswords](https://myaccount.google.com/apppasswords). Create one named `agentkeys-stage5`. Google gives you a 16-character password. +- Copy immediately — it's shown once. Revoke anytime from the same page. -Gmail IMAP access chain: `app password` requires `2FA enabled` requires `second factor enrolled`. Using an authenticator app as that second factor makes the account non-interactive after this one-time enrollment. +**2. Export the env vars.** -- Open [myaccount.google.com](https://myaccount.google.com) → **Security** -- **Turn on 2-Step Verification.** Google sends an SMS to your recovery phone to start enrollment. -- Under 2-Step Verification settings, add **Authenticator app** as a second step. Google shows a QR code and a secret. -- Scan into Google Authenticator / Authy / 1Password / Bitwarden / whatever TOTP client you already use. You now own the second factor. -- (Optional) once TOTP is active, you can drop SMS as a 2FA method — Google keeps the phone for account recovery but stops using it as a live second factor. +The scraper splits **IMAP login** from **signup email**. Set both: -**3. Generate an app password for IMAP.** +```bash +export AGENTKEYS_EMAIL_BACKEND=gmail -- Visit [myaccount.google.com/apppasswords](https://myaccount.google.com/apppasswords). -- Create one named "agentkeys-stage5". Google gives you a 16-character password. 
-- Copy it immediately — it's shown once. Revoke anytime from the same page. +# IMAP login — must be the canonical Gmail address. +export AGENTKEYS_EMAIL_USER="you@gmail.com" +export AGENTKEYS_EMAIL_PASSWORD="xxxx xxxx xxxx xxxx" # 16-char app password -**4. Export the four env vars.** +# What we type into OpenRouter's signup form. +# Plus-addressed alias so OpenRouter sees a brand-new email per run; +# mail is still delivered to you@gmail.com. +export AGENTKEYS_SIGNUP_EMAIL="you+or-$(date +%s)@gmail.com" -```bash -export AGENTKEYS_EMAIL_BACKEND=gmail -export AGENTKEYS_EMAIL_USER="wildmeta-stage5-demo@gmail.com" # the bot account from step 1 -export AGENTKEYS_EMAIL_PASSWORD="xxxx xxxx xxxx xxxx" # 16-char app password from step 3 export AGENTKEYS_EMAIL_HOST="imap.gmail.com" export AGENTKEYS_EMAIL_PORT="993" ``` +> **Why two email vars.** `AGENTKEYS_EMAIL_USER` is the IMAP login — Gmail IMAP only accepts your canonical address (plus-addressing aliases are rejected at login). `AGENTKEYS_SIGNUP_EMAIL` is what we fill into the service's sign-up form — plus-addressing works there because SMTP delivery honors the `+alias` suffix. If `AGENTKEYS_SIGNUP_EMAIL` is unset, the scraper falls back to `AGENTKEYS_EMAIL_USER` — which is fine for a dedicated bot account (see alternative below) but guarantees a "account already exists" collision if you reuse a canonical address across runs. + Once the app password is set, the demo sees **zero 2FA prompts**. App passwords bypass 2FA by design — they're Google's non-interactive credential, scoped to IMAP only, revocable anytime. -**5. Daemon running and paired** — see the Stage 4 manual test guide. +**3. Build binaries + install provisioner-script deps (one-time).** + +```bash +cd ~/Projects/agentkeys +cargo build --workspace --release +npm install --prefix provisioner-scripts +npx playwright install chromium --with-deps +```
-Alternative: Google Workspace DWD (for operators with an existing Workspace subscription) +Alternative: dedicated throwaway Gmail (cleanest but more setup) -See [`docs/stage5-workspace-email-setup.md`](stage5-workspace-email-setup.md). That path mints a throwaway `stage5test-@wildmeta.ai` per run, reads its inbox via the Gmail API (no app password, no interactive OAuth), and deletes the user at the end. One-time ~20-minute admin setup + currently 3-5 days of code work to replace the `imapflow` fetcher with a Gmail-API fetcher that uses DWD impersonation. Longer upfront cost than the dedicated-Gmail demo path, but the right choice for enterprise deployments that already run Workspace. +Create a fresh bot Gmail (`wildmeta-stage5-demo@gmail.com`), enable 2FA + TOTP, generate an app password. Set `AGENTKEYS_EMAIL_USER` to the bot address; leave `AGENTKEYS_SIGNUP_EMAIL` unset. One-time ~10 minutes setup; gives you a fully controlled inbox with no personal-mail pollution. Re-runs need `--force` or account-delete between attempts because the bot address itself will collide.
-Alternative: plus-addressed personal Gmail (shared-inbox quick demo) +Alternative: Google Workspace DWD (for operators with an existing Workspace subscription) -If you don't want to create a dedicated account and are OK with one-off OpenRouter mail landing in your real inbox, plus-addressing on your existing Gmail works for a single demo run. +See [`docs/stage5-workspace-email-setup.md`](stage5-workspace-email-setup.md). That path mints a throwaway `stage5test-@wildmeta.ai` per run, reads its inbox via the Gmail API (no app password, no interactive OAuth), and deletes the user at the end. One-time ~20-minute admin setup + currently 3-5 days of code work to replace the `imapflow` fetcher with a Gmail-API fetcher that uses DWD impersonation. Right choice for enterprise deployments that already run Workspace; overkill for the demo. -1. **Your existing personal Gmail account** — plus-addressing is a Gmail-native feature: mail sent to `you+anything@gmail.com` is delivered to `you@gmail.com` without any configuration. A single inbox supports unlimited test aliases (`you+stage5test-20260418@gmail.com`). -2. **Gmail app password** (not your regular password) — generate at https://myaccount.google.com/apppasswords. Scoped to IMAP access only; revoke after the demo. -3. **Environment:** - ```bash - export AGENTKEYS_EMAIL_BACKEND=gmail - export AGENTKEYS_EMAIL_USER="you@gmail.com" # your real Gmail; Stage 5a appends +alias at signup - export AGENTKEYS_EMAIL_PASSWORD="" # NOT your normal Google password - export AGENTKEYS_EMAIL_HOST="imap.gmail.com" - export AGENTKEYS_EMAIL_PORT="993" - ``` +
-Downside: the agent doesn't fully control the inbox (shared with the human), and the OpenRouter confirmation email lingers in your personal mail until you delete it. +### Run it - +Two terminals. Everything runs from the repo root (`~/Projects/agentkeys`). + +**Terminal 1 — mock backend.** Stage 5a stores the provisioned key via the mock server (real Heima + TEE ships in v0.1). Leave this running. + +```bash +cd ~/Projects/agentkeys +cargo run --release -p agentkeys-mock-server -- --port 8090 +# Expected: "Mock server running on port 8090" +``` + +**Terminal 2 — provision.** Carry the Gmail env vars from step 2 into this shell (or re-`export` them here). Note: if you are using plus-addressing, **re-evaluate `AGENTKEYS_SIGNUP_EMAIL` for every run** so the timestamp is fresh and OpenRouter sees a new email — otherwise your second run will collide with the first run's account. + +```bash +cd ~/Projects/agentkeys +BIN=$(pwd)/target/release/agentkeys +BACKEND=http://127.0.0.1:8090 + +# 1. Initialize the master session (one-time per shell / mock restart). +$BIN --backend $BACKEND init --mock-token stage5-demo +# Expected: wallet printed; ~/.agentkeys/master/session.json created. + +# 2. Sanity-check the email env vars landed in this shell. +env | grep -E 'AGENTKEYS_(EMAIL|SIGNUP)_' +# Expected: AGENTKEYS_EMAIL_{BACKEND,USER,PASSWORD,HOST,PORT} and AGENTKEYS_SIGNUP_EMAIL. +# If AGENTKEYS_SIGNUP_EMAIL is missing, the scraper falls back to AGENTKEYS_EMAIL_USER, +# which will hit "account already exists" on the second run against OpenRouter. + +# 3. Re-seed a fresh signup alias for this run (plus-addressing path only). +export AGENTKEYS_SIGNUP_EMAIL="you+or-$(date +%s)@gmail.com" + +# 4. Run the live OpenRouter provision. +$BIN --backend $BACKEND provision openrouter +# Expect ~30-90 s: browser opens headless, account created, +# email verified, API key extracted + verified, stored in the mock backend. 
+``` + +**What this does under the hood:** + +- `init` authenticates the master CLI to the mock backend and caches the session token (OS keychain on macOS/Linux with keychain, file fallback otherwise). +- `provision openrouter` runs `npx tsx provisioner-scripts/src/scrapers/openrouter.ts` against a real Chromium session, uses the Gmail IMAP creds from your exported env to read the confirmation email, extracts + verifies the key against `https://openrouter.ai/api/v1/models`, and stores it into the mock backend under the master session's wallet. +- No daemon, no pairing — Stage 5a provision runs entirely as the master CLI. Daemon + pairing are Stage 4's flow for agent-side credential access, not needed for the live provision demo. + +**After it succeeds:** + +```bash +# Read the full stored key back. +$BIN --backend $BACKEND read openrouter +# Expected: sk-or-v1-... + +# Verify it works against OpenRouter. +curl -s -H "Authorization: Bearer $($BIN --backend $BACKEND read openrouter)" \ + https://openrouter.ai/api/v1/models | head -c 200 +# Expected: HTTP 200 + a JSON body starting with {"data":[... +``` + +**Artifacts you can inspect:** + +- `~/.agentkeys/master/session.json` — the master session (wallet + bearer token). +- `~/.agentkeys/logs/provision-openrouter-<ts>.log` — **written automatically when a provision fails with "no terminal event."** Contains the exit code, every event the subprocess emitted, and the full captured stderr. `ls -lt ~/.agentkeys/logs/ | head` to find the most recent. +- Stderr of `provision openrouter` — the single-shot step lines shown under "Expected behavior" below. + +**Debugging a failure:** + +1. Check the error message on stderr — if it ends with `full log: /path/to/provision-openrouter-<ts>.log`, that file has the full signal. +2. `cat` the log file. The `=== subprocess stderr ===` section usually shows the real cause (Playwright browser-launch error, IMAP connection refused, an unhandled rejection from the pattern, etc.). +3. 
For interactive debugging, run the TS scraper directly against a visible browser: + ```bash + # Temporarily flip headless:false at provisioner-scripts/src/scrapers/openrouter.ts:~116, + # then: + cd ~/Projects/agentkeys + npx tsx provisioner-scripts/src/scrapers/openrouter.ts + ``` + You'll see the page in real time — instant diagnosis for selector drift, returning-user UI paths, or CAPTCHA challenges. ### Expected behavior @@ -116,6 +189,8 @@ Downside: the agent doesn't fully control the inbox (shared with the human), and ### Failure modes to watch for +- **"subprocess ended without terminal event"** — the scraper crashed before emitting any event (Playwright browser-launch failed, IMAP connection refused, unhandled rejection, etc.). The error message now ends with `full log: ~/.agentkeys/logs/provision-openrouter-<ts>.log` — open that file; the `=== subprocess stderr ===` section has the real cause. If stderr is empty, re-run the TS scraper directly with `npx tsx provisioner-scripts/src/scrapers/openrouter.ts` and watch the node-side output. +- **"account already exists" (returning-user path)** — OpenRouter's `/auth` is signup+signin on one URL. If `AGENTKEYS_SIGNUP_EMAIL` is an address that already has an OpenRouter account, the site lands on a returning-user UI the scraper can't traverse, and you'll get a `selector_timeout` tripwire or (if the path is weirder) a "no terminal event." Re-evaluate `AGENTKEYS_SIGNUP_EMAIL` with a fresh timestamp (`you+or-$(date +%s)@gmail.com`) and retry. - **CAPTCHA / Cloudflare challenge** — the Tier 2 script does not solve CAPTCHAs. Expect a Tripwire event with `kind: selector_timeout`. This is the signal that Stage 5b's agentic fallback is needed. Until 5b ships, abort and retry from a different IP. - **Email didn't arrive within 60 s** — check spam, check plus-addressing forwarding. Tripwire `email_timeout` means the IMAP fetch exhausted its polling window. 
- **Key verification fails with `phantom`** — the scraper extracted something key-shaped that isn't a real API key. OpenRouter may have changed its DOM; inspect the page at the success-step selector and file an issue with the HAR dump. @@ -206,17 +281,78 @@ These are slop markers. Apply the suggested `cargo clippy --fix` or hand-replace --- -## 4. What to do when Stage 5b lands +## 4. Stage 5b — CDP-connected real-Chrome scraper (partial: proven working, blocked on email duplicate) + +### What's landed -When Stage 5b ships (agentic fallback, `/agentkeys-record-scraper` skill, script-generation loop), this document will grow: +- **[provisioner-scripts/src/scrapers/openrouter-cdp.ts](../provisioner-scripts/src/scrapers/openrouter-cdp.ts)** — connects to a user-launched real Chrome via `chromium.connectOverCDP()`, drives the OpenRouter Clerk-hosted signup form, polls Gmail IMAP for the OTP code, mints a new key on `/keys`, prints the `sk-or-v1-*` value on stdout. +- **Why CDP, not Playwright-launched Chromium:** Playwright's bundled Chromium ships with `--enable-automation` baked in. Cloudflare Turnstile detects this at runtime (error **600010** — "browser execution environment suspicious") and refuses to issue a token even when a human clicks the checkbox. Connecting to a user-launched *real* Chrome bypasses this because the browser process has no automation flags. Verified 2026-04-20: Turnstile passes invisibly in real Chrome, Clerk backend returns normal responses. +### How to run (when you have a fresh-to-OpenRouter email) + +1. **Launch real Chrome with CDP enabled** (fresh profile, separate from your daily browsing): + ```bash + /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \ + --remote-debugging-port=9222 \ + --user-data-dir=/tmp/agentkeys-chrome-profile & + ``` + A blank Chrome window opens. Don't navigate it manually — the scraper drives it. + +2. 
**Export env** (Gmail IMAP creds + a signup email OpenRouter hasn't seen): + ```bash + export AGENTKEYS_EMAIL_BACKEND=gmail + export AGENTKEYS_EMAIL_USER="you@gmail.com" # canonical IMAP login + export AGENTKEYS_EMAIL_PASSWORD="" + export AGENTKEYS_EMAIL_HOST="imap.gmail.com" + export AGENTKEYS_EMAIL_PORT="993" + export AGENTKEYS_SIGNUP_EMAIL="" + export AGENTKEYS_SIGNUP_PASSWORD="" + ``` + +3. **Run the scraper:** + ```bash + cd ~/Projects/agentkeys + node --import tsx/esm provisioner-scripts/src/scrapers/openrouter-cdp.ts + ``` + Last stdout line is the `sk-or-v1-*` key. Stderr shows `[cdp]