diff --git a/experiments/bot_detection/data/open_pr_activity.parquet b/experiments/bot_detection/data/open_pr_activity.parquet new file mode 100644 index 0000000..5545276 Binary files /dev/null and b/experiments/bot_detection/data/open_pr_activity.parquet differ diff --git a/experiments/bot_detection/data/results/pocket_veto_analysis.json b/experiments/bot_detection/data/results/pocket_veto_analysis.json new file mode 100644 index 0000000..4138096 --- /dev/null +++ b/experiments/bot_detection/data/results/pocket_veto_analysis.json @@ -0,0 +1,447 @@ +{ + "universal_threshold_days": 90, + "characterization": { + "state_totals": { + "CLOSED": 34637, + "MERGED": 146033, + "OPEN": 19502 + }, + "outcome_totals": { + "merged": 146033, + "pocket_veto": 30894, + "rejected": 23245 + }, + "state_outcome_crosstab": [ + { + "state": "CLOSED", + "outcome": "pocket_veto", + "n": 11392 + }, + { + "state": "CLOSED", + "outcome": "rejected", + "n": 23245 + }, + { + "state": "MERGED", + "outcome": "merged", + "n": 146033 + }, + { + "state": "OPEN", + "outcome": "pocket_veto", + "n": 19502 + } + ], + "stale_threshold_distribution": [ + { + "threshold_days": 30.0, + "n": 179748 + }, + { + "threshold_days": 51.44804763793945, + "n": 7325 + }, + { + "threshold_days": 62.094017028808594, + "n": 4728 + }, + { + "threshold_days": 32.073368072509766, + "n": 4034 + }, + { + "threshold_days": 30.195825576782227, + "n": 1035 + }, + { + "threshold_days": 73.39346313476562, + "n": 790 + }, + { + "threshold_days": 30.832239151000977, + "n": 426 + }, + { + "threshold_days": 40.00093460083008, + "n": 398 + }, + { + "threshold_days": 33.224021911621094, + "n": 296 + }, + { + "threshold_days": 104.15167236328125, + "n": 197 + }, + { + "threshold_days": 34.333587646484375, + "n": 176 + }, + { + "threshold_days": 30.21098518371582, + "n": 150 + }, + { + "threshold_days": 33.63052749633789, + "n": 150 + }, + { + "threshold_days": 41.0978889465332, + "n": 137 + }, + { + "threshold_days": 
30.72226905822754, + "n": 130 + }, + { + "threshold_days": 39.4652099609375, + "n": 127 + }, + { + "threshold_days": 58.54113006591797, + "n": 122 + }, + { + "threshold_days": 92.92449951171875, + "n": 118 + }, + { + "threshold_days": 64.95616912841797, + "n": 85 + } + ], + "repos_total": 96, + "repos_using_default_30d": 78, + "repos_calibrated": 18, + "per_repo_calibration_check": { + "mean_delta_vs_2x_median_ttc": 31.311005377063044, + "median_delta_vs_2x_median_ttc": 28.69724537037037, + "n_repos_with_closed_prs": 96 + }, + "open_pr_age_quantiles_days": { + "p10": 72.75832986111112, + "p25": 138.96756076388888, + "p50": 245.59493055555555, + "p75": 481.70465856481485, + "p90": 923.2952430555556, + "p95": 1096.9740763888894 + } + }, + "distributions": { + "merge_rate_v3": { + "mean": 0.5567027952888478, + "median": 0.75, + "p10": 0.0, + "p25": 0.0, + "p75": 1.0, + "p90": 1.0 + }, + "merge_rate_universal": { + "mean": 0.5368757212868577, + "median": 0.6666666666666666, + "p10": 0.0, + "p25": 0.0, + "p75": 1.0, + "p90": 1.0 + }, + "merge_rate_per_repo": { + "mean": 0.5332918174688098, + "median": 0.6182486417385746, + "p10": 0.0, + "p25": 0.0, + "p75": 1.0, + "p90": 1.0 + }, + "merge_rate_idle_universal": { + "mean": 0.5514260290930824, + "median": 0.7, + "p10": 0.0, + "p25": 0.0, + "p75": 1.0, + "p90": 1.0 + }, + "merge_rate_idle_per_repo": { + "mean": 0.5505710864785152, + "median": 0.6729415904292751, + "p10": 0.0, + "p25": 0.0, + "p75": 1.0, + "p90": 1.0 + }, + "merge_rate_universal_30d": { + "mean": 0.5332379062475792, + "median": 0.6153846153846154, + "p10": 0.0, + "p25": 0.0, + "p75": 1.0, + "p90": 1.0 + }, + "merge_rate_universal_60d": { + "mean": 0.5355565622443484, + "median": 0.6363636363636364, + "p10": 0.0, + "p25": 0.0, + "p75": 1.0, + "p90": 1.0 + }, + "merge_rate_universal_90d": { + "mean": 0.5368757212868577, + "median": 0.6666666666666666, + "p10": 0.0, + "p25": 0.0, + "p75": 1.0, + "p90": 1.0 + }, + "merge_rate_universal_180d": { + "mean": 
0.5409542251449829, + "median": 0.6666666666666666, + "p10": 0.0, + "p25": 0.0, + "p75": 1.0, + "p90": 1.0 + } + }, + "shift_analysis": { + "n_authors": 31296, + "merge_rate_universal": { + "mean_delta": -0.019827074001990234, + "median_delta": 0.0, + "n_dropped_gt_0.05": 2370, + "n_dropped_gt_0.10": 1836, + "n_dropped_gt_0.25": 895, + "n_unchanged": 28493 + }, + "merge_rate_per_repo": { + "mean_delta": -0.023410977820038193, + "median_delta": 0.0, + "n_dropped_gt_0.05": 2663, + "n_dropped_gt_0.10": 2111, + "n_dropped_gt_0.25": 1094, + "n_unchanged": 28179 + }, + "merge_rate_idle_universal": { + "mean_delta": -0.005276766195765481, + "median_delta": 0.0, + "n_dropped_gt_0.05": 607, + "n_dropped_gt_0.10": 459, + "n_dropped_gt_0.25": 241, + "n_unchanged": 30434 + }, + "merge_rate_idle_per_repo": { + "mean_delta": -0.006131708810332761, + "median_delta": 0.0, + "n_dropped_gt_0.05": 705, + "n_dropped_gt_0.10": 529, + "n_dropped_gt_0.25": 279, + "n_unchanged": 30305 + } + }, + "signal_evaluation": { + "n_labeled": 31293, + "n_suspended": 739, + "n_active": 30554, + "merge_rate_v3": { + "cv_auc": 0.5493659354900655, + "fold_auc_std": 0.01149119403429272, + "mean_merge_rate_suspended": 0.5136005337296283, + "mean_merge_rate_active": 0.557734499146874, + "cohens_d_active_vs_suspended": 0.09623777997840265, + "n_suspended": 739, + "n_active": 30554 + }, + "merge_rate_universal": { + "cv_auc": 0.548848295654899, + "fold_auc_std": 0.011339429192468128, + "mean_merge_rate_suspended": 0.5011117987116154, + "mean_merge_rate_active": 0.5377443527572696, + "cohens_d_active_vs_suspended": 0.08114560040346083, + "n_suspended": 739, + "n_active": 30554 + }, + "merge_rate_per_repo": { + "cv_auc": 0.5486495526055911, + "fold_auc_std": 0.011542410042848126, + "mean_merge_rate_suspended": 0.5010423916595617, + "mean_merge_rate_active": 0.534091457487316, + "cohens_d_active_vs_suspended": 0.0734021937818801, + "n_suspended": 739, + "n_active": 30554 + }, + "merge_rate_idle_universal": { + 
"cv_auc": 0.5488917644689147, + "fold_auc_std": 0.011813844138263785, + "mean_merge_rate_suspended": 0.5128096256371848, + "mean_merge_rate_active": 0.552348716801441, + "cohens_d_active_vs_suspended": 0.08659711664734189, + "n_suspended": 739, + "n_active": 30554 + }, + "merge_rate_idle_per_repo": { + "cv_auc": 0.5487741351566113, + "fold_auc_std": 0.011853501921961123, + "mean_merge_rate_suspended": 0.5128096256371848, + "mean_merge_rate_active": 0.5514730120143265, + "cohens_d_active_vs_suspended": 0.08473831668514915, + "n_suspended": 739, + "n_active": 30554 + } + }, + "example_authors": [ + { + "login": "wuhang2014", + "account_status": "active", + "total_prs": 9, + "merged": 1.0, + "closed": 0.0, + "open_total": 8.0, + "open_stale_per_repo": 8.0, + "open_stale_universal": 8.0, + "open_stale_idle_universal": 0.0, + "open_stale_idle_per_repo": 0.0, + "merge_rate_v3": 1.0, + "merge_rate_universal": 0.1111111111111111, + "merge_rate_per_repo": 0.1111111111111111, + "merge_rate_idle_universal": 1.0, + "merge_rate_idle_per_repo": 1.0 + }, + { + "login": "simondanielsson", + "account_status": "active", + "total_prs": 7, + "merged": 1.0, + "closed": 0.0, + "open_total": 6.0, + "open_stale_per_repo": 6.0, + "open_stale_universal": 6.0, + "open_stale_idle_universal": 1.0, + "open_stale_idle_per_repo": 1.0, + "merge_rate_v3": 1.0, + "merge_rate_universal": 0.14285714285714285, + "merge_rate_per_repo": 0.14285714285714285, + "merge_rate_idle_universal": 0.5, + "merge_rate_idle_per_repo": 0.5 + }, + { + "login": "sahelib25", + "account_status": "active", + "total_prs": 6, + "merged": 1.0, + "closed": 0.0, + "open_total": 5.0, + "open_stale_per_repo": 5.0, + "open_stale_universal": 5.0, + "open_stale_idle_universal": 0.0, + "open_stale_idle_per_repo": 0.0, + "merge_rate_v3": 1.0, + "merge_rate_universal": 0.16666666666666666, + "merge_rate_per_repo": 0.16666666666666666, + "merge_rate_idle_universal": 1.0, + "merge_rate_idle_per_repo": 1.0 + }, + { + "login": "Copilot", + 
"account_status": "suspended", + "total_prs": 438, + "merged": 177.0, + "closed": 38.0, + "open_total": 223.0, + "open_stale_per_repo": 223.0, + "open_stale_universal": 204.0, + "open_stale_idle_universal": 4.0, + "open_stale_idle_per_repo": 4.0, + "merge_rate_v3": 0.8232558139534883, + "merge_rate_universal": 0.4224343675417661, + "merge_rate_per_repo": 0.4041095890410959, + "merge_rate_idle_universal": 0.8082191780821918, + "merge_rate_idle_per_repo": 0.8082191780821918 + }, + { + "login": "iycheng", + "account_status": "suspended", + "total_prs": 423, + "merged": 295.0, + "closed": 110.0, + "open_total": 18.0, + "open_stale_per_repo": 18.0, + "open_stale_universal": 18.0, + "open_stale_idle_universal": 0.0, + "open_stale_idle_per_repo": 0.0, + "merge_rate_v3": 0.7283950617283951, + "merge_rate_universal": 0.6973995271867612, + "merge_rate_per_repo": 0.6973995271867612, + "merge_rate_idle_universal": 0.7283950617283951, + "merge_rate_idle_per_repo": 0.7283950617283951 + }, + { + "login": "amd-jmacaran", + "account_status": "suspended", + "total_prs": 105, + "merged": 105.0, + "closed": 0.0, + "open_total": 0.0, + "open_stale_per_repo": 0.0, + "open_stale_universal": 0.0, + "open_stale_idle_universal": 0.0, + "open_stale_idle_per_repo": 0.0, + "merge_rate_v3": 1.0, + "merge_rate_universal": 1.0, + "merge_rate_per_repo": 1.0, + "merge_rate_idle_universal": 1.0, + "merge_rate_idle_per_repo": 1.0 + }, + { + "login": "harupy", + "account_status": "active", + "total_prs": 2771, + "merged": 2151.0, + "closed": 327.0, + "open_total": 293.0, + "open_stale_per_repo": 293.0, + "open_stale_universal": 281.0, + "open_stale_idle_universal": 0.0, + "open_stale_idle_per_repo": 0.0, + "merge_rate_v3": 0.8680387409200968, + "merge_rate_universal": 0.7796303008336354, + "merge_rate_per_repo": 0.776254059906171, + "merge_rate_idle_universal": 0.8680387409200968, + "merge_rate_idle_per_repo": 0.8680387409200968 + }, + { + "login": "baskaryan", + "account_status": "active", + 
"total_prs": 1494, + "merged": 1295.0, + "closed": 153.0, + "open_total": 46.0, + "open_stale_per_repo": 46.0, + "open_stale_universal": 46.0, + "open_stale_idle_universal": 0.0, + "open_stale_idle_per_repo": 0.0, + "merge_rate_v3": 0.8943370165745856, + "merge_rate_universal": 0.8668005354752343, + "merge_rate_per_repo": 0.8668005354752343, + "merge_rate_idle_universal": 0.8943370165745856, + "merge_rate_idle_per_repo": 0.8943370165745856 + } + ], + "recommendation": { + "decision": "Keep v3 as-is", + "rationale": "No variant beats v3 CV AUC 0.5494 by >0.005 (aucs={'merge_rate_v3': 0.5494, 'merge_rate_universal': 0.5488, 'merge_rate_per_repo': 0.5486, 'merge_rate_idle_universal': 0.5489, 'merge_rate_idle_per_repo': 0.5488}). Cohen's d also fails to improve (base=0.096, best_alt=0.087).", + "cv_aucs": { + "merge_rate_v3": 0.5493659354900655, + "merge_rate_universal": 0.548848295654899, + "merge_rate_per_repo": 0.5486495526055911, + "merge_rate_idle_universal": 0.5488917644689147, + "merge_rate_idle_per_repo": 0.5487741351566113 + }, + "cohens_d": { + "merge_rate_v3": 0.09623777997840265, + "merge_rate_universal": 0.08114560040346083, + "merge_rate_per_repo": 0.0734021937818801, + "merge_rate_idle_universal": 0.08659711664734189, + "merge_rate_idle_per_repo": 0.08473831668514915 + }, + "universal_threshold_days": 90 + } +} \ No newline at end of file diff --git a/experiments/bot_detection/pocket_veto_findings.md b/experiments/bot_detection/pocket_veto_findings.md new file mode 100644 index 0000000..813af4a --- /dev/null +++ b/experiments/bot_detection/pocket_veto_findings.md @@ -0,0 +1,86 @@ +# Pocket Veto Investigation — Findings + +Investigation for issue #51. Does counting stale open PRs as implicit +rejections meaningfully change merge-rate distributions and improve the +signal's ability to separate suspended from active accounts? 
+ +## Dataset + +- 200172 PRs across 96 repos +- State totals: {'CLOSED': 34637, 'MERGED': 146033, 'OPEN': 19502} +- Outcome totals: {'merged': 146033, 'pocket_veto': 30894, 'rejected': 23245} +- Labeled authors: 31293 (739 suspended, 30554 active) + +## Staleness definitions compared + +- **v3 (baseline)**: `merged / (merged + closed)` — current scorer.py. +- **age_universal**: open PR is stale if age > 90d since `created_at`. +- **age_per_repo**: open PR is stale if age > that repo's + `stale_threshold_days` (populated in the DuckDB; default 30d). +- **idle_universal**: open PR is stale if it is still open AND idle > 90d (`fetch_now - updated_at`). +- **idle_per_repo**: same, with the per-repo threshold substituted. + +The `idle_*` variants use a live re-fetch of every DB-OPEN PR's +`updatedAt` (see `fetch_open_pr_activity.py`). PRs that were OPEN at +the snapshot but have since been closed or merged are treated as +non-stale — the close/merge event itself is activity. + +## Calibration sanity check + +- Repos using the default 30d threshold: 78 / 96 +- Repos with a calibrated threshold: 18 +- Per-repo calibrated thresholds vs 2x median time-to-close: + mean delta = 31.3110, median delta = 28.6972 (days). 
+ +## Distribution shift + +Mean merge rate across all authors: + +| Definition | mean | median | p10 | p90 | +|---|---|---|---|---| +| v3 baseline | 0.5567 | 0.7500 | 0.0000 | 1.0000 | +| age_universal (90d) | 0.5369 | 0.6667 | 0.0000 | 1.0000 | +| age_per_repo | 0.5333 | 0.6182 | 0.0000 | 1.0000 | +| idle_universal (90d) | 0.5514 | 0.7000 | 0.0000 | 1.0000 | +| idle_per_repo | 0.5506 | 0.6729 | 0.0000 | 1.0000 | + +Per-author drop from the v3 baseline (n authors, >0.10 / >0.25): + +- **age_universal**: 1836 / 895 +- **age_per_repo**: 2111 / 1094 +- **idle_universal**: 459 / 241 +- **idle_per_repo**: 529 / 279 + +## Signal quality vs ground truth + +2-feature logistic regression (merge_rate + log1p(median_additions)), +5-fold CV on 31293 labeled authors: + +| Definition | CV AUC | Active mean | Suspended mean | Cohen's d | +|---|---|---|---|---| +| v3 baseline | 0.5494 | 0.5577 | 0.5136 | 0.0962 | +| age_universal | 0.5488 | 0.5377 | 0.5011 | 0.0811 | +| age_per_repo | 0.5486 | 0.5341 | 0.5010 | 0.0734 | +| idle_universal | 0.5489 | 0.5523 | 0.5128 | 0.0866 | +| idle_per_repo | 0.5488 | 0.5515 | 0.5128 | 0.0847 | + +## Recommendation + +See the `recommendation` field in `data/results/pocket_veto_analysis.json` for the machine-readable +decision logic. Text summary and follow-up branch sketch below. + +**Keep v3 as-is** — No variant beats v3 CV AUC 0.5494 by >0.005 (aucs={'merge_rate_v3': 0.5494, 'merge_rate_universal': 0.5488, 'merge_rate_per_repo': 0.5486, 'merge_rate_idle_universal': 0.5489, 'merge_rate_idle_per_repo': 0.5488}). Cohen's d also fails to improve (base=0.096, best_alt=0.087). + +### Follow-up branch sketch (if adopted) + +- `src/good_egg/github_client.py`: extend `_COMBINED_QUERY` with an + `openPullRequests` selection that pulls `createdAt`/`updatedAt` for + each OPEN PR on the scored user (or `totalCount` if we can push the + staleness filter into the query). 
+- `src/good_egg/models.py`: add `open_stale_pr_count: int` (or similar) + to `UserContributionData`. +- `src/good_egg/scorer.py:256-261`: change the `_score_v3` merge-rate + formula to `merged / (merged + closed + open_stale)`. +- `src/good_egg/config.py`: add the staleness threshold as a tunable + config value. +- Tests: parallel coverage in `tests/test_scorer.py`. diff --git a/experiments/bot_detection/scripts/fetch_open_pr_activity.py b/experiments/bot_detection/scripts/fetch_open_pr_activity.py new file mode 100644 index 0000000..2fa964f --- /dev/null +++ b/experiments/bot_detection/scripts/fetch_open_pr_activity.py @@ -0,0 +1,182 @@ +"""Fetch updatedAt for every OPEN PR in the bot_detection DuckDB. + +Used by pocket_veto_analysis.py to compute idle-time-based staleness (a +better proxy than age-since-created, which the DuckDB schema forces). + +For each repo that has OPEN PRs in the DB, paginate +repository.pullRequests(states: OPEN) and collect (number, updatedAt). PRs +that were OPEN in the DB snapshot but have since been closed or merged are +by definition non-stale (the close/merge event itself is activity), so we +don't need to look them up — they just won't appear in the fetched set and +the analysis treats them as non-stale. 
+ +Output: experiments/bot_detection/data/open_pr_activity.parquet + columns: repo, number, updated_at, fetch_now +""" + +from __future__ import annotations + +import os +import subprocess +import sys +import time +from datetime import UTC, datetime +from pathlib import Path + +import duckdb +import httpx +import pandas as pd + +BASE = Path(__file__).resolve().parents[1] +DB_PATH = BASE / "data" / "bot_detection.duckdb" +OUT_PATH = BASE / "data" / "open_pr_activity.parquet" + +GRAPHQL_URL = "https://api.github.com/graphql" +PAGE_SIZE = 100 + +QUERY = """ +query($owner: String!, $name: String!, $cursor: String) { + repository(owner: $owner, name: $name) { + pullRequests(states: OPEN, first: 100, after: $cursor, + orderBy: {field: CREATED_AT, direction: ASC}) { + pageInfo { hasNextPage endCursor } + nodes { number updatedAt } + } + } + rateLimit { remaining resetAt } +} +""" + + +def get_token() -> str: + token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") + if token: + return token + result = subprocess.run( + ["gh", "auth", "token"], check=True, capture_output=True, text=True, + ) + return result.stdout.strip() + + +def fetch_repo( + client: httpx.Client, owner: str, name: str, +) -> list[tuple[int, str]]: + results: list[tuple[int, str]] = [] + cursor: str | None = None + while True: + resp = client.post( + GRAPHQL_URL, + json={ + "query": QUERY, + "variables": {"owner": owner, "name": name, "cursor": cursor}, + }, + ) + resp.raise_for_status() + payload = resp.json() + if "errors" in payload: + print(f" GraphQL errors: {payload['errors']}", file=sys.stderr) + break + repo_data = payload["data"]["repository"] + if repo_data is None: + print(f" repo not found: {owner}/{name}", file=sys.stderr) + break + prs = repo_data["pullRequests"] + for node in prs["nodes"]: + results.append((node["number"], node["updatedAt"])) + remaining = payload["data"]["rateLimit"]["remaining"] + if remaining < 100: + reset_at = payload["data"]["rateLimit"]["resetAt"] + 
print( + f" rate limit low ({remaining}), sleeping until {reset_at}", + file=sys.stderr, + ) + time.sleep(60) + if not prs["pageInfo"]["hasNextPage"]: + break + cursor = prs["pageInfo"]["endCursor"] + return results + + +def main() -> None: + con = duckdb.connect(str(DB_PATH), read_only=True) + repo_counts = con.execute(""" + SELECT repo, COUNT(*) AS n + FROM prs WHERE state='OPEN' + GROUP BY repo ORDER BY n DESC + """).fetchall() + con.close() + + db_open_keys: dict[str, set[int]] = {} + con = duckdb.connect(str(DB_PATH), read_only=True) + for (repo,) in con.execute( + "SELECT DISTINCT repo FROM prs WHERE state='OPEN'" + ).fetchall(): + numbers = con.execute( + "SELECT number FROM prs WHERE state='OPEN' AND repo=?", [repo] + ).fetchall() + db_open_keys[repo] = {n[0] for n in numbers} + con.close() + + token = get_token() + headers = { + "Authorization": f"bearer {token}", + "Accept": "application/vnd.github+json", + } + fetch_now = datetime.now(UTC).isoformat() + rows: list[dict[str, object]] = [] + + with httpx.Client(headers=headers, timeout=60.0) as client: + for idx, (repo, n_db_open) in enumerate(repo_counts, start=1): + owner, name = repo.split("/", 1) + print( + f"[{idx}/{len(repo_counts)}] {repo} " + f"(db_open={n_db_open})...", + flush=True, + ) + try: + fetched = fetch_repo(client, owner, name) + except httpx.HTTPStatusError as exc: + print( + f" HTTP error for {repo}: {exc}", file=sys.stderr, + ) + continue + relevant = [ + (num, ts) for (num, ts) in fetched + if num in db_open_keys.get(repo, set()) + ] + print( + f" fetched {len(fetched)} currently-open, " + f"{len(relevant)} match db-open set", + flush=True, + ) + for num, ts in relevant: + rows.append( + { + "repo": repo, + "number": num, + "updated_at": ts, + "fetch_now": fetch_now, + } + ) + + df = pd.DataFrame(rows) + df["updated_at"] = pd.to_datetime(df["updated_at"]) + df["fetch_now"] = pd.to_datetime(df["fetch_now"]) + OUT_PATH.parent.mkdir(parents=True, exist_ok=True) + df.to_parquet(OUT_PATH, 
index=False) + + total_db_open = sum(len(v) for v in db_open_keys.values()) + print() + print(f"Wrote {len(df)} rows to {OUT_PATH}") + print( + f"Coverage: {len(df)} / {total_db_open} db-OPEN PRs still currently " + f"open ({100 * len(df) / total_db_open:.1f}%)" + ) + print( + f"Missing: {total_db_open - len(df)} PRs (closed/merged since snapshot" + f" — treated as non-stale)" + ) + + +if __name__ == "__main__": + main() diff --git a/experiments/bot_detection/scripts/pocket_veto_analysis.py b/experiments/bot_detection/scripts/pocket_veto_analysis.py new file mode 100644 index 0000000..512c414 --- /dev/null +++ b/experiments/bot_detection/scripts/pocket_veto_analysis.py @@ -0,0 +1,637 @@ +"""Pocket-veto investigation for issue #51. + +Analyze whether counting stale open PRs as implicit rejections meaningfully +shifts merge-rate distributions and improves the signal's ability to separate +suspended from active GitHub accounts. + +Operates entirely on the existing bot_detection DuckDB. Does not fetch from +GitHub. Does not modify src/good_egg/. 
+ +Outputs: + - experiments/bot_detection/data/results/pocket_veto_analysis.json + - experiments/bot_detection/pocket_veto_findings.md +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +import duckdb +import numpy as np +import pandas as pd +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import roc_auc_score +from sklearn.model_selection import StratifiedKFold +from sklearn.preprocessing import StandardScaler + +BASE = Path(__file__).resolve().parents[1] +DB_PATH = BASE / "data" / "bot_detection.duckdb" +ACTIVITY_PATH = BASE / "data" / "open_pr_activity.parquet" +RESULTS_PATH = BASE / "data" / "results" / "pocket_veto_analysis.json" +FINDINGS_PATH = BASE / "pocket_veto_findings.md" + +UNIVERSAL_THRESHOLD_DAYS = 90 +SENSITIVITY_THRESHOLDS = (30, 60, 90, 180) +SEED = 42 + +MERGE_RATE_COLS = ( + "merge_rate_v3", + "merge_rate_universal", + "merge_rate_per_repo", + "merge_rate_idle_universal", + "merge_rate_idle_per_repo", +) + + +def characterize(con: duckdb.DuckDBPyConnection) -> dict[str, Any]: + """Phase 1: sanity-check the existing outcome/stale_threshold_days columns.""" + totals = con.execute(""" + SELECT state, COUNT(*) FROM prs GROUP BY state ORDER BY state + """).fetchall() + outcomes = con.execute(""" + SELECT outcome, COUNT(*) FROM prs GROUP BY outcome ORDER BY outcome + """).fetchall() + state_outcome = con.execute(""" + SELECT state, outcome, COUNT(*) FROM prs + GROUP BY state, outcome ORDER BY state, outcome + """).fetchall() + + thresh_dist = con.execute(""" + SELECT stale_threshold_days, COUNT(*) AS n + FROM prs WHERE stale_threshold_days IS NOT NULL + GROUP BY stale_threshold_days ORDER BY n DESC + """).fetchall() + + # Per-repo: how does stored stale_threshold_days compare to 2x median + # time-to-close (the hypothesis in the issue)? 
+ repo_stats = con.execute(""" + WITH closed AS ( + SELECT repo, + EXTRACT(EPOCH FROM (closed_at - created_at))/86400.0 AS ttc_days + FROM prs + WHERE state IN ('MERGED', 'CLOSED') + AND closed_at IS NOT NULL + AND created_at IS NOT NULL + AND closed_at > created_at + ), + repo_ttc AS ( + SELECT repo, MEDIAN(ttc_days) AS median_ttc_days, COUNT(*) AS n_closed + FROM closed GROUP BY repo + ), + repo_thresh AS ( + SELECT repo, + ANY_VALUE(stale_threshold_days) AS stale_threshold_days, + COUNT(DISTINCT stale_threshold_days) AS distinct_thresh + FROM prs + WHERE stale_threshold_days IS NOT NULL + GROUP BY repo + ) + SELECT t.repo, t.stale_threshold_days, t.distinct_thresh, + r.median_ttc_days, r.n_closed + FROM repo_thresh t LEFT JOIN repo_ttc r USING (repo) + ORDER BY t.repo + """).fetchdf() + + repo_stats["two_x_median"] = 2.0 * repo_stats["median_ttc_days"] + repo_stats["delta_vs_2x"] = ( + repo_stats["stale_threshold_days"] - repo_stats["two_x_median"] + ) + + # Open PR age distribution (relative to the DB's max created_at as "now") + age_stats = con.execute(""" + WITH ref AS (SELECT MAX(created_at) AS now FROM prs) + SELECT + quantile_cont( + EXTRACT(EPOCH FROM (ref.now - p.created_at))/86400.0, + [0.1, 0.25, 0.5, 0.75, 0.9, 0.95] + ) AS quantiles + FROM prs p, ref WHERE p.state = 'OPEN' + """).fetchone() + + return { + "state_totals": dict(totals), + "outcome_totals": dict(outcomes), + "state_outcome_crosstab": [ + {"state": s, "outcome": o, "n": n} for s, o, n in state_outcome + ], + "stale_threshold_distribution": [ + {"threshold_days": float(t), "n": int(n)} for t, n in thresh_dist + ], + "repos_total": int(len(repo_stats)), + "repos_using_default_30d": int( + (repo_stats["stale_threshold_days"] == 30.0).sum() + ), + "repos_calibrated": int( + (repo_stats["stale_threshold_days"] != 30.0).sum() + ), + "per_repo_calibration_check": { + "mean_delta_vs_2x_median_ttc": float( + repo_stats["delta_vs_2x"].dropna().mean() + ), + "median_delta_vs_2x_median_ttc": float( 
+ repo_stats["delta_vs_2x"].dropna().median() + ), + "n_repos_with_closed_prs": int( + repo_stats["median_ttc_days"].notna().sum() + ), + }, + "open_pr_age_quantiles_days": { + label: float(v) for label, v in zip( + ["p10", "p25", "p50", "p75", "p90", "p95"], + age_stats[0], + strict=True, + ) + }, + } + + +def build_author_features(con: duckdb.DuckDBPyConnection) -> pd.DataFrame: + """Phase 2: per-author counts and all five merge-rate definitions. + + Age-based variants use (now - created_at) with 'now' = max(created_at) + in the DB. Idle-based variants use (fetch_now - updated_at) from the + open_pr_activity.parquet sidecar; PRs that were OPEN in the DB snapshot + but are no longer currently open are treated as non-stale (the close or + merge event since the snapshot is itself evidence of activity). + """ + has_activity = ACTIVITY_PATH.exists() + sensitivity_cols = ",\n ".join( + f"SUM(CASE WHEN state='OPEN' AND age_days > {d} " + f"THEN 1 ELSE 0 END) AS open_stale_{d}d" + for d in SENSITIVITY_THRESHOLDS + ) + activity_join = "" + idle_cols = ( + "0 AS open_stale_idle_universal, 0 AS open_stale_idle_per_repo," + ) + if has_activity: + activity_join = f""" + LEFT JOIN read_parquet('{ACTIVITY_PATH}') oa + ON oa.repo = aged.repo AND oa.number = aged.number + """ + idle_cols = f""" + SUM(CASE + WHEN state='OPEN' AND oa.updated_at IS NOT NULL + AND EXTRACT(EPOCH FROM (oa.fetch_now - oa.updated_at)) + /86400.0 > {UNIVERSAL_THRESHOLD_DAYS} + THEN 1 ELSE 0 + END) AS open_stale_idle_universal, + SUM(CASE + WHEN state='OPEN' AND oa.updated_at IS NOT NULL + AND EXTRACT(EPOCH FROM (oa.fetch_now - oa.updated_at)) + /86400.0 > COALESCE(stale_threshold_days, 30) + THEN 1 ELSE 0 + END) AS open_stale_idle_per_repo, + """ + query = f""" + WITH ref AS (SELECT MAX(created_at) AS now FROM prs), + aged AS ( + SELECT p.*, + EXTRACT(EPOCH FROM (ref.now - p.created_at))/86400.0 AS age_days + FROM prs p, ref + ) + SELECT + author AS login, + COUNT(*) AS total_prs, + SUM(CASE WHEN 
state='MERGED' THEN 1 ELSE 0 END) AS merged, + SUM(CASE WHEN state='CLOSED' THEN 1 ELSE 0 END) AS closed, + SUM(CASE WHEN state='OPEN' THEN 1 ELSE 0 END) AS open_total, + SUM(CASE WHEN state='OPEN' + AND age_days > COALESCE(stale_threshold_days, 30) + THEN 1 ELSE 0 END) AS open_stale_per_repo, + SUM(CASE WHEN state='OPEN' AND age_days > {UNIVERSAL_THRESHOLD_DAYS} + THEN 1 ELSE 0 END) AS open_stale_universal, + {idle_cols} + {sensitivity_cols}, + MEDIAN(additions) AS median_additions + FROM aged + {activity_join} + GROUP BY author + """ + df = con.execute(query).fetchdf() + + def compute(col: str, stale_col: str) -> None: + denom = df["merged"] + df["closed"] + df[stale_col] + df[col] = np.where(denom > 0, df["merged"] / denom, 0.0) + + df["merge_rate_v3"] = np.where( + (df["merged"] + df["closed"]) > 0, + df["merged"] / (df["merged"] + df["closed"]), + 0.0, + ) + compute("merge_rate_universal", "open_stale_universal") + compute("merge_rate_per_repo", "open_stale_per_repo") + compute("merge_rate_idle_universal", "open_stale_idle_universal") + compute("merge_rate_idle_per_repo", "open_stale_idle_per_repo") + for d in SENSITIVITY_THRESHOLDS: + compute(f"merge_rate_universal_{d}d", f"open_stale_{d}d") + return df + + +def distribution_summary(df: pd.DataFrame) -> dict[str, Any]: + """Phase 2 deliverable: summary stats for each merge-rate definition.""" + + def summarize(col: str) -> dict[str, float]: + s = df[col] + return { + "mean": float(s.mean()), + "median": float(s.median()), + "p10": float(s.quantile(0.10)), + "p25": float(s.quantile(0.25)), + "p75": float(s.quantile(0.75)), + "p90": float(s.quantile(0.90)), + } + + cols = [ + *MERGE_RATE_COLS, + *[f"merge_rate_universal_{d}d" for d in SENSITIVITY_THRESHOLDS], + ] + return {col: summarize(col) for col in cols} + + +def shift_analysis(df: pd.DataFrame) -> dict[str, Any]: + """Phase 3: per-author shift from v3 baseline to each alternative.""" + out: dict[str, Any] = {"n_authors": int(len(df))} + for alt in [c for 
c in MERGE_RATE_COLS if c != "merge_rate_v3"]:
        # Per-author delta of the alternative definition vs the v3 baseline.
        # Negative delta = the author's merge rate DROPS under the new rule.
        delta = df[alt] - df["merge_rate_v3"]
        out[alt] = {
            "mean_delta": float(delta.mean()),
            "median_delta": float(delta.median()),
            # "dropped_gt_X" counts authors whose rate fell by more than X.
            "n_dropped_gt_0.05": int((delta < -0.05).sum()),
            "n_dropped_gt_0.10": int((delta < -0.10).sum()),
            "n_dropped_gt_0.25": int((delta < -0.25).sum()),
            "n_unchanged": int((delta == 0).sum()),
        }
    return out


def cohens_d(group_a: np.ndarray, group_b: np.ndarray) -> float:
    """Cohen's d effect size (a minus b) using the pooled sample SD.

    Returns NaN when either group has fewer than 2 elements or the pooled
    standard deviation is zero, since d is undefined in both cases.
    """
    if len(group_a) < 2 or len(group_b) < 2:
        return float("nan")
    # Pooled SD with Bessel's correction (ddof=1) in each group.
    pooled = np.sqrt(
        ((len(group_a) - 1) * group_a.var(ddof=1)
         + (len(group_b) - 1) * group_b.var(ddof=1))
        / (len(group_a) + len(group_b) - 2)
    )
    if pooled == 0:
        return float("nan")
    return float((group_a.mean() - group_b.mean()) / pooled)


def cv_auc(
    df: pd.DataFrame,
    merge_rate_col: str,
    n_folds: int = 5,
) -> dict[str, float]:
    """Phase 4: minimal 2-feature LR CV — merge_rate variant + median_additions.

    Mirrors the 2-feature baseline in scripts/refit_bad_egg.py but swaps the
    merge-rate column so all three definitions are evaluated on identical
    labeled-author splits.
    """
    # Binary target: 1 = suspended account, 0 = anything else in the frame.
    y = (df["account_status"] == "suspended").astype(int).values
    mr = df[merge_rate_col].fillna(0).to_numpy(dtype=float)
    ma = df["median_additions"].fillna(0).to_numpy(dtype=float)
    # Signed log1p transform: compresses magnitude while keeping the sign.
    ma = np.log1p(np.abs(ma)) * np.sign(ma)
    x = np.column_stack([mr, ma])

    # Stratified folds with a fixed SEED so all merge-rate variants are
    # scored on identical splits.
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=SEED)
    # Out-of-fold probabilities; every index is filled exactly once below.
    oof = np.full(len(y), np.nan)
    fold_aucs: list[float] = []
    for train_idx, test_idx in skf.split(x, y):
        # Scaler is fit on the train fold only to avoid leakage.
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x[train_idx])
        x_test = scaler.transform(x[test_idx])
        model = LogisticRegression(
            class_weight="balanced", max_iter=1000, random_state=SEED,
        )
        model.fit(x_train, y[train_idx])
        probs = model.predict_proba(x_test)[:, 1]
        oof[test_idx] = probs
        fold_aucs.append(roc_auc_score(y[test_idx], probs))

    # Univariate summary of the merge-rate column itself, split by label.
    mr_susp = mr[y == 1]
    mr_act = mr[y == 0]
    return {
        # AUC over pooled out-of-fold predictions (single number, all authors).
        "cv_auc": float(roc_auc_score(y, oof)),
        "fold_auc_std": float(np.std(fold_aucs)),
        "mean_merge_rate_suspended": float(mr_susp.mean()),
        "mean_merge_rate_active": float(mr_act.mean()),
        "cohens_d_active_vs_suspended": cohens_d(mr_act, mr_susp),
        "n_suspended": int(y.sum()),
        "n_active": int((1 - y).sum()),
    }


def signal_evaluation(
    df: pd.DataFrame, con: duckdb.DuckDBPyConnection,
) -> dict[str, Any]:
    """Phase 4 deliverable: run CV for each merge-rate definition."""
    # Join author labels onto the per-author feature frame; inner join drops
    # authors with no label row, then we restrict to the two usable labels.
    authors = con.execute(
        "SELECT login, account_status FROM authors"
    ).fetchdf()
    labeled = df.merge(authors, on="login", how="inner")
    labeled = labeled[labeled["account_status"].isin(["active", "suspended"])]
    labeled = labeled.copy()

    results: dict[str, Any] = {
        "n_labeled": int(len(labeled)),
        "n_suspended": int((labeled["account_status"] == "suspended").sum()),
        "n_active": int((labeled["account_status"] == "active").sum()),
    }
    # One CV run per merge-rate definition, all on the same labeled frame.
    for col in MERGE_RATE_COLS:
        results[col] = cv_auc(labeled, col)
    return results


def pick_example_authors(
    df: pd.DataFrame, con: duckdb.DuckDBPyConnection,
) -> list[dict[str, Any]]:
    """Phase 3 deliverable: handful of authors where definitions disagree."""
    authors = con.execute(
        "SELECT login, account_status FROM authors"
    ).fetchdf()
    # Left join: keep unlabeled authors too (account_status becomes NaN).
    merged = df.merge(authors, on="login", how="left")
    # shift_universal < 0 means the universal definition lowered the rate.
    merged["shift_universal"] = (
        merged["merge_rate_universal"] - merged["merge_rate_v3"]
    )
    # 3 big-shift authors (most affected) + 3 suspended authors + 2 active
    # high-PR authors.
    big_shift = merged.nsmallest(3, "shift_universal")
    susp = merged[merged["account_status"] == "suspended"].nlargest(
        3, "total_prs",
    )
    act = merged[merged["account_status"] == "active"].nlargest(
        2, "total_prs",
    )
    # An author may qualify under several picks; keep the first occurrence.
    picks = pd.concat([big_shift, susp, act]).drop_duplicates(subset=["login"])
    cols = [
        "login", "account_status", "total_prs", "merged", "closed",
        "open_total", "open_stale_per_repo", "open_stale_universal",
        "open_stale_idle_universal", "open_stale_idle_per_repo",
        *MERGE_RATE_COLS,
    ]
    return picks[cols].to_dict("records")


def write_findings(results: dict[str, Any]) -> None:
    """Render the human-readable findings markdown to FINDINGS_PATH.

    Consumes the same ``results`` dict that is serialized to
    ``pocket_veto_analysis.json``; expects the characterization,
    distributions, shift_analysis, signal_evaluation and recommendation
    sections to be present.
    """
    c = results["characterization"]
    d = results["distributions"]
    s = results["shift_analysis"]
    e = results["signal_evaluation"]

    # 4-decimal fixed-point formatting used for every numeric cell below.
    def fmt(x: float) -> str:
        return f"{x:.4f}"

    # The document is built as a flat list of markdown lines and joined once
    # at the end; adjacent f-strings below rely on implicit concatenation.
    lines = [
        "# Pocket Veto Investigation — Findings",
        "",
        "Investigation for issue #51. Does counting stale open PRs as implicit",
        "rejections meaningfully change merge-rate distributions and improve the",
        "signal's ability to separate suspended from active accounts?",
        "",
        "## Dataset",
        "",
        f"- {sum(c['state_totals'].values())} PRs across "
        f"{c['repos_total']} repos",
        f"- State totals: {c['state_totals']}",
        f"- Outcome totals: {c['outcome_totals']}",
        f"- Labeled authors: {e['n_labeled']} "
        f"({e['n_suspended']} suspended, {e['n_active']} active)",
        "",
        "## Staleness definitions compared",
        "",
        "- **v3 (baseline)**: `merged / (merged + closed)` — current scorer.py.",
        f"- **age_universal**: open PR is stale if age > "
        f"{UNIVERSAL_THRESHOLD_DAYS}d since `created_at`.",
        "- **age_per_repo**: open PR is stale if age > that repo's",
        "  `stale_threshold_days` (populated in the DuckDB; default 30d).",
        f"- **idle_universal**: open PR is stale if it is still open AND idle "
        f"> {UNIVERSAL_THRESHOLD_DAYS}d (`fetch_now - updated_at`).",
        "- **idle_per_repo**: same, with the per-repo threshold substituted.",
        "",
        "The `idle_*` variants use a live re-fetch of every DB-OPEN PR's",
        "`updatedAt` (see `fetch_open_pr_activity.py`). PRs that were OPEN at",
        "the snapshot but have since been closed or merged are treated as",
        "non-stale — the close/merge event itself is activity.",
        "",
        "## Calibration sanity check",
        "",
        f"- Repos using the default 30d threshold: {c['repos_using_default_30d']}"
        f" / {c['repos_total']}",
        f"- Repos with a calibrated threshold: {c['repos_calibrated']}",
        "- Per-repo calibrated thresholds vs 2x median time-to-close:",
        f"  mean delta = "
        f"{fmt(c['per_repo_calibration_check']['mean_delta_vs_2x_median_ttc'])}"
        f", median delta = "
        f"{fmt(c['per_repo_calibration_check']['median_delta_vs_2x_median_ttc'])}"
        " (days).",
        "",
        "## Distribution shift",
        "",
        "Mean merge rate across all authors:",
        "",
        "| Definition | mean | median | p10 | p90 |",
        "|---|---|---|---|---|",
        # One table row per merge-rate definition, spliced into the document.
        *[
            f"| {label} | "
            f"{fmt(d[col]['mean'])} | "
            f"{fmt(d[col]['median'])} | "
            f"{fmt(d[col]['p10'])} | "
            f"{fmt(d[col]['p90'])} |"
            for label, col in [
                ("v3 baseline", "merge_rate_v3"),
                (f"age_universal ({UNIVERSAL_THRESHOLD_DAYS}d)",
                 "merge_rate_universal"),
                ("age_per_repo", "merge_rate_per_repo"),
                (f"idle_universal ({UNIVERSAL_THRESHOLD_DAYS}d)",
                 "merge_rate_idle_universal"),
                ("idle_per_repo", "merge_rate_idle_per_repo"),
            ]
        ],
        "",
        "Per-author drop from the v3 baseline (n authors, >0.10 / >0.25):",
        "",
        *[
            f"- **{label}**: "
            f"{s[col]['n_dropped_gt_0.10']} / {s[col]['n_dropped_gt_0.25']}"
            for label, col in [
                ("age_universal", "merge_rate_universal"),
                ("age_per_repo", "merge_rate_per_repo"),
                ("idle_universal", "merge_rate_idle_universal"),
                ("idle_per_repo", "merge_rate_idle_per_repo"),
            ]
        ],
        "",
        "## Signal quality vs ground truth",
        "",
        "2-feature logistic regression (merge_rate + log1p(median_additions)),",
        f"5-fold CV on {e['n_labeled']} labeled authors:",
        "",
        "| Definition | CV AUC | Active mean | Suspended mean | Cohen's d |",
        "|---|---|---|---|---|",
        *[
            f"| {label} | "
            f"{fmt(e[col]['cv_auc'])} | "
            f"{fmt(e[col]['mean_merge_rate_active'])} | "
            f"{fmt(e[col]['mean_merge_rate_suspended'])} | "
            f"{fmt(e[col]['cohens_d_active_vs_suspended'])} |"
            for label, col in [
                ("v3 baseline", "merge_rate_v3"),
                ("age_universal", "merge_rate_universal"),
                ("age_per_repo", "merge_rate_per_repo"),
                ("idle_universal", "merge_rate_idle_universal"),
                ("idle_per_repo", "merge_rate_idle_per_repo"),
            ]
        ],
        "",
        "## Recommendation",
        "",
        "See the `recommendation` field in "
        "`data/results/pocket_veto_analysis.json` for the machine-readable",
        "decision logic. Text summary and follow-up branch sketch below.",
        "",
        f"**{results['recommendation']['decision']}** — "
        f"{results['recommendation']['rationale']}",
        "",
        "### Follow-up branch sketch (if adopted)",
        "",
        "- `src/good_egg/github_client.py`: extend `_COMBINED_QUERY` with an",
        "  `openPullRequests` selection that pulls `createdAt`/`updatedAt` for",
        "  each OPEN PR on the scored user (or `totalCount` if we can push the",
        "  staleness filter into the query).",
        "- `src/good_egg/models.py`: add `open_stale_pr_count: int` (or similar)",
        "  to `UserContributionData`.",
        "- `src/good_egg/scorer.py:256-261`: change the `_score_v3` merge-rate",
        "  formula to `merged / (merged + closed + open_stale)`.",
        "- `src/good_egg/config.py`: add the staleness threshold as a tunable",
        "  config value.",
        "- Tests: parallel coverage in `tests/test_scorer.py`.",
        "",
    ]
    FINDINGS_PATH.write_text("\n".join(lines))


def decide(
    e: dict[str, Any], s: dict[str, Any], d: dict[str, Any],
) -> dict[str, Any]:
    """Produce a simple quantitative recommendation.

    Parameters: ``e`` = signal_evaluation output, ``s`` = shift_analysis
    output, ``d`` = distribution summary (accepted for interface symmetry;
    not read by the current decision rule). A variant is adopted only if its
    CV AUC beats the current best by more than 0.005.
    """
    base_auc = e["merge_rate_v3"]["cv_auc"]
    aucs = {col: e[col]["cv_auc"] for col in MERGE_RATE_COLS}

    # Greedy scan: upgrade best_name only on a clear (>0.005 AUC) win over
    # the incumbent, so ties and noise-level differences keep v3.
    best_name = "merge_rate_v3"
    for col, auc in aucs.items():
        if col == "merge_rate_v3":
            continue
        if auc > aucs[best_name] + 0.005:
            best_name = col

    cohens = {
        col: e[col]["cohens_d_active_vs_suspended"] for col in MERGE_RATE_COLS
    }

    if best_name == "merge_rate_v3":
        decision = "Keep v3 as-is"
        # NOTE(review): the rationale text asserts Cohen's d "fails to
        # improve", but only the AUC margin is actually tested above.
        rationale = (
            f"No variant beats v3 CV AUC {base_auc:.4f} by >0.005 "
            f"(aucs={ {k: round(v, 4) for k, v in aucs.items()} }). "
            f"Cohen's d also fails to improve "
            f"(base={cohens['merge_rate_v3']:.3f}, "
            f"best_alt={max(v for k, v in cohens.items() if k != 'merge_rate_v3'):.3f})."
        )
    else:
        affected = s[best_name]["n_dropped_gt_0.10"]
        decision = f"Adopt {best_name}"
        rationale = (
            f"{best_name} CV AUC {aucs[best_name]:.4f} beats v3 baseline "
            f"{base_auc:.4f} by >0.005. Cohen's d "
            f"{cohens[best_name]:.3f} vs v3 {cohens['merge_rate_v3']:.3f}. "
            f"{affected} authors shift by >0.10 in merge rate."
        )
    return {
        "decision": decision,
        "rationale": rationale,
        "cv_aucs": aucs,
        "cohens_d": cohens,
        "universal_threshold_days": UNIVERSAL_THRESHOLD_DAYS,
    }


def main() -> None:
    """Run all four phases, print progress, and write JSON + markdown outputs."""
    print(f"Loading {DB_PATH}")
    con = duckdb.connect(str(DB_PATH), read_only=True)

    print("Phase 1: characterization")
    characterization = characterize(con)
    print(f"  state totals: {characterization['state_totals']}")
    print(f"  outcome totals: {characterization['outcome_totals']}")
    print(
        f"  repos calibrated: {characterization['repos_calibrated']} / "
        f"{characterization['repos_total']}"
    )

    print("Phase 2: per-author features + merge-rate variants")
    # The idle-time sidecar parquet is optional; without it the idle_*
    # variants carry no staleness information (message below).
    if ACTIVITY_PATH.exists():
        print(f"  using idle-time sidecar: {ACTIVITY_PATH.name}")
    else:
        print("  (no idle-time sidecar; idle_* variants will be all-zero)")
    df = build_author_features(con)
    print(f"  {len(df)} authors")
    distributions = distribution_summary(df)
    for col in MERGE_RATE_COLS:
        v = distributions[col]
        print(f"  {col}: mean={v['mean']:.4f} median={v['median']:.4f}")

    print("Phase 3: shift analysis")
    shifts = shift_analysis(df)
    for alt in [c for c in MERGE_RATE_COLS if c != "merge_rate_v3"]:
        print(
            f"  {alt}: mean_delta={shifts[alt]['mean_delta']:+.4f} "
            f"n_dropped>0.10={shifts[alt]['n_dropped_gt_0.10']}"
        )

    print("Phase 4: signal evaluation")
    signal = signal_evaluation(df, con)
    for col in MERGE_RATE_COLS:
        print(
            f"  {col}: CV AUC={signal[col]['cv_auc']:.4f} "
            f"cohens_d={signal[col]['cohens_d_active_vs_suspended']:.4f}"
        )

    examples = pick_example_authors(df, con)
    print("Phase 3: example authors (most-shifted + labeled high-volume)")
    for row in examples:
        print(
            f"  {row['login']:20s} [{row['account_status']}] "
            f"total={row['total_prs']} v3={row['merge_rate_v3']:.3f} "
            f"uni={row['merge_rate_universal']:.3f} "
            f"per_repo={row['merge_rate_per_repo']:.3f}"
        )

    recommendation = decide(signal, shifts, distributions)
    print(f"\nRecommendation: {recommendation['decision']}")
    print(f"  {recommendation['rationale']}")

    # Machine-readable output; default=str covers any non-JSON-native values
    # (e.g. numpy scalars) that survive the float()/int() conversions above.
    results = {
        "universal_threshold_days": UNIVERSAL_THRESHOLD_DAYS,
        "characterization": characterization,
        "distributions": distributions,
        "shift_analysis": shifts,
        "signal_evaluation": signal,
        "example_authors": examples,
        "recommendation": recommendation,
    }
    RESULTS_PATH.parent.mkdir(parents=True, exist_ok=True)
    RESULTS_PATH.write_text(json.dumps(results, indent=2, default=str))
    print(f"\nWrote {RESULTS_PATH}")

    write_findings(results)
    print(f"Wrote {FINDINGS_PATH}")

    con.close()


if __name__ == "__main__":
    main()