From 58b8e7f57ca2b69e6e07ef19f6300265f7b5d7d5 Mon Sep 17 00:00:00 2001 From: Ben Vinegar Date: Sun, 19 Apr 2026 09:33:21 -0400 Subject: [PATCH 1/4] Add per-rule signal benchmark --- benchmarks/README.md | 23 + benchmarks/results/rule-signal-mini.json | 3414 +++++++++++++++++ benchmarks/sets/rule-signal-mini.json | 135 + package.json | 1 + reports/rule-signal-mini.md | 459 +++ scripts/benchmark-fetch.ts | 58 +- scripts/benchmark-rule-signals.ts | 153 + scripts/benchmark-scan.ts | 15 +- src/benchmarks/checkouts.ts | 90 + src/benchmarks/rule-signal-readme.ts | 83 + src/benchmarks/rule-signal-report.ts | 140 + src/benchmarks/rule-signal.ts | 389 ++ src/rules/async-noise/README.md | 10 + src/rules/barrel-density/README.md | 10 + src/rules/directory-fanout-hotspot/README.md | 10 + .../duplicate-function-signatures/README.md | 10 + src/rules/duplicate-mock-setup/README.md | 10 + src/rules/empty-catch/README.md | 10 + src/rules/error-obscuring/README.md | 10 + src/rules/error-swallowing/README.md | 10 + src/rules/over-fragmentation/README.md | 10 + src/rules/pass-through-wrappers/README.md | 10 + src/rules/placeholder-comments/README.md | 10 + tests/rule-signal-benchmark.test.ts | 206 + 24 files changed, 5206 insertions(+), 70 deletions(-) create mode 100644 benchmarks/results/rule-signal-mini.json create mode 100644 benchmarks/sets/rule-signal-mini.json create mode 100644 reports/rule-signal-mini.md create mode 100644 scripts/benchmark-rule-signals.ts create mode 100644 src/benchmarks/checkouts.ts create mode 100644 src/benchmarks/rule-signal-readme.ts create mode 100644 src/benchmarks/rule-signal-report.ts create mode 100644 src/benchmarks/rule-signal.ts create mode 100644 tests/rule-signal-benchmark.test.ts diff --git a/benchmarks/README.md b/benchmarks/README.md index 9d9392d..425b460 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -79,6 +79,22 @@ If a repo did not exist yet for an older backfill date, that weekly point is ski The rolling history is 
intentionally separate from the pinned benchmark snapshot so reproducible benchmark claims still point at exact SHAs. +## Per-rule signal benchmark + +A separate pinned mini cohort runs each built-in rule **in isolation** so we can compare which rules separate the explicit-AI and mature-OSS cohorts most cleanly. + +Refresh it locally with: + +```bash +bun run benchmark:rules +``` + +That writes: + +- an aggregate JSON summary at `benchmarks/results/rule-signal-mini.json` +- a markdown leaderboard/report at `reports/rule-signal-mini.md` +- benchmark summary sections into each `src/rules/*/README.md` + ## Artifacts For the current pinned set: @@ -93,6 +109,13 @@ For rolling history: - latest summary: `benchmarks/history/known-ai-vs-solid-oss/latest.json` - generated history report: `reports/known-ai-vs-solid-oss-history.md` +For per-rule signal benchmarking: + +- manifest: `benchmarks/sets/rule-signal-mini.json` +- summary: `benchmarks/results/rule-signal-mini.json` +- generated report: `reports/rule-signal-mini.md` +- per-rule docs: `src/rules/*/README.md` + ## Notes - Checkouts are stored under `benchmarks/.cache/` and are gitignored. 
diff --git a/benchmarks/results/rule-signal-mini.json b/benchmarks/results/rule-signal-mini.json new file mode 100644 index 0000000..e0bcb1e --- /dev/null +++ b/benchmarks/results/rule-signal-mini.json @@ -0,0 +1,3414 @@ +{ + "schemaVersion": 1, + "benchmarkSetId": "rule-signal-mini", + "benchmarkSetName": "Per-rule signal mini benchmark", + "generatedAt": "2026-04-19T02:29:58.464Z", + "analyzerVersion": "0.3.0", + "artifacts": { + "manifestPath": "benchmarks/sets/rule-signal-mini.json", + "summaryPath": "benchmarks/results/rule-signal-mini.json", + "reportPath": "reports/rule-signal-mini.md" + }, + "rules": [ + { + "ruleId": "defensive.error-swallowing", + "ruleSlug": "error-swallowing", + "family": "defensive", + "severity": "strong", + "scope": "file", + "requires": [ + "file.tryCatchSummaries" + ], + "signalScore": 0.7166666666666668, + "bestMetric": "findingsPerFile", + "bestMetricAuc": 0.8666666666666667, + "metricAucs": { + "scorePerFile": 0.8666666666666667, + "scorePerKloc": 0.8, + "scorePerFunction": 0.5, + "findingsPerFile": 0.8666666666666667, + "findingsPerKloc": 0.7666666666666667, + "findingsPerFunction": 0.5 + }, + "cohorts": { + "explicit-ai": { + "repoCount": 6, + "hitCount": 6, + "hitRate": 1, + "repoScoreMedian": 9.1, + "findingCountMedian": 3, + "medians": { + "scorePerFile": 0.0695086484004286, + "scorePerKloc": 0.5267386471458326, + "scorePerFunction": null, + "findingsPerFile": 0.026748813714985456, + "findingsPerKloc": 0.24436722316785256, + "findingsPerFunction": null + } + }, + "mature-oss": { + "repoCount": 5, + "hitCount": 3, + "hitRate": 0.6, + "repoScoreMedian": 13.8, + "findingCountMedian": 6, + "medians": { + "scorePerFile": 0.007080554130323243, + "scorePerKloc": 0.17047981420170977, + "scorePerFunction": null, + "findingsPerFile": 0.0035915854284248334, + "findingsPerKloc": 0.08647526807333103, + "findingsPerFunction": null + } + } + }, + "repos": [ + { + "id": "devworkbench", + "repo": "jiayun/DevWorkbench", + "cohort": 
"explicit-ai", + "ref": "ea50862107c377b0cdd6e508073ad0330b367576", + "summary": { + "fileCount": 32, + "directoryCount": 7, + "findingCount": 10, + "repoScore": 17.4, + "physicalLineCount": 9408, + "logicalLineCount": 2986, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.54375, + "scorePerKloc": 5.827193569993301, + "scorePerFunction": null, + "findingsPerFile": 0.3125, + "findingsPerKloc": 3.348961821835231, + "findingsPerFunction": null + } + } + }, + { + "id": "gstack", + "repo": "garrytan/gstack", + "cohort": "explicit-ai", + "ref": "6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", + "summary": { + "fileCount": 176, + "directoryCount": 19, + "findingCount": 8, + "repoScore": 37.400000000000006, + "physicalLineCount": 62616, + "logicalLineCount": 18958, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.21250000000000002, + "scorePerKloc": 1.972781939023104, + "scorePerFunction": null, + "findingsPerFile": 0.045454545454545456, + "findingsPerKloc": 0.4219854415022682, + "findingsPerFunction": null + } + } + }, + { + "id": "openusage", + "repo": "robinebers/openusage", + "cohort": "explicit-ai", + "ref": "857f537a243483acf98ccd9ea32e20b380c63823", + "summary": { + "fileCount": 139, + "directoryCount": 29, + "findingCount": 3, + "repoScore": 14, + "physicalLineCount": 33794, + "logicalLineCount": 22270, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.10071942446043165, + "scorePerKloc": 0.6286484059272565, + "scorePerFunction": null, + "findingsPerFile": 0.02158273381294964, + "findingsPerKloc": 0.1347103726986978, + "findingsPerFunction": null + } + } + }, + { + "id": "agent-ci", + "repo": "redwoodjs/agent-ci", + "cohort": "explicit-ai", + "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", + "summary": { + "fileCount": 94, + "directoryCount": 24, + "findingCount": 3, + "repoScore": 3.6000000000000005, + "physicalLineCount": 17222, + "logicalLineCount": 8474, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.03829787234042554, 
+ "scorePerKloc": 0.42482888836440885, + "scorePerFunction": null, + "findingsPerFile": 0.031914893617021274, + "findingsPerKloc": 0.35402407363700733, + "findingsPerFunction": null + } + } + }, + { + "id": "hunk", + "repo": "modem-dev/hunk", + "cohort": "explicit-ai", + "ref": "b37663f3c3055f860290587865199192e15d9f35", + "summary": { + "fileCount": 166, + "directoryCount": 36, + "findingCount": 1, + "repoScore": 3, + "physicalLineCount": 32498, + "logicalLineCount": 13564, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.018072289156626505, + "scorePerKloc": 0.22117369507519904, + "scorePerFunction": null, + "findingsPerFile": 0.006024096385542169, + "findingsPerKloc": 0.07372456502506636, + "findingsPerFunction": null + } + } + }, + { + "id": "vinext", + "repo": "cloudflare/vinext", + "cohort": "explicit-ai", + "ref": "28980b01f1692b47756a6768fc5733712c290f84", + "summary": { + "fileCount": 1129, + "directoryCount": 486, + "findingCount": 2, + "repoScore": 4.2, + "physicalLineCount": 150365, + "logicalLineCount": 59523, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.003720106288751107, + "scorePerKloc": 0.07056095962905096, + "scorePerFunction": null, + "findingsPerFile": 0.001771479185119575, + "findingsPerKloc": 0.03360045696621474, + "findingsPerFunction": null + } + } + }, + { + "id": "vite", + "repo": "vitejs/vite", + "cohort": "mature-oss", + "ref": "a4922537a8d705da7769d30626a0d846511fc124", + "summary": { + "fileCount": 1229, + "directoryCount": 525, + "findingCount": 6, + "repoScore": 19.2, + "physicalLineCount": 77629, + "logicalLineCount": 37251, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.015622457282343368, + "scorePerKloc": 0.5154224047676572, + "scorePerFunction": null, + "findingsPerFile": 0.004882017900732303, + "findingsPerKloc": 0.1610695014898929, + "findingsPerFunction": null + } + } + }, + { + "id": "payload", + "repo": "payloadcms/payload", + "cohort": "mature-oss", + "ref": 
"f3f36d801010f3c95ae74655ff22a09ea66ab1ac", + "summary": { + "fileCount": 4234, + "directoryCount": 1837, + "findingCount": 29, + "repoScore": 84.80000000000001, + "physicalLineCount": 384327, + "logicalLineCount": 251992, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.02002834199338687, + "scorePerKloc": 0.33651861963871876, + "scorePerFunction": null, + "findingsPerFile": 0.00684931506849315, + "findingsPerKloc": 0.11508301850852408, + "findingsPerFunction": null + } + } + }, + { + "id": "astro", + "repo": "withastro/astro", + "cohort": "mature-oss", + "ref": "f7068995aa451dced13853789b0d51433c2373b5", + "summary": { + "fileCount": 1949, + "directoryCount": 852, + "findingCount": 7, + "repoScore": 13.8, + "physicalLineCount": 138854, + "logicalLineCount": 80948, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.007080554130323243, + "scorePerKloc": 0.17047981420170977, + "scorePerFunction": null, + "findingsPerFile": 0.0035915854284248334, + "findingsPerKloc": 0.08647526807333103, + "findingsPerFunction": null + } + } + }, + { + "id": "execa", + "repo": "sindresorhus/execa", + "cohort": "mature-oss", + "ref": "99d1741d2525eca71b986282148bbf2983356428", + "summary": { + "fileCount": 580, + "directoryCount": 46, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 35995, + "logicalLineCount": 20374, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "umami", + "repo": "umami-software/umami", + "cohort": "mature-oss", + "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", + "summary": { + "fileCount": 512, + "directoryCount": 87, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 29677, + "logicalLineCount": 20508, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 
0, + "findingsPerFunction": null + } + } + } + ], + "rank": 1 + }, + { + "ruleId": "defensive.empty-catch", + "ruleSlug": "empty-catch", + "family": "defensive", + "severity": "strong", + "scope": "file", + "requires": [ + "file.tryCatchSummaries" + ], + "signalScore": 0.6666666666666666, + "bestMetric": "findingsPerFile", + "bestMetricAuc": 0.9333333333333333, + "metricAucs": { + "scorePerFile": 0.8666666666666667, + "scorePerKloc": 0.6333333333333333, + "scorePerFunction": 0.5, + "findingsPerFile": 0.9333333333333333, + "findingsPerKloc": 0.5666666666666667, + "findingsPerFunction": 0.5 + }, + "cohorts": { + "explicit-ai": { + "repoCount": 6, + "hitCount": 6, + "hitRate": 1, + "repoScoreMedian": 42.3, + "findingCountMedian": 10, + "medians": { + "scorePerFile": 0.11837132703475772, + "scorePerKloc": 1.2343535610214642, + "scorePerFunction": null, + "findingsPerFile": 0.030013489208633094, + "findingsPerKloc": 0.3148972211418942, + "findingsPerFunction": null + } + }, + "mature-oss": { + "repoCount": 5, + "hitCount": 5, + "hitRate": 1, + "repoScoreMedian": 45.1, + "findingCountMedian": 13, + "medians": { + "scorePerFile": 0.036696501220504475, + "scorePerKloc": 1.210705752865695, + "scorePerFunction": null, + "findingsPerFile": 0.010577705451586655, + "findingsPerKloc": 0.34898391989476796, + "findingsPerFunction": null + } + } + }, + "repos": [ + { + "id": "gstack", + "repo": "garrytan/gstack", + "cohort": "explicit-ai", + "ref": "6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", + "summary": { + "fileCount": 176, + "directoryCount": 19, + "findingCount": 55, + "repoScore": 301.3, + "physicalLineCount": 62616, + "logicalLineCount": 18958, + "functionCount": 0, + "normalized": { + "scorePerFile": 1.7119318181818182, + "scorePerKloc": 15.893026690579177, + "scorePerFunction": null, + "findingsPerFile": 0.3125, + "findingsPerKloc": 2.901149910328094, + "findingsPerFunction": null + } + } + }, + { + "id": "agent-ci", + "repo": "redwoodjs/agent-ci", + "cohort": 
"explicit-ai", + "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", + "summary": { + "fileCount": 94, + "directoryCount": 24, + "findingCount": 18, + "repoScore": 69.3, + "physicalLineCount": 17222, + "logicalLineCount": 8474, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.7372340425531915, + "scorePerKloc": 8.17795610101487, + "scorePerFunction": null, + "findingsPerFile": 0.19148936170212766, + "findingsPerKloc": 2.1241444418220436, + "findingsPerFunction": null + } + } + }, + { + "id": "hunk", + "repo": "modem-dev/hunk", + "cohort": "explicit-ai", + "ref": "b37663f3c3055f860290587865199192e15d9f35", + "summary": { + "fileCount": 166, + "directoryCount": 36, + "findingCount": 4, + "repoScore": 18.4, + "physicalLineCount": 32498, + "logicalLineCount": 13564, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.1108433734939759, + "scorePerKloc": 1.3565319964612208, + "scorePerFunction": null, + "findingsPerFile": 0.024096385542168676, + "findingsPerKloc": 0.29489826010026543, + "findingsPerFunction": null + } + } + }, + { + "id": "vinext", + "repo": "cloudflare/vinext", + "cohort": "explicit-ai", + "ref": "28980b01f1692b47756a6768fc5733712c290f84", + "summary": { + "fileCount": 1129, + "directoryCount": 486, + "findingCount": 16, + "repoScore": 66.19999999999999, + "physicalLineCount": 150365, + "logicalLineCount": 59523, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.05863596102745792, + "scorePerKloc": 1.1121751255817076, + "scorePerFunction": null, + "findingsPerFile": 0.0141718334809566, + "findingsPerKloc": 0.2688036557297179, + "findingsPerFunction": null + } + } + }, + { + "id": "openusage", + "repo": "robinebers/openusage", + "cohort": "explicit-ai", + "ref": "857f537a243483acf98ccd9ea32e20b380c63823", + "summary": { + "fileCount": 139, + "directoryCount": 29, + "findingCount": 4, + "repoScore": 17.5, + "physicalLineCount": 33794, + "logicalLineCount": 22270, + "functionCount": 0, + "normalized": { + "scorePerFile": 
0.12589928057553956, + "scorePerKloc": 0.7858105074090705, + "scorePerFunction": null, + "findingsPerFile": 0.02877697841726619, + "findingsPerKloc": 0.1796138302649304, + "findingsPerFunction": null + } + } + }, + { + "id": "devworkbench", + "repo": "jiayun/DevWorkbench", + "cohort": "explicit-ai", + "ref": "ea50862107c377b0cdd6e508073ad0330b367576", + "summary": { + "fileCount": 32, + "directoryCount": 7, + "findingCount": 1, + "repoScore": 1.9000000000000001, + "physicalLineCount": 9408, + "logicalLineCount": 2986, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.059375000000000004, + "scorePerKloc": 0.6363027461486939, + "scorePerFunction": null, + "findingsPerFile": 0.03125, + "findingsPerKloc": 0.3348961821835231, + "findingsPerFunction": null + } + } + }, + { + "id": "execa", + "repo": "sindresorhus/execa", + "cohort": "mature-oss", + "ref": "99d1741d2525eca71b986282148bbf2983356428", + "summary": { + "fileCount": 580, + "directoryCount": 46, + "findingCount": 11, + "repoScore": 43.9, + "physicalLineCount": 35995, + "logicalLineCount": 20374, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.07568965517241379, + "scorePerKloc": 2.1547069794836555, + "scorePerFunction": null, + "findingsPerFile": 0.01896551724137931, + "findingsPerKloc": 0.5399037989594582, + "findingsPerFunction": null + } + } + }, + { + "id": "astro", + "repo": "withastro/astro", + "cohort": "mature-oss", + "ref": "f7068995aa451dced13853789b0d51433c2373b5", + "summary": { + "fileCount": 1949, + "directoryCount": 852, + "findingCount": 39, + "repoScore": 133.90000000000006, + "physicalLineCount": 138854, + "logicalLineCount": 80948, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.06870189840944077, + "scorePerKloc": 1.6541483421455758, + "scorePerFunction": null, + "findingsPerFile": 0.020010261672652643, + "findingsPerKloc": 0.48179077926570146, + "findingsPerFunction": null + } + } + }, + { + "id": "vite", + "repo": "vitejs/vite", + "cohort": "mature-oss", 
+ "ref": "a4922537a8d705da7769d30626a0d846511fc124", + "summary": { + "fileCount": 1229, + "directoryCount": 525, + "findingCount": 13, + "repoScore": 45.1, + "physicalLineCount": 77629, + "logicalLineCount": 37251, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.036696501220504475, + "scorePerKloc": 1.210705752865695, + "scorePerFunction": null, + "findingsPerFile": 0.010577705451586655, + "findingsPerKloc": 0.34898391989476796, + "findingsPerFunction": null + } + } + }, + { + "id": "umami", + "repo": "umami-software/umami", + "cohort": "mature-oss", + "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", + "summary": { + "fileCount": 512, + "directoryCount": 87, + "findingCount": 4, + "repoScore": 10.8, + "physicalLineCount": 29677, + "logicalLineCount": 20508, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.02109375, + "scorePerKloc": 0.526623756582797, + "scorePerFunction": null, + "findingsPerFile": 0.0078125, + "findingsPerKloc": 0.1950458357714063, + "findingsPerFunction": null + } + } + }, + { + "id": "payload", + "repo": "payloadcms/payload", + "cohort": "mature-oss", + "ref": "f3f36d801010f3c95ae74655ff22a09ea66ab1ac", + "summary": { + "fileCount": 4234, + "directoryCount": 1837, + "findingCount": 21, + "repoScore": 71.2, + "physicalLineCount": 384327, + "logicalLineCount": 251992, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.016816249409541805, + "scorePerKloc": 0.2825486523381695, + "scorePerFunction": null, + "findingsPerFile": 0.004959848842701937, + "findingsPerKloc": 0.08333597891996572, + "findingsPerFunction": null + } + } + } + ], + "rank": 2 + }, + { + "ruleId": "structure.pass-through-wrappers", + "ruleSlug": "pass-through-wrappers", + "family": "structure", + "severity": "strong", + "scope": "file", + "requires": [ + "file.functionSummaries", + "file.comments" + ], + "signalScore": 0.6666666666666666, + "bestMetric": "findingsPerFile", + "bestMetricAuc": 0.85, + "metricAucs": { + "scorePerFile": 0.85, + 
"scorePerKloc": 0.65, + "scorePerFunction": 0.5, + "findingsPerFile": 0.85, + "findingsPerKloc": 0.65, + "findingsPerFunction": 0.5 + }, + "cohorts": { + "explicit-ai": { + "repoCount": 6, + "hitCount": 5, + "hitRate": 0.8333333333333334, + "repoScoreMedian": 13, + "findingCountMedian": 5.5, + "medians": { + "scorePerFile": 0.08309847813833643, + "scorePerKloc": 1.1223688092403985, + "scorePerFunction": null, + "findingsPerFile": 0.033610611510791366, + "findingsPerKloc": 0.3520667217490039, + "findingsPerFunction": null + } + }, + "mature-oss": { + "repoCount": 5, + "hitCount": 4, + "hitRate": 0.8, + "repoScoreMedian": 41, + "findingCountMedian": 13, + "medians": { + "scorePerFile": 0.015625, + "scorePerKloc": 0.3900916715428126, + "scorePerFunction": null, + "findingsPerFile": 0.005859375, + "findingsPerKloc": 0.14628437682855472, + "findingsPerFunction": null + } + } + }, + "repos": [ + { + "id": "devworkbench", + "repo": "jiayun/DevWorkbench", + "cohort": "explicit-ai", + "ref": "ea50862107c377b0cdd6e508073ad0330b367576", + "summary": { + "fileCount": 32, + "directoryCount": 7, + "findingCount": 1, + "repoScore": 5, + "physicalLineCount": 9408, + "logicalLineCount": 2986, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.15625, + "scorePerKloc": 1.6744809109176155, + "scorePerFunction": null, + "findingsPerFile": 0.03125, + "findingsPerKloc": 0.3348961821835231, + "findingsPerFunction": null + } + } + }, + { + "id": "vinext", + "repo": "cloudflare/vinext", + "cohort": "explicit-ai", + "ref": "28980b01f1692b47756a6768fc5733712c290f84", + "summary": { + "fileCount": 1129, + "directoryCount": 486, + "findingCount": 29, + "repoScore": 85, + "physicalLineCount": 150365, + "logicalLineCount": 59523, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.07528786536758193, + "scorePerKloc": 1.4280194210641264, + "scorePerFunction": null, + "findingsPerFile": 0.025686448184233834, + "findingsPerKloc": 0.4872066260101137, + "findingsPerFunction": null + } 
+ } + }, + { + "id": "hunk", + "repo": "modem-dev/hunk", + "cohort": "explicit-ai", + "ref": "b37663f3c3055f860290587865199192e15d9f35", + "summary": { + "fileCount": 166, + "directoryCount": 36, + "findingCount": 6, + "repoScore": 19, + "physicalLineCount": 32498, + "logicalLineCount": 13564, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.1144578313253012, + "scorePerKloc": 1.4007667354762607, + "scorePerFunction": null, + "findingsPerFile": 0.03614457831325301, + "findingsPerKloc": 0.4423473901503981, + "findingsPerFunction": null + } + } + }, + { + "id": "gstack", + "repo": "garrytan/gstack", + "cohort": "explicit-ai", + "ref": "6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", + "summary": { + "fileCount": 176, + "directoryCount": 19, + "findingCount": 7, + "repoScore": 16, + "physicalLineCount": 62616, + "logicalLineCount": 18958, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.09090909090909091, + "scorePerKloc": 0.8439708830045364, + "scorePerFunction": null, + "findingsPerFile": 0.03977272727272727, + "findingsPerKloc": 0.3692372613144847, + "findingsPerFunction": null + } + } + }, + { + "id": "openusage", + "repo": "robinebers/openusage", + "cohort": "explicit-ai", + "ref": "857f537a243483acf98ccd9ea32e20b380c63823", + "summary": { + "fileCount": 139, + "directoryCount": 29, + "findingCount": 5, + "repoScore": 10, + "physicalLineCount": 33794, + "logicalLineCount": 22270, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.07194244604316546, + "scorePerKloc": 0.449034575662326, + "scorePerFunction": null, + "findingsPerFile": 0.03597122302158273, + "findingsPerKloc": 0.224517287831163, + "findingsPerFunction": null + } + } + }, + { + "id": "agent-ci", + "repo": "redwoodjs/agent-ci", + "cohort": "explicit-ai", + "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", + "summary": { + "fileCount": 94, + "directoryCount": 24, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 17222, + "logicalLineCount": 8474, + 
"functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "vite", + "repo": "vitejs/vite", + "cohort": "mature-oss", + "ref": "a4922537a8d705da7769d30626a0d846511fc124", + "summary": { + "fileCount": 1229, + "directoryCount": 525, + "findingCount": 25, + "repoScore": 65, + "physicalLineCount": 77629, + "logicalLineCount": 37251, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.05288852725793328, + "scorePerKloc": 1.7449195994738398, + "scorePerFunction": null, + "findingsPerFile": 0.02034174125305126, + "findingsPerKloc": 0.6711229228745538, + "findingsPerFunction": null + } + } + }, + { + "id": "astro", + "repo": "withastro/astro", + "cohort": "mature-oss", + "ref": "f7068995aa451dced13853789b0d51433c2373b5", + "summary": { + "fileCount": 1949, + "directoryCount": 852, + "findingCount": 24, + "repoScore": 62, + "physicalLineCount": 138854, + "logicalLineCount": 80948, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.03181118522319138, + "scorePerKloc": 0.7659238029352178, + "scorePerFunction": null, + "findingsPerFile": 0.012314007183170857, + "findingsPerKloc": 0.29648663339427783, + "findingsPerFunction": null + } + } + }, + { + "id": "umami", + "repo": "umami-software/umami", + "cohort": "mature-oss", + "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", + "summary": { + "fileCount": 512, + "directoryCount": 87, + "findingCount": 3, + "repoScore": 8, + "physicalLineCount": 29677, + "logicalLineCount": 20508, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.015625, + "scorePerKloc": 0.3900916715428126, + "scorePerFunction": null, + "findingsPerFile": 0.005859375, + "findingsPerKloc": 0.14628437682855472, + "findingsPerFunction": null + } + } + }, + { + "id": "payload", + "repo": "payloadcms/payload", + "cohort": "mature-oss", + "ref": "f3f36d801010f3c95ae74655ff22a09ea66ab1ac", + 
"summary": { + "fileCount": 4234, + "directoryCount": 1837, + "findingCount": 13, + "repoScore": 41, + "physicalLineCount": 384327, + "logicalLineCount": 251992, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.009683514407179971, + "scorePerKloc": 0.16270357789136164, + "scorePerFunction": null, + "findingsPerFile": 0.0030703826169107226, + "findingsPerKloc": 0.051588939331407346, + "findingsPerFunction": null + } + } + }, + { + "id": "execa", + "repo": "sindresorhus/execa", + "cohort": "mature-oss", + "ref": "99d1741d2525eca71b986282148bbf2983356428", + "summary": { + "fileCount": 580, + "directoryCount": 46, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 35995, + "logicalLineCount": 20374, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + } + ], + "rank": 3 + }, + { + "ruleId": "defensive.error-obscuring", + "ruleSlug": "error-obscuring", + "family": "defensive", + "severity": "strong", + "scope": "file", + "requires": [ + "file.tryCatchSummaries" + ], + "signalScore": 0.6555555555555556, + "bestMetric": "findingsPerFile", + "bestMetricAuc": 0.8333333333333334, + "metricAucs": { + "scorePerFile": 0.8, + "scorePerKloc": 0.6, + "scorePerFunction": 0.5, + "findingsPerFile": 0.8333333333333334, + "findingsPerKloc": 0.7, + "findingsPerFunction": 0.5 + }, + "cohorts": { + "explicit-ai": { + "repoCount": 6, + "hitCount": 5, + "hitRate": 0.8333333333333334, + "repoScoreMedian": 9.4, + "findingCountMedian": 4.5, + "medians": { + "scorePerFile": 0.06158622769161187, + "scorePerKloc": 0.8214329438453718, + "scorePerFunction": null, + "findingsPerFile": 0.026586321919228086, + "findingsPerKloc": 0.37861477861579207, + "findingsPerFunction": null + } + }, + "mature-oss": { + "repoCount": 5, + "hitCount": 5, + "hitRate": 1, + "repoScoreMedian": 14.4, + "findingCountMedian": 5, + "medians": { + 
"scorePerFile": 0.018233349078885217, + "scorePerKloc": 0.38656680357574297, + "scorePerFunction": null, + "findingsPerFile": 0.006613131790269249, + "findingsPerKloc": 0.13422458457491074, + "findingsPerFunction": null + } + } + }, + "repos": [ + { + "id": "gstack", + "repo": "garrytan/gstack", + "cohort": "explicit-ai", + "ref": "6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", + "summary": { + "fileCount": 176, + "directoryCount": 19, + "findingCount": 19, + "repoScore": 49.40000000000002, + "physicalLineCount": 62616, + "logicalLineCount": 18958, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.2806818181818183, + "scorePerKloc": 2.6057601012765073, + "scorePerFunction": null, + "findingsPerFile": 0.10795454545454546, + "findingsPerKloc": 1.002215423567887, + "findingsPerFunction": null + } + } + }, + { + "id": "vinext", + "repo": "cloudflare/vinext", + "cohort": "explicit-ai", + "ref": "28980b01f1692b47756a6768fc5733712c290f84", + "summary": { + "fileCount": 1129, + "directoryCount": 486, + "findingCount": 24, + "repoScore": 69.40000000000002, + "physicalLineCount": 150365, + "logicalLineCount": 59523, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.061470327723649264, + "scorePerKloc": 1.1659358567276519, + "scorePerFunction": null, + "findingsPerFile": 0.021257750221434897, + "findingsPerKloc": 0.40320548359457686, + "findingsPerFunction": null + } + } + }, + { + "id": "hunk", + "repo": "modem-dev/hunk", + "cohort": "explicit-ai", + "ref": "b37663f3c3055f860290587865199192e15d9f35", + "summary": { + "fileCount": 166, + "directoryCount": 36, + "findingCount": 6, + "repoScore": 13, + "physicalLineCount": 32498, + "logicalLineCount": 13564, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.0783132530120482, + "scorePerKloc": 0.9584193453258626, + "scorePerFunction": null, + "findingsPerFile": 0.03614457831325301, + "findingsPerKloc": 0.4423473901503981, + "findingsPerFunction": null + } + } + }, + { + "id": "agent-ci", + "repo": 
"redwoodjs/agent-ci", + "cohort": "explicit-ai", + "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", + "summary": { + "fileCount": 94, + "directoryCount": 24, + "findingCount": 3, + "repoScore": 5.800000000000001, + "physicalLineCount": 17222, + "logicalLineCount": 8474, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.061702127659574474, + "scorePerKloc": 0.6844465423648809, + "scorePerFunction": null, + "findingsPerFile": 0.031914893617021274, + "findingsPerKloc": 0.35402407363700733, + "findingsPerFunction": null + } + } + }, + { + "id": "openusage", + "repo": "robinebers/openusage", + "cohort": "explicit-ai", + "ref": "857f537a243483acf98ccd9ea32e20b380c63823", + "summary": { + "fileCount": 139, + "directoryCount": 29, + "findingCount": 2, + "repoScore": 4.2, + "physicalLineCount": 33794, + "logicalLineCount": 22270, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.030215827338129497, + "scorePerKloc": 0.18859452177817693, + "scorePerFunction": null, + "findingsPerFile": 0.014388489208633094, + "findingsPerKloc": 0.0898069151324652, + "findingsPerFunction": null + } + } + }, + { + "id": "devworkbench", + "repo": "jiayun/DevWorkbench", + "cohort": "explicit-ai", + "ref": "ea50862107c377b0cdd6e508073ad0330b367576", + "summary": { + "fileCount": 32, + "directoryCount": 7, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 9408, + "logicalLineCount": 2986, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "astro", + "repo": "withastro/astro", + "cohort": "mature-oss", + "ref": "f7068995aa451dced13853789b0d51433c2373b5", + "summary": { + "fileCount": 1949, + "directoryCount": 852, + "findingCount": 26, + "repoScore": 80.7, + "physicalLineCount": 138854, + "logicalLineCount": 80948, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.04140584915341201, + 
"scorePerKloc": 0.9969363047882592, + "scorePerFunction": null, + "findingsPerFile": 0.013340174448435094, + "findingsPerKloc": 0.321193852843801, + "findingsPerFunction": null + } + } + }, + { + "id": "umami", + "repo": "umami-software/umami", + "cohort": "mature-oss", + "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", + "summary": { + "fileCount": 512, + "directoryCount": 87, + "findingCount": 4, + "repoScore": 11.5, + "physicalLineCount": 29677, + "logicalLineCount": 20508, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.0224609375, + "scorePerKloc": 0.5607567778427931, + "scorePerFunction": null, + "findingsPerFile": 0.0078125, + "findingsPerKloc": 0.1950458357714063, + "findingsPerFunction": null + } + } + }, + { + "id": "vite", + "repo": "vitejs/vite", + "cohort": "mature-oss", + "ref": "a4922537a8d705da7769d30626a0d846511fc124", + "summary": { + "fileCount": 1229, + "directoryCount": 525, + "findingCount": 5, + "repoScore": 14.4, + "physicalLineCount": 77629, + "logicalLineCount": 37251, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.011716842961757526, + "scorePerKloc": 0.38656680357574297, + "scorePerFunction": null, + "findingsPerFile": 0.0040683482506102524, + "findingsPerKloc": 0.13422458457491074, + "findingsPerFunction": null + } + } + }, + { + "id": "payload", + "repo": "payloadcms/payload", + "cohort": "mature-oss", + "ref": "f3f36d801010f3c95ae74655ff22a09ea66ab1ac", + "summary": { + "fileCount": 4234, + "directoryCount": 1837, + "findingCount": 28, + "repoScore": 77.2, + "physicalLineCount": 384327, + "logicalLineCount": 251992, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.018233349078885217, + "scorePerKloc": 0.30635893202958825, + "scorePerFunction": null, + "findingsPerFile": 0.006613131790269249, + "findingsPerKloc": 0.11111463855995429, + "findingsPerFunction": null + } + } + }, + { + "id": "execa", + "repo": "sindresorhus/execa", + "cohort": "mature-oss", + "ref": 
"99d1741d2525eca71b986282148bbf2983356428", + "summary": { + "fileCount": 580, + "directoryCount": 46, + "findingCount": 1, + "repoScore": 5, + "physicalLineCount": 35995, + "logicalLineCount": 20374, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.008620689655172414, + "scorePerKloc": 0.24541081770884463, + "scorePerFunction": null, + "findingsPerFile": 0.0017241379310344827, + "findingsPerKloc": 0.049082163541768926, + "findingsPerFunction": null + } + } + } + ], + "rank": 4 + }, + { + "ruleId": "tests.duplicate-mock-setup", + "ruleSlug": "duplicate-mock-setup", + "family": "tests", + "severity": "medium", + "scope": "file", + "requires": [ + "repo.testMockDuplication" + ], + "signalScore": 0.6333333333333333, + "bestMetric": "findingsPerFile", + "bestMetricAuc": 0.7, + "metricAucs": { + "scorePerFile": 0.7, + "scorePerKloc": 0.7, + "scorePerFunction": 0.5, + "findingsPerFile": 0.7, + "findingsPerKloc": 0.7, + "findingsPerFunction": 0.5 + }, + "cohorts": { + "explicit-ai": { + "repoCount": 6, + "hitCount": 3, + "hitRate": 0.5, + "repoScoreMedian": 4.5, + "findingCountMedian": 1.5, + "medians": { + "scorePerFile": 0.03985828166519043, + "scorePerKloc": 0.5310361104555109, + "scorePerFunction": null, + "findingsPerFile": 0.007971656333038087, + "findingsPerKloc": 0.15120205634796632, + "findingsPerFunction": null + } + }, + "mature-oss": { + "repoCount": 5, + "hitCount": 1, + "hitRate": 0.2, + "repoScoreMedian": 0, + "findingCountMedian": 0, + "medians": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + "repos": [ + { + "id": "openusage", + "repo": "robinebers/openusage", + "cohort": "explicit-ai", + "ref": "857f537a243483acf98ccd9ea32e20b380c63823", + "summary": { + "fileCount": 139, + "directoryCount": 29, + "findingCount": 25, + "repoScore": 112, + "physicalLineCount": 33794, + "logicalLineCount": 22270, + "functionCount": 0, + 
"normalized": { + "scorePerFile": 0.8057553956834532, + "scorePerKloc": 5.029187247418052, + "scorePerFunction": null, + "findingsPerFile": 0.17985611510791366, + "findingsPerKloc": 1.1225864391558151, + "findingsPerFunction": null + } + } + }, + { + "id": "vinext", + "repo": "cloudflare/vinext", + "cohort": "explicit-ai", + "ref": "28980b01f1692b47756a6768fc5733712c290f84", + "summary": { + "fileCount": 1129, + "directoryCount": 486, + "findingCount": 18, + "repoScore": 90, + "physicalLineCount": 150365, + "logicalLineCount": 59523, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.07971656333038087, + "scorePerKloc": 1.5120205634796633, + "scorePerFunction": null, + "findingsPerFile": 0.015943312666076175, + "findingsPerKloc": 0.30240411269593265, + "findingsPerFunction": null + } + } + }, + { + "id": "agent-ci", + "repo": "redwoodjs/agent-ci", + "cohort": "explicit-ai", + "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", + "summary": { + "fileCount": 94, + "directoryCount": 24, + "findingCount": 3, + "repoScore": 9, + "physicalLineCount": 17222, + "logicalLineCount": 8474, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.09574468085106383, + "scorePerKloc": 1.0620722209110218, + "scorePerFunction": null, + "findingsPerFile": 0.031914893617021274, + "findingsPerKloc": 0.35402407363700733, + "findingsPerFunction": null + } + } + }, + { + "id": "gstack", + "repo": "garrytan/gstack", + "cohort": "explicit-ai", + "ref": "6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", + "summary": { + "fileCount": 176, + "directoryCount": 19, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 62616, + "logicalLineCount": 18958, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "devworkbench", + "repo": "jiayun/DevWorkbench", + "cohort": "explicit-ai", + "ref": 
"ea50862107c377b0cdd6e508073ad0330b367576", + "summary": { + "fileCount": 32, + "directoryCount": 7, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 9408, + "logicalLineCount": 2986, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "hunk", + "repo": "modem-dev/hunk", + "cohort": "explicit-ai", + "ref": "b37663f3c3055f860290587865199192e15d9f35", + "summary": { + "fileCount": 166, + "directoryCount": 36, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 32498, + "logicalLineCount": 13564, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "payload", + "repo": "payloadcms/payload", + "cohort": "mature-oss", + "ref": "f3f36d801010f3c95ae74655ff22a09ea66ab1ac", + "summary": { + "fileCount": 4234, + "directoryCount": 1837, + "findingCount": 6, + "repoScore": 22.5, + "physicalLineCount": 384327, + "logicalLineCount": 251992, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.005314123760037789, + "scorePerKloc": 0.0892885488428204, + "scorePerFunction": null, + "findingsPerFile": 0.0014170996693434106, + "findingsPerKloc": 0.023810279691418777, + "findingsPerFunction": null + } + } + }, + { + "id": "execa", + "repo": "sindresorhus/execa", + "cohort": "mature-oss", + "ref": "99d1741d2525eca71b986282148bbf2983356428", + "summary": { + "fileCount": 580, + "directoryCount": 46, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 35995, + "logicalLineCount": 20374, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "umami", + "repo": 
"umami-software/umami", + "cohort": "mature-oss", + "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", + "summary": { + "fileCount": 512, + "directoryCount": 87, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 29677, + "logicalLineCount": 20508, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "vite", + "repo": "vitejs/vite", + "cohort": "mature-oss", + "ref": "a4922537a8d705da7769d30626a0d846511fc124", + "summary": { + "fileCount": 1229, + "directoryCount": 525, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 77629, + "logicalLineCount": 37251, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "astro", + "repo": "withastro/astro", + "cohort": "mature-oss", + "ref": "f7068995aa451dced13853789b0d51433c2373b5", + "summary": { + "fileCount": 1949, + "directoryCount": 852, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 138854, + "logicalLineCount": 80948, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + } + ], + "rank": 5 + }, + { + "ruleId": "comments.placeholder-comments", + "ruleSlug": "placeholder-comments", + "family": "comments", + "severity": "weak", + "scope": "file", + "requires": [ + "file.comments" + ], + "signalScore": 0.5, + "bestMetric": "findingsPerFile", + "bestMetricAuc": 0.5, + "metricAucs": { + "scorePerFile": 0.5, + "scorePerKloc": 0.5, + "scorePerFunction": 0.5, + "findingsPerFile": 0.5, + "findingsPerKloc": 0.5, + "findingsPerFunction": 0.5 + }, + "cohorts": { + "explicit-ai": { + "repoCount": 6, + "hitCount": 0, + "hitRate": 0, + 
"repoScoreMedian": 0, + "findingCountMedian": 0, + "medians": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + }, + "mature-oss": { + "repoCount": 5, + "hitCount": 0, + "hitRate": 0, + "repoScoreMedian": 0, + "findingCountMedian": 0, + "medians": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + "repos": [ + { + "id": "vinext", + "repo": "cloudflare/vinext", + "cohort": "explicit-ai", + "ref": "28980b01f1692b47756a6768fc5733712c290f84", + "summary": { + "fileCount": 1129, + "directoryCount": 486, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 150365, + "logicalLineCount": 59523, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "gstack", + "repo": "garrytan/gstack", + "cohort": "explicit-ai", + "ref": "6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", + "summary": { + "fileCount": 176, + "directoryCount": 19, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 62616, + "logicalLineCount": 18958, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "devworkbench", + "repo": "jiayun/DevWorkbench", + "cohort": "explicit-ai", + "ref": "ea50862107c377b0cdd6e508073ad0330b367576", + "summary": { + "fileCount": 32, + "directoryCount": 7, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 9408, + "logicalLineCount": 2986, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } 
+ } + }, + { + "id": "hunk", + "repo": "modem-dev/hunk", + "cohort": "explicit-ai", + "ref": "b37663f3c3055f860290587865199192e15d9f35", + "summary": { + "fileCount": 166, + "directoryCount": 36, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 32498, + "logicalLineCount": 13564, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "agent-ci", + "repo": "redwoodjs/agent-ci", + "cohort": "explicit-ai", + "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", + "summary": { + "fileCount": 94, + "directoryCount": 24, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 17222, + "logicalLineCount": 8474, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "openusage", + "repo": "robinebers/openusage", + "cohort": "explicit-ai", + "ref": "857f537a243483acf98ccd9ea32e20b380c63823", + "summary": { + "fileCount": 139, + "directoryCount": 29, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 33794, + "logicalLineCount": 22270, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "payload", + "repo": "payloadcms/payload", + "cohort": "mature-oss", + "ref": "f3f36d801010f3c95ae74655ff22a09ea66ab1ac", + "summary": { + "fileCount": 4234, + "directoryCount": 1837, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 384327, + "logicalLineCount": 251992, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": 
"execa", + "repo": "sindresorhus/execa", + "cohort": "mature-oss", + "ref": "99d1741d2525eca71b986282148bbf2983356428", + "summary": { + "fileCount": 580, + "directoryCount": 46, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 35995, + "logicalLineCount": 20374, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "umami", + "repo": "umami-software/umami", + "cohort": "mature-oss", + "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", + "summary": { + "fileCount": 512, + "directoryCount": 87, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 29677, + "logicalLineCount": 20508, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "vite", + "repo": "vitejs/vite", + "cohort": "mature-oss", + "ref": "a4922537a8d705da7769d30626a0d846511fc124", + "summary": { + "fileCount": 1229, + "directoryCount": 525, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 77629, + "logicalLineCount": 37251, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "astro", + "repo": "withastro/astro", + "cohort": "mature-oss", + "ref": "f7068995aa451dced13853789b0d51433c2373b5", + "summary": { + "fileCount": 1949, + "directoryCount": 852, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 138854, + "logicalLineCount": 80948, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + } + ], + "rank": 6 + }, + { + "ruleId": 
"defensive.async-noise", + "ruleSlug": "async-noise", + "family": "defensive", + "severity": "medium", + "scope": "file", + "requires": [ + "file.functionSummaries" + ], + "signalScore": 0.41111111111111115, + "bestMetric": "findingsPerFunction", + "bestMetricAuc": 0.5, + "metricAucs": { + "scorePerFile": 0.4166666666666667, + "scorePerKloc": 0.35, + "scorePerFunction": 0.5, + "findingsPerFile": 0.4166666666666667, + "findingsPerKloc": 0.2833333333333333, + "findingsPerFunction": 0.5 + }, + "cohorts": { + "explicit-ai": { + "repoCount": 6, + "hitCount": 3, + "hitRate": 0.5, + "repoScoreMedian": 0.375, + "findingCountMedian": 0.5, + "medians": { + "scorePerFile": 0.002259036144578313, + "scorePerKloc": 0.02764671188439988, + "scorePerFunction": null, + "findingsPerFile": 0.001771479185119575, + "findingsPerKloc": 0.03360045696621474, + "findingsPerFunction": null + } + }, + "mature-oss": { + "repoCount": 5, + "hitCount": 4, + "hitRate": 0.8, + "repoScoreMedian": 9, + "findingCountMedian": 6, + "medians": { + "scorePerFile": 0.0029296875, + "scorePerKloc": 0.07314218841427736, + "scorePerFunction": null, + "findingsPerFile": 0.00390625, + "findingsPerKloc": 0.09752291788570315, + "findingsPerFunction": null + } + } + }, + "repos": [ + { + "id": "gstack", + "repo": "garrytan/gstack", + "cohort": "explicit-ai", + "ref": "6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", + "summary": { + "fileCount": 176, + "directoryCount": 19, + "findingCount": 2, + "repoScore": 4.5, + "physicalLineCount": 62616, + "logicalLineCount": 18958, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.02556818181818182, + "scorePerKloc": 0.23736681084502587, + "scorePerFunction": null, + "findingsPerFile": 0.011363636363636364, + "findingsPerKloc": 0.10549636037556705, + "findingsPerFunction": null + } + } + }, + { + "id": "vinext", + "repo": "cloudflare/vinext", + "cohort": "explicit-ai", + "ref": "28980b01f1692b47756a6768fc5733712c290f84", + "summary": { + "fileCount": 1129, + 
"directoryCount": 486, + "findingCount": 4, + "repoScore": 6, + "physicalLineCount": 150365, + "logicalLineCount": 59523, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.005314437555358724, + "scorePerKloc": 0.10080137089864422, + "scorePerFunction": null, + "findingsPerFile": 0.00354295837023915, + "findingsPerKloc": 0.06720091393242948, + "findingsPerFunction": null + } + } + }, + { + "id": "hunk", + "repo": "modem-dev/hunk", + "cohort": "explicit-ai", + "ref": "b37663f3c3055f860290587865199192e15d9f35", + "summary": { + "fileCount": 166, + "directoryCount": 36, + "findingCount": 1, + "repoScore": 0.75, + "physicalLineCount": 32498, + "logicalLineCount": 13564, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.004518072289156626, + "scorePerKloc": 0.05529342376879976, + "scorePerFunction": null, + "findingsPerFile": 0.006024096385542169, + "findingsPerKloc": 0.07372456502506636, + "findingsPerFunction": null + } + } + }, + { + "id": "devworkbench", + "repo": "jiayun/DevWorkbench", + "cohort": "explicit-ai", + "ref": "ea50862107c377b0cdd6e508073ad0330b367576", + "summary": { + "fileCount": 32, + "directoryCount": 7, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 9408, + "logicalLineCount": 2986, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "agent-ci", + "repo": "redwoodjs/agent-ci", + "cohort": "explicit-ai", + "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", + "summary": { + "fileCount": 94, + "directoryCount": 24, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 17222, + "logicalLineCount": 8474, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "openusage", + "repo": 
"robinebers/openusage", + "cohort": "explicit-ai", + "ref": "857f537a243483acf98ccd9ea32e20b380c63823", + "summary": { + "fileCount": 139, + "directoryCount": 29, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 33794, + "logicalLineCount": 22270, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "vite", + "repo": "vitejs/vite", + "cohort": "mature-oss", + "ref": "a4922537a8d705da7769d30626a0d846511fc124", + "summary": { + "fileCount": 1229, + "directoryCount": 525, + "findingCount": 8, + "repoScore": 11.25, + "physicalLineCount": 77629, + "logicalLineCount": 37251, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.009153783563873067, + "scorePerKloc": 0.3020053152935492, + "scorePerFunction": null, + "findingsPerFile": 0.006509357200976403, + "findingsPerKloc": 0.2147593353198572, + "findingsPerFunction": null + } + } + }, + { + "id": "astro", + "repo": "withastro/astro", + "cohort": "mature-oss", + "ref": "f7068995aa451dced13853789b0d51433c2373b5", + "summary": { + "fileCount": 1949, + "directoryCount": 852, + "findingCount": 11, + "repoScore": 18, + "physicalLineCount": 138854, + "logicalLineCount": 80948, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.009235505387378143, + "scorePerKloc": 0.22236497504570837, + "scorePerFunction": null, + "findingsPerFile": 0.005643919958953309, + "findingsPerKloc": 0.13588970697237734, + "findingsPerFunction": null + } + } + }, + { + "id": "umami", + "repo": "umami-software/umami", + "cohort": "mature-oss", + "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", + "summary": { + "fileCount": 512, + "directoryCount": 87, + "findingCount": 2, + "repoScore": 1.5, + "physicalLineCount": 29677, + "logicalLineCount": 20508, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.0029296875, + "scorePerKloc": 0.07314218841427736, + 
"scorePerFunction": null, + "findingsPerFile": 0.00390625, + "findingsPerKloc": 0.09752291788570315, + "findingsPerFunction": null + } + } + }, + { + "id": "payload", + "repo": "payloadcms/payload", + "cohort": "mature-oss", + "ref": "f3f36d801010f3c95ae74655ff22a09ea66ab1ac", + "summary": { + "fileCount": 4234, + "directoryCount": 1837, + "findingCount": 6, + "repoScore": 9, + "physicalLineCount": 384327, + "logicalLineCount": 251992, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.0021256495040151155, + "scorePerKloc": 0.03571541953712817, + "scorePerFunction": null, + "findingsPerFile": 0.0014170996693434106, + "findingsPerKloc": 0.023810279691418777, + "findingsPerFunction": null + } + } + }, + { + "id": "execa", + "repo": "sindresorhus/execa", + "cohort": "mature-oss", + "ref": "99d1741d2525eca71b986282148bbf2983356428", + "summary": { + "fileCount": 580, + "directoryCount": 46, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 35995, + "logicalLineCount": 20374, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + } + ], + "rank": 7 + }, + { + "ruleId": "structure.barrel-density", + "ruleSlug": "barrel-density", + "family": "structure", + "severity": "medium", + "scope": "file", + "requires": [ + "file.exportSummary" + ], + "signalScore": 0.3499999999999999, + "bestMetric": "findingsPerFunction", + "bestMetricAuc": 0.5, + "metricAucs": { + "scorePerFile": 0.3333333333333333, + "scorePerKloc": 0.2, + "scorePerFunction": 0.5, + "findingsPerFile": 0.3333333333333333, + "findingsPerKloc": 0.23333333333333334, + "findingsPerFunction": 0.5 + }, + "cohorts": { + "explicit-ai": { + "repoCount": 6, + "hitCount": 3, + "hitRate": 0.5, + "repoScoreMedian": 1, + "findingCountMedian": 0.5, + "medians": { + "scorePerFile": 0.003100088573959256, + "scorePerKloc": 0.058800799690875795, + "scorePerFunction": 
null, + "findingsPerFile": 0.001328609388839681, + "findingsPerKloc": 0.025200342724661054, + "findingsPerFunction": null + } + }, + "mature-oss": { + "repoCount": 5, + "hitCount": 5, + "hitRate": 1, + "repoScoreMedian": 19, + "findingCountMedian": 8, + "medians": { + "scorePerFile": 0.017578125, + "scorePerKloc": 0.43885313048566416, + "scorePerFunction": null, + "findingsPerFile": 0.006509357200976403, + "findingsPerKloc": 0.14628437682855472, + "findingsPerFunction": null + } + } + }, + "repos": [ + { + "id": "devworkbench", + "repo": "jiayun/DevWorkbench", + "cohort": "explicit-ai", + "ref": "ea50862107c377b0cdd6e508073ad0330b367576", + "summary": { + "fileCount": 32, + "directoryCount": 7, + "findingCount": 1, + "repoScore": 3, + "physicalLineCount": 9408, + "logicalLineCount": 2986, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.09375, + "scorePerKloc": 1.0046885465505693, + "scorePerFunction": null, + "findingsPerFile": 0.03125, + "findingsPerKloc": 0.3348961821835231, + "findingsPerFunction": null + } + } + }, + { + "id": "agent-ci", + "repo": "redwoodjs/agent-ci", + "cohort": "explicit-ai", + "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", + "summary": { + "fileCount": 94, + "directoryCount": 24, + "findingCount": 1, + "repoScore": 2, + "physicalLineCount": 17222, + "logicalLineCount": 8474, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.02127659574468085, + "scorePerKloc": 0.2360160490913382, + "scorePerFunction": null, + "findingsPerFile": 0.010638297872340425, + "findingsPerKloc": 0.1180080245456691, + "findingsPerFunction": null + } + } + }, + { + "id": "vinext", + "repo": "cloudflare/vinext", + "cohort": "explicit-ai", + "ref": "28980b01f1692b47756a6768fc5733712c290f84", + "summary": { + "fileCount": 1129, + "directoryCount": 486, + "findingCount": 3, + "repoScore": 7, + "physicalLineCount": 150365, + "logicalLineCount": 59523, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.006200177147918512, + 
"scorePerKloc": 0.11760159938175159, + "scorePerFunction": null, + "findingsPerFile": 0.002657218777679362, + "findingsPerKloc": 0.05040068544932211, + "findingsPerFunction": null + } + } + }, + { + "id": "gstack", + "repo": "garrytan/gstack", + "cohort": "explicit-ai", + "ref": "6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", + "summary": { + "fileCount": 176, + "directoryCount": 19, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 62616, + "logicalLineCount": 18958, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "hunk", + "repo": "modem-dev/hunk", + "cohort": "explicit-ai", + "ref": "b37663f3c3055f860290587865199192e15d9f35", + "summary": { + "fileCount": 166, + "directoryCount": 36, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 32498, + "logicalLineCount": 13564, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "openusage", + "repo": "robinebers/openusage", + "cohort": "explicit-ai", + "ref": "857f537a243483acf98ccd9ea32e20b380c63823", + "summary": { + "fileCount": 139, + "directoryCount": 29, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 33794, + "logicalLineCount": 22270, + "functionCount": 0, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": null, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": null + } + } + }, + { + "id": "astro", + "repo": "withastro/astro", + "cohort": "mature-oss", + "ref": "f7068995aa451dced13853789b0d51433c2373b5", + "summary": { + "fileCount": 1949, + "directoryCount": 852, + "findingCount": 27, + "repoScore": 68.5, + "physicalLineCount": 138854, + "logicalLineCount": 80948, + "functionCount": 0, + "normalized": { + 
"scorePerFile": 0.03514622883530016, + "scorePerKloc": 0.846222266146168, + "scorePerFunction": null, + "findingsPerFile": 0.013853258081067214, + "findingsPerKloc": 0.33354746256856255, + "findingsPerFunction": null + } + } + }, + { + "id": "vite", + "repo": "vitejs/vite", + "cohort": "mature-oss", + "ref": "a4922537a8d705da7769d30626a0d846511fc124", + "summary": { + "fileCount": 1229, + "directoryCount": 525, + "findingCount": 8, + "repoScore": 19, + "physicalLineCount": 77629, + "logicalLineCount": 37251, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.015459723352318959, + "scorePerKloc": 0.5100534213846608, + "scorePerFunction": null, + "findingsPerFile": 0.006509357200976403, + "findingsPerKloc": 0.2147593353198572, + "findingsPerFunction": null + } + } + }, + { + "id": "umami", + "repo": "umami-software/umami", + "cohort": "mature-oss", + "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", + "summary": { + "fileCount": 512, + "directoryCount": 87, + "findingCount": 3, + "repoScore": 9, + "physicalLineCount": 29677, + "logicalLineCount": 20508, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.017578125, + "scorePerKloc": 0.43885313048566416, + "scorePerFunction": null, + "findingsPerFile": 0.005859375, + "findingsPerKloc": 0.14628437682855472, + "findingsPerFunction": null + } + } + }, + { + "id": "payload", + "repo": "payloadcms/payload", + "cohort": "mature-oss", + "ref": "f3f36d801010f3c95ae74655ff22a09ea66ab1ac", + "summary": { + "fileCount": 4234, + "directoryCount": 1837, + "findingCount": 33, + "repoScore": 83, + "physicalLineCount": 384327, + "logicalLineCount": 251992, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.019603212092583845, + "scorePerKloc": 0.3293755357312931, + "scorePerFunction": null, + "findingsPerFile": 0.007794048181388758, + "findingsPerKloc": 0.13095653830280327, + "findingsPerFunction": null + } + } + }, + { + "id": "execa", + "repo": "sindresorhus/execa", + "cohort": "mature-oss", + "ref": 
"99d1741d2525eca71b986282148bbf2983356428", + "summary": { + "fileCount": 580, + "directoryCount": 46, + "findingCount": 1, + "repoScore": 3, + "physicalLineCount": 35995, + "logicalLineCount": 20374, + "functionCount": 0, + "normalized": { + "scorePerFile": 0.005172413793103448, + "scorePerKloc": 0.1472464906253068, + "scorePerFunction": null, + "findingsPerFile": 0.0017241379310344827, + "findingsPerKloc": 0.049082163541768926, + "findingsPerFunction": null + } + } + } + ], + "rank": 8 + }, + { + "ruleId": "structure.duplicate-function-signatures", + "ruleSlug": "duplicate-function-signatures", + "family": "structure", + "severity": "medium", + "scope": "file", + "requires": [ + "repo.duplicateFunctionSignatures" + ], + "signalScore": 0.32222222222222224, + "bestMetric": "findingsPerFile", + "bestMetricAuc": 0.4, + "metricAucs": { + "scorePerFile": 0.4, + "scorePerKloc": 0.3, + "scorePerFunction": 0.26666666666666666, + "findingsPerFile": 0.4, + "findingsPerKloc": 0.26666666666666666, + "findingsPerFunction": 0.3 + }, + "cohorts": { + "explicit-ai": { + "repoCount": 6, + "hitCount": 2, + "hitRate": 0.3333333333333333, + "repoScoreMedian": 0, + "findingCountMedian": 0, + "medians": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + }, + "mature-oss": { + "repoCount": 5, + "hitCount": 4, + "hitRate": 0.8, + "repoScoreMedian": 25.75, + "findingCountMedian": 12, + "medians": { + "scorePerFile": 0.038994356080041044, + "scorePerKloc": 0.9388743390818798, + "scorePerFunction": 0.025182239893969515, + "findingsPerFile": 0.014366341713699333, + "findingsPerKloc": 0.3459010722933241, + "findingsPerFunction": 0.00927766732935719 + } + } + }, + "repos": [ + { + "id": "vinext", + "repo": "cloudflare/vinext", + "cohort": "explicit-ai", + "ref": "28980b01f1692b47756a6768fc5733712c290f84", + "summary": { + "fileCount": 1129, + "directoryCount": 486, + "findingCount": 50, + 
"repoScore": 143.25, + "physicalLineCount": 150365, + "logicalLineCount": 59523, + "functionCount": 2917, + "normalized": { + "scorePerFile": 0.12688219663418954, + "scorePerKloc": 2.4066327302051307, + "scorePerFunction": 0.04910867329448063, + "findingsPerFile": 0.04428697962798937, + "findingsPerKloc": 0.8400114241553684, + "findingsPerFunction": 0.017140898183064794 + } + } + }, + { + "id": "openusage", + "repo": "robinebers/openusage", + "cohort": "explicit-ai", + "ref": "857f537a243483acf98ccd9ea32e20b380c63823", + "summary": { + "fileCount": 139, + "directoryCount": 29, + "findingCount": 5, + "repoScore": 11.25, + "physicalLineCount": 33794, + "logicalLineCount": 22270, + "functionCount": 491, + "normalized": { + "scorePerFile": 0.08093525179856115, + "scorePerKloc": 0.5051638976201167, + "scorePerFunction": 0.022912423625254582, + "findingsPerFile": 0.03597122302158273, + "findingsPerKloc": 0.224517287831163, + "findingsPerFunction": 0.010183299389002037 + } + } + }, + { + "id": "gstack", + "repo": "garrytan/gstack", + "cohort": "explicit-ai", + "ref": "6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", + "summary": { + "fileCount": 176, + "directoryCount": 19, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 62616, + "logicalLineCount": 18958, + "functionCount": 832, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + }, + { + "id": "devworkbench", + "repo": "jiayun/DevWorkbench", + "cohort": "explicit-ai", + "ref": "ea50862107c377b0cdd6e508073ad0330b367576", + "summary": { + "fileCount": 32, + "directoryCount": 7, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 9408, + "logicalLineCount": 2986, + "functionCount": 147, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + }, + { + "id": "hunk", + "repo": 
"modem-dev/hunk", + "cohort": "explicit-ai", + "ref": "b37663f3c3055f860290587865199192e15d9f35", + "summary": { + "fileCount": 166, + "directoryCount": 36, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 32498, + "logicalLineCount": 13564, + "functionCount": 752, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + }, + { + "id": "agent-ci", + "repo": "redwoodjs/agent-ci", + "cohort": "explicit-ai", + "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", + "summary": { + "fileCount": 94, + "directoryCount": 24, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 17222, + "logicalLineCount": 8474, + "functionCount": 220, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + }, + { + "id": "umami", + "repo": "umami-software/umami", + "cohort": "mature-oss", + "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", + "summary": { + "fileCount": 512, + "directoryCount": 87, + "findingCount": 12, + "repoScore": 25.75, + "physicalLineCount": 29677, + "logicalLineCount": 20508, + "functionCount": 911, + "normalized": { + "scorePerFile": 0.05029296875, + "scorePerKloc": 1.255607567778428, + "scorePerFunction": 0.028265642151481887, + "findingsPerFile": 0.0234375, + "findingsPerKloc": 0.5851375073142189, + "findingsPerFunction": 0.013172338090010977 + } + } + }, + { + "id": "payload", + "repo": "payloadcms/payload", + "cohort": "mature-oss", + "ref": "f3f36d801010f3c95ae74655ff22a09ea66ab1ac", + "summary": { + "fileCount": 4234, + "directoryCount": 1837, + "findingCount": 131, + "repoScore": 309, + "physicalLineCount": 384327, + "logicalLineCount": 251992, + "functionCount": 3544, + "normalized": { + "scorePerFile": 0.07298063297118564, + "scorePerKloc": 1.226229404108067, + "scorePerFunction": 0.08718961625282166, + 
"findingsPerFile": 0.030940009447331128, + "findingsPerKloc": 0.5198577732626433, + "findingsPerFunction": 0.036963882618510156 + } + } + }, + { + "id": "astro", + "repo": "withastro/astro", + "cohort": "mature-oss", + "ref": "f7068995aa451dced13853789b0d51433c2373b5", + "summary": { + "fileCount": 1949, + "directoryCount": 852, + "findingCount": 28, + "repoScore": 76, + "physicalLineCount": 138854, + "logicalLineCount": 80948, + "functionCount": 3018, + "normalized": { + "scorePerFile": 0.038994356080041044, + "scorePerKloc": 0.9388743390818798, + "scorePerFunction": 0.025182239893969515, + "findingsPerFile": 0.014366341713699333, + "findingsPerKloc": 0.3459010722933241, + "findingsPerFunction": 0.00927766732935719 + } + } + }, + { + "id": "execa", + "repo": "sindresorhus/execa", + "cohort": "mature-oss", + "ref": "99d1741d2525eca71b986282148bbf2983356428", + "summary": { + "fileCount": 580, + "directoryCount": 46, + "findingCount": 6, + "repoScore": 7.5, + "physicalLineCount": 35995, + "logicalLineCount": 20374, + "functionCount": 1007, + "normalized": { + "scorePerFile": 0.01293103448275862, + "scorePerKloc": 0.3681162265632669, + "scorePerFunction": 0.007447864945382324, + "findingsPerFile": 0.010344827586206896, + "findingsPerKloc": 0.2944929812506136, + "findingsPerFunction": 0.005958291956305859 + } + } + }, + { + "id": "vite", + "repo": "vitejs/vite", + "cohort": "mature-oss", + "ref": "a4922537a8d705da7769d30626a0d846511fc124", + "summary": { + "fileCount": 1229, + "directoryCount": 525, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 77629, + "logicalLineCount": 37251, + "functionCount": 1904, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + } + ], + "rank": 9 + }, + { + "ruleId": "structure.directory-fanout-hotspot", + "ruleSlug": "directory-fanout-hotspot", + "family": "structure", + "severity": "medium", + "scope": 
"directory", + "requires": [ + "directory.metrics" + ], + "signalScore": 0.22222222222222224, + "bestMetric": "findingsPerFile", + "bestMetricAuc": 0.5, + "metricAucs": { + "scorePerFile": 0.4666666666666667, + "scorePerKloc": 0.13333333333333333, + "scorePerFunction": 0.1, + "findingsPerFile": 0.5, + "findingsPerKloc": 0.1, + "findingsPerFunction": 0.03333333333333333 + }, + "cohorts": { + "explicit-ai": { + "repoCount": 6, + "hitCount": 6, + "hitRate": 1, + "repoScoreMedian": 6.097222222222223, + "findingCountMedian": 1.5, + "medians": { + "scorePerFile": 0.051404198968162076, + "scorePerKloc": 0.5428176498054266, + "scorePerFunction": 0.014598312555288116, + "findingsPerFile": 0.013837291548514954, + "findingsPerKloc": 0.16959085981043406, + "findingsPerFunction": 0.00430938715052768 + } + }, + "mature-oss": { + "repoCount": 5, + "hitCount": 5, + "hitRate": 1, + "repoScoreMedian": 76.98412698412699, + "findingCountMedian": 21, + "medians": { + "scorePerFile": 0.044517510484307354, + "scorePerKloc": 1.1346421384736092, + "scorePerFunction": 0.028696507821756326, + "findingsPerFile": 0.012314007183170857, + "findingsPerKloc": 0.3055652560398743, + "findingsPerFunction": 0.007952286282306162 + } + } + }, + "repos": [ + { + "id": "devworkbench", + "repo": "jiayun/DevWorkbench", + "cohort": "explicit-ai", + "ref": "ea50862107c377b0cdd6e508073ad0330b367576", + "summary": { + "fileCount": 32, + "directoryCount": 7, + "findingCount": 1, + "repoScore": 4.833333333333334, + "physicalLineCount": 9408, + "logicalLineCount": 2986, + "functionCount": 147, + "normalized": { + "scorePerFile": 0.15104166666666669, + "scorePerKloc": 1.6186648805536952, + "scorePerFunction": 0.03287981859410431, + "findingsPerFile": 0.03125, + "findingsPerKloc": 0.3348961821835231, + "findingsPerFunction": 0.006802721088435374 + } + } + }, + { + "id": "vinext", + "repo": "cloudflare/vinext", + "cohort": "explicit-ai", + "ref": "28980b01f1692b47756a6768fc5733712c290f84", + "summary": { + 
"fileCount": 1129, + "directoryCount": 486, + "findingCount": 15, + "repoScore": 56.28151260504201, + "physicalLineCount": 150365, + "logicalLineCount": 59523, + "functionCount": 2917, + "normalized": { + "scorePerFile": 0.04985076404343845, + "scorePerKloc": 0.9455422711395932, + "scorePerFunction": 0.019294313543038056, + "findingsPerFile": 0.013286093888396812, + "findingsPerKloc": 0.25200342724661057, + "findingsPerFunction": 0.005142269454919438 + } + } + }, + { + "id": "hunk", + "repo": "modem-dev/hunk", + "cohort": "explicit-ai", + "ref": "b37663f3c3055f860290587865199192e15d9f35", + "summary": { + "fileCount": 166, + "directoryCount": 36, + "findingCount": 3, + "repoScore": 9.723492063492063, + "physicalLineCount": 32498, + "logicalLineCount": 13564, + "functionCount": 752, + "normalized": { + "scorePerFile": 0.0585752533945305, + "scorePerKloc": 0.7168602229056372, + "scorePerFunction": 0.012930175616345829, + "findingsPerFile": 0.018072289156626505, + "findingsPerKloc": 0.22117369507519904, + "findingsPerFunction": 0.003989361702127659 + } + } + }, + { + "id": "agent-ci", + "repo": "redwoodjs/agent-ci", + "cohort": "explicit-ai", + "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", + "summary": { + "fileCount": 94, + "directoryCount": 24, + "findingCount": 1, + "repoScore": 3.125, + "physicalLineCount": 17222, + "logicalLineCount": 8474, + "functionCount": 220, + "normalized": { + "scorePerFile": 0.03324468085106383, + "scorePerKloc": 0.3687750767052159, + "scorePerFunction": 0.014204545454545454, + "findingsPerFile": 0.010638297872340425, + "findingsPerKloc": 0.1180080245456691, + "findingsPerFunction": 0.004545454545454545 + } + } + }, + { + "id": "openusage", + "repo": "robinebers/openusage", + "cohort": "explicit-ai", + "ref": "857f537a243483acf98ccd9ea32e20b380c63823", + "summary": { + "fileCount": 139, + "directoryCount": 29, + "findingCount": 2, + "repoScore": 7.361111111111112, + "physicalLineCount": 33794, + "logicalLineCount": 22270, + 
"functionCount": 491, + "normalized": { + "scorePerFile": 0.05295763389288569, + "scorePerKloc": 0.3305393404181011, + "scorePerFunction": 0.014992079656030777, + "findingsPerFile": 0.014388489208633094, + "findingsPerKloc": 0.0898069151324652, + "findingsPerFunction": 0.004073319755600814 + } + } + }, + { + "id": "gstack", + "repo": "garrytan/gstack", + "cohort": "explicit-ai", + "ref": "6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", + "summary": { + "fileCount": 176, + "directoryCount": 19, + "findingCount": 1, + "repoScore": 3, + "physicalLineCount": 62616, + "logicalLineCount": 18958, + "functionCount": 832, + "normalized": { + "scorePerFile": 0.017045454545454544, + "scorePerKloc": 0.15824454056335058, + "scorePerFunction": 0.003605769230769231, + "findingsPerFile": 0.005681818181818182, + "findingsPerKloc": 0.052748180187783524, + "findingsPerFunction": 0.001201923076923077 + } + } + }, + { + "id": "vite", + "repo": "vitejs/vite", + "cohort": "mature-oss", + "ref": "a4922537a8d705da7769d30626a0d846511fc124", + "summary": { + "fileCount": 1229, + "directoryCount": 525, + "findingCount": 21, + "repoScore": 76.98412698412699, + "physicalLineCount": 77629, + "logicalLineCount": 37251, + "functionCount": 1904, + "normalized": { + "scorePerFile": 0.0626396476681261, + "scorePerKloc": 2.0666324926613244, + "scorePerFunction": 0.0404328398025877, + "findingsPerFile": 0.01708706265256306, + "findingsPerKloc": 0.5637432552146251, + "findingsPerFunction": 0.011029411764705883 + } + } + }, + { + "id": "execa", + "repo": "sindresorhus/execa", + "cohort": "mature-oss", + "ref": "99d1741d2525eca71b986282148bbf2983356428", + "summary": { + "fileCount": 580, + "directoryCount": 46, + "findingCount": 7, + "repoScore": 25.680555555555554, + "physicalLineCount": 35995, + "logicalLineCount": 20374, + "functionCount": 1007, + "normalized": { + "scorePerFile": 0.044276819923371645, + "scorePerKloc": 1.2604572276212602, + "scorePerFunction": 0.025502041266688733, + "findingsPerFile": 
0.01206896551724138, + "findingsPerKloc": 0.3435751447923825, + "findingsPerFunction": 0.006951340615690168 + } + } + }, + { + "id": "payload", + "repo": "payloadcms/payload", + "cohort": "mature-oss", + "ref": "f3f36d801010f3c95ae74655ff22a09ea66ab1ac", + "summary": { + "fileCount": 4234, + "directoryCount": 1837, + "findingCount": 77, + "repoScore": 285.9207417582417, + "physicalLineCount": 384327, + "logicalLineCount": 251992, + "functionCount": 3544, + "normalized": { + "scorePerFile": 0.06752969810067116, + "scorePerKloc": 1.1346421384736092, + "scorePerFunction": 0.08067741020266414, + "findingsPerFile": 0.018186112423240433, + "findingsPerKloc": 0.3055652560398743, + "findingsPerFunction": 0.02172686230248307 + } + } + }, + { + "id": "umami", + "repo": "umami-software/umami", + "cohort": "mature-oss", + "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", + "summary": { + "fileCount": 512, + "directoryCount": 87, + "findingCount": 6, + "repoScore": 22.792965367965365, + "physicalLineCount": 29677, + "logicalLineCount": 20508, + "functionCount": 911, + "normalized": { + "scorePerFile": 0.044517510484307354, + "scorePerKloc": 1.111418244975881, + "scorePerFunction": 0.02501972049172927, + "findingsPerFile": 0.01171875, + "findingsPerKloc": 0.29256875365710944, + "findingsPerFunction": 0.006586169045005488 + } + } + }, + { + "id": "astro", + "repo": "withastro/astro", + "cohort": "mature-oss", + "ref": "f7068995aa451dced13853789b0d51433c2373b5", + "summary": { + "fileCount": 1949, + "directoryCount": 852, + "findingCount": 24, + "repoScore": 86.6060606060606, + "physicalLineCount": 138854, + "logicalLineCount": 80948, + "functionCount": 3018, + "normalized": { + "scorePerFile": 0.04443615218371503, + "scorePerKloc": 1.0698974725263206, + "scorePerFunction": 0.028696507821756326, + "findingsPerFile": 0.012314007183170857, + "findingsPerKloc": 0.29648663339427783, + "findingsPerFunction": 0.007952286282306162 + } + } + } + ], + "rank": 10 + }, + { + "ruleId": 
"structure.over-fragmentation", + "ruleSlug": "over-fragmentation", + "family": "structure", + "severity": "strong", + "scope": "directory", + "requires": [ + "directory.metrics" + ], + "signalScore": 0.1722222222222222, + "bestMetric": "findingsPerFile", + "bestMetricAuc": 0.18333333333333332, + "metricAucs": { + "scorePerFile": 0.18333333333333332, + "scorePerKloc": 0.18333333333333332, + "scorePerFunction": 0.15, + "findingsPerFile": 0.18333333333333332, + "findingsPerKloc": 0.18333333333333332, + "findingsPerFunction": 0.15 + }, + "cohorts": { + "explicit-ai": { + "repoCount": 6, + "hitCount": 1, + "hitRate": 0.16666666666666666, + "repoScoreMedian": 0, + "findingCountMedian": 0, + "medians": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + }, + "mature-oss": { + "repoCount": 5, + "hitCount": 4, + "hitRate": 0.8, + "repoScoreMedian": 13.934306569343066, + "findingCountMedian": 2, + "medians": { + "scorePerFile": 0.008233837733943608, + "scorePerKloc": 0.13834593544841597, + "scorePerFunction": 0.009836926909005993, + "findingsPerFile": 0.0011809163911195087, + "findingsPerKloc": 0.01984189974284898, + "findingsPerFunction": 0.0014108352144469526 + } + } + }, + "repos": [ + { + "id": "vinext", + "repo": "cloudflare/vinext", + "cohort": "explicit-ai", + "ref": "28980b01f1692b47756a6768fc5733712c290f84", + "summary": { + "fileCount": 1129, + "directoryCount": 486, + "findingCount": 3, + "repoScore": 20.12121212121212, + "physicalLineCount": 150365, + "logicalLineCount": 59523, + "functionCount": 2917, + "normalized": { + "scorePerFile": 0.01782215422605148, + "scorePerKloc": 0.3380409609934331, + "scorePerFunction": 0.006897912965790923, + "findingsPerFile": 0.002657218777679362, + "findingsPerKloc": 0.05040068544932211, + "findingsPerFunction": 0.0010284538909838875 + } + } + }, + { + "id": "gstack", + "repo": "garrytan/gstack", + "cohort": "explicit-ai", + "ref": 
"6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", + "summary": { + "fileCount": 176, + "directoryCount": 19, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 62616, + "logicalLineCount": 18958, + "functionCount": 832, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + }, + { + "id": "devworkbench", + "repo": "jiayun/DevWorkbench", + "cohort": "explicit-ai", + "ref": "ea50862107c377b0cdd6e508073ad0330b367576", + "summary": { + "fileCount": 32, + "directoryCount": 7, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 9408, + "logicalLineCount": 2986, + "functionCount": 147, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + }, + { + "id": "hunk", + "repo": "modem-dev/hunk", + "cohort": "explicit-ai", + "ref": "b37663f3c3055f860290587865199192e15d9f35", + "summary": { + "fileCount": 166, + "directoryCount": 36, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 32498, + "logicalLineCount": 13564, + "functionCount": 752, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + }, + { + "id": "agent-ci", + "repo": "redwoodjs/agent-ci", + "cohort": "explicit-ai", + "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", + "summary": { + "fileCount": 94, + "directoryCount": 24, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 17222, + "logicalLineCount": 8474, + "functionCount": 220, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + }, + { + "id": "openusage", + "repo": "robinebers/openusage", + "cohort": "explicit-ai", + "ref": "857f537a243483acf98ccd9ea32e20b380c63823", + 
"summary": { + "fileCount": 139, + "directoryCount": 29, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 33794, + "logicalLineCount": 22270, + "functionCount": 491, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + }, + { + "id": "vite", + "repo": "vitejs/vite", + "cohort": "mature-oss", + "ref": "a4922537a8d705da7769d30626a0d846511fc124", + "summary": { + "fileCount": 1229, + "directoryCount": 525, + "findingCount": 8, + "repoScore": 54.016666666666666, + "physicalLineCount": 77629, + "logicalLineCount": 37251, + "functionCount": 1904, + "normalized": { + "scorePerFile": 0.043951722267426094, + "scorePerKloc": 1.4500729286909524, + "scorePerFunction": 0.028370098039215686, + "findingsPerFile": 0.006509357200976403, + "findingsPerKloc": 0.2147593353198572, + "findingsPerFunction": 0.004201680672268907 + } + } + }, + { + "id": "execa", + "repo": "sindresorhus/execa", + "cohort": "mature-oss", + "ref": "99d1741d2525eca71b986282148bbf2983356428", + "summary": { + "fileCount": 580, + "directoryCount": 46, + "findingCount": 2, + "repoScore": 13.934306569343066, + "physicalLineCount": 35995, + "logicalLineCount": 20374, + "functionCount": 1007, + "normalized": { + "scorePerFile": 0.024024666498867357, + "scorePerKloc": 0.6839259138776415, + "scorePerFunction": 0.013837444458136113, + "findingsPerFile": 0.0034482758620689655, + "findingsPerKloc": 0.09816432708353785, + "findingsPerFunction": 0.0019860973187686196 + } + } + }, + { + "id": "payload", + "repo": "payloadcms/payload", + "cohort": "mature-oss", + "ref": "f3f36d801010f3c95ae74655ff22a09ea66ab1ac", + "summary": { + "fileCount": 4234, + "directoryCount": 1837, + "findingCount": 5, + "repoScore": 34.86206896551724, + "physicalLineCount": 384327, + "logicalLineCount": 251992, + "functionCount": 3544, + "normalized": { + "scorePerFile": 0.008233837733943608, + "scorePerKloc": 
0.13834593544841597, + "scorePerFunction": 0.009836926909005993, + "findingsPerFile": 0.0011809163911195087, + "findingsPerKloc": 0.01984189974284898, + "findingsPerFunction": 0.0014108352144469526 + } + } + }, + { + "id": "astro", + "repo": "withastro/astro", + "cohort": "mature-oss", + "ref": "f7068995aa451dced13853789b0d51433c2373b5", + "summary": { + "fileCount": 1949, + "directoryCount": 852, + "findingCount": 1, + "repoScore": 6.5, + "physicalLineCount": 138854, + "logicalLineCount": 80948, + "functionCount": 3018, + "normalized": { + "scorePerFile": 0.0033350436121087736, + "scorePerKloc": 0.08029846321095024, + "scorePerFunction": 0.0021537442014579193, + "findingsPerFile": 0.000513083632632119, + "findingsPerKloc": 0.012353609724761577, + "findingsPerFunction": 0.0003313452617627568 + } + } + }, + { + "id": "umami", + "repo": "umami-software/umami", + "cohort": "mature-oss", + "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", + "summary": { + "fileCount": 512, + "directoryCount": 87, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 29677, + "logicalLineCount": 20508, + "functionCount": 911, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + } + ], + "rank": 11 + } + ] +} diff --git a/benchmarks/sets/rule-signal-mini.json b/benchmarks/sets/rule-signal-mini.json new file mode 100644 index 0000000..10c8b47 --- /dev/null +++ b/benchmarks/sets/rule-signal-mini.json @@ -0,0 +1,135 @@ +{ + "schemaVersion": 1, + "id": "rule-signal-mini", + "name": "Per-rule signal mini benchmark", + "description": "Run each built-in rule in isolation against a smaller pinned cohort of explicit-AI and mature-OSS repositories so we can compare which rules separate the cohorts most cleanly. 
The mature-OSS repos stay pinned to exact pre-2025 commit SHAs.", + "artifacts": { + "checkoutsDir": "benchmarks/.cache/checkouts/known-ai-vs-solid-oss", + "snapshotPath": "benchmarks/results/rule-signal-mini.json", + "reportPath": "reports/rule-signal-mini.md" + }, + "repos": [ + { + "id": "openusage", + "repo": "robinebers/openusage", + "url": "https://github.com/robinebers/openusage.git", + "cohort": "explicit-ai", + "ref": "857f537a243483acf98ccd9ea32e20b380c63823", + "createdAt": "2026-02-01T07:37:03Z", + "stars": 1715, + "provenance": "README: \"Not a single line of code in this project was read or written by hand. 100% AI-generated, AI-reviewed, AI-shipped\"", + "notes": "Useful negative control because it does not look uniformly slop-heavy on every normalized metric." + }, + { + "id": "devworkbench", + "repo": "jiayun/DevWorkbench", + "url": "https://github.com/jiayun/DevWorkbench.git", + "cohort": "explicit-ai", + "ref": "ea50862107c377b0cdd6e508073ad0330b367576", + "createdAt": "2025-06-25T03:39:53Z", + "stars": 17, + "provenance": "README: \"100% AI-Generated Code - A Totally Vibed Application\"", + "notes": "Desktop-style TypeScript application with explicit AI-generation disclosure." + }, + { + "id": "gstack", + "repo": "garrytan/gstack", + "url": "https://github.com/garrytan/gstack.git", + "cohort": "explicit-ai", + "ref": "6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", + "createdAt": "2026-03-11T21:22:45Z", + "stars": 65613, + "provenance": "User-provided provenance: repo is AI-generated.", + "notes": "Popular AI-coding workflow/tooling repo with a substantial TypeScript surface and very high try/catch density." 
+ }, + { + "id": "hunk", + "repo": "modem-dev/hunk", + "url": "https://github.com/modem-dev/hunk.git", + "cohort": "explicit-ai", + "ref": "b37663f3c3055f860290587865199192e15d9f35", + "createdAt": "2026-03-17T19:16:15Z", + "stars": 352, + "provenance": "User-provided provenance: repo is AI-generated.", + "notes": "Review-first terminal diff viewer that serves as a lower-slop AI-generated benchmark example." + }, + { + "id": "vinext", + "repo": "cloudflare/vinext", + "url": "https://github.com/cloudflare/vinext.git", + "cohort": "explicit-ai", + "ref": "28980b01f1692b47756a6768fc5733712c290f84", + "createdAt": "2026-02-24T05:07:41Z", + "stars": 7709, + "provenance": "README: \"The vast majority of the code, tests, and documentation were written by AI (Claude Code). Humans direct architecture, priorities, and design decisions, but have not reviewed most of the code line-by-line.\"", + "notes": "Polished, company-backed Vite-based reimplementation of the Next.js API surface built as an AI-driven software-development experiment." + }, + { + "id": "agent-ci", + "repo": "redwoodjs/agent-ci", + "url": "https://github.com/redwoodjs/agent-ci.git", + "cohort": "explicit-ai", + "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", + "createdAt": "2026-02-09T20:24:25Z", + "stars": 120, + "provenance": "User-provided provenance: repo is almost entirely Claude-authored.", + "notes": "Local GitHub Actions orchestration layer for agent workflows; useful benchmark for command-heavy CI tooling with strong AI involvement." 
+ }, + { + "id": "payload", + "repo": "payloadcms/payload", + "url": "https://github.com/payloadcms/payload.git", + "cohort": "mature-oss", + "ref": "f3f36d801010f3c95ae74655ff22a09ea66ab1ac", + "createdAt": "2021-01-05T18:49:45Z", + "stars": 41856, + "provenance": "Mature open-source full-stack Next.js CMS/framework by Payload.", + "notes": "Pinned to the latest default-branch commit on or before 2025-01-01 to test a later mature-OSS full-stack app/framework baseline." + }, + { + "id": "execa", + "repo": "sindresorhus/execa", + "url": "https://github.com/sindresorhus/execa.git", + "cohort": "mature-oss", + "ref": "99d1741d2525eca71b986282148bbf2983356428", + "createdAt": "2015-12-05T22:57:03Z", + "stars": 7481, + "provenance": "Long-lived process execution library for Node.js.", + "notes": "Helpful benchmark for command-heavy code that still avoids extreme normalized scores." + }, + { + "id": "umami", + "repo": "umami-software/umami", + "url": "https://github.com/umami-software/umami.git", + "cohort": "mature-oss", + "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", + "createdAt": "2020-07-17T07:59:00Z", + "stars": 36012, + "provenance": "Mature privacy-focused analytics platform with a large TypeScript codebase.", + "notes": "Good counterweight for explicit-AI full-stack app comparisons." + }, + { + "id": "vite", + "repo": "vitejs/vite", + "url": "https://github.com/vitejs/vite.git", + "cohort": "mature-oss", + "ref": "a4922537a8d705da7769d30626a0d846511fc124", + "createdAt": "2020-04-21T05:03:57Z", + "stars": 79637, + "provenance": "Mature frontend tooling project with a large TypeScript codebase.", + "notes": "Added as a stress-test baseline to check that the analyzer does not overfit against only smaller mature repos." 
+ }, + { + "id": "astro", + "repo": "withastro/astro", + "url": "https://github.com/withastro/astro.git", + "cohort": "mature-oss", + "ref": "f7068995aa451dced13853789b0d51433c2373b5", + "createdAt": "2021-03-15T17:19:47Z", + "stars": 58212, + "provenance": "Mature web framework with a very large TypeScript/JavaScript monorepo.", + "notes": "Added as a stress-test baseline for a large, actively maintained modern framework codebase." + } + ], + "pairings": [] +} diff --git a/package.json b/package.json index 04897b4..f725e86 100644 --- a/package.json +++ b/package.json @@ -52,6 +52,7 @@ "benchmark:scan": "bun run scripts/benchmark-scan.ts", "benchmark:report": "bun run scripts/benchmark-report.ts", "benchmark:history": "bun run scripts/benchmark-history.ts", + "benchmark:rules": "bun run scripts/benchmark-rule-signals.ts", "benchmark:update": "bun run benchmark:fetch && bun run benchmark:scan && bun run benchmark:report" }, "dependencies": { diff --git a/reports/rule-signal-mini.md b/reports/rule-signal-mini.md new file mode 100644 index 0000000..99a0a03 --- /dev/null +++ b/reports/rule-signal-mini.md @@ -0,0 +1,459 @@ +# Per-rule signal benchmark: Per-rule signal mini benchmark + +Date: 2026-04-19 +Analyzer version: 0.3.0 +Manifest: `benchmarks/sets/rule-signal-mini.json` +Summary: `benchmarks/results/rule-signal-mini.json` +Report: `reports/rule-signal-mini.md` + +## Goal + +Run each built-in rule in isolation against a smaller pinned cohort of explicit-AI and mature-OSS repositories so we can compare which rules separate the cohorts most cleanly. The mature-OSS repos stay pinned to exact pre-2025 commit SHAs. + +Signal score = average AUROC across the six normalized metrics when each rule runs in isolation against this pinned mini cohort. 1.00 means perfect AI-over-OSS separation, while 0.50 means no better than random ordering. 
+ +## Leaderboard + +| Rank | Rule | Signal score | AI hit rate | OSS hit rate | Best metric | Best AUROC | +|---:|---|---:|---:|---:|---|---:| +| 1 | `defensive.error-swallowing` | **0.72** | 6/6 (100%) | 3/5 (60%) | findings / file | 0.87 | +| 2 | `defensive.empty-catch` | **0.67** | 6/6 (100%) | 5/5 (100%) | findings / file | 0.93 | +| 3 | `structure.pass-through-wrappers` | **0.67** | 5/6 (83%) | 4/5 (80%) | findings / file | 0.85 | +| 4 | `defensive.error-obscuring` | **0.66** | 5/6 (83%) | 5/5 (100%) | findings / file | 0.83 | +| 5 | `tests.duplicate-mock-setup` | **0.63** | 3/6 (50%) | 1/5 (20%) | findings / file | 0.70 | +| 6 | `comments.placeholder-comments` | **0.50** | 0/6 (0%) | 0/5 (0%) | findings / file | 0.50 | +| 7 | `defensive.async-noise` | **0.41** | 3/6 (50%) | 4/5 (80%) | findings / function | 0.50 | +| 8 | `structure.barrel-density` | **0.35** | 3/6 (50%) | 5/5 (100%) | findings / function | 0.50 | +| 9 | `structure.duplicate-function-signatures` | **0.32** | 2/6 (33%) | 4/5 (80%) | findings / file | 0.40 | +| 10 | `structure.directory-fanout-hotspot` | **0.22** | 6/6 (100%) | 5/5 (100%) | findings / file | 0.50 | +| 11 | `structure.over-fragmentation` | **0.17** | 1/6 (17%) | 4/5 (80%) | findings / file | 0.18 | + + +## defensive.error-swallowing + +- Rank: **#1** of 11 +- Signal score: **0.72 / 1.00** +- Family / severity / scope: `defensive` / `strong` / `file` +- Best metric: findings / file (0.87) + +### Cohort summary + +| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | +|---|---:|---:|---:|---:|---:|---:| +| explicit-ai | 6/6 (100%) | 3.00 | 9.10 | 0.07 | 0.53 | 0.24 | +| mature-oss | 3/5 (60%) | 6.00 | 13.80 | 0.01 | 0.17 | 0.09 | + +### AUROC by normalized metric + +- score / file: 0.87 +- score / KLOC: 0.80 +- score / function: 0.50 +- findings / file: 0.87 +- findings / KLOC: 0.77 +- findings / function: 0.50 + +### Repo results + +| Repo | Cohort | Ref | 
Findings | Repo score | Score / file | Score / KLOC | Findings / KLOC | +|---|---|---|---:|---:|---:|---:|---:| +| [jiayun/DevWorkbench](https://github.com/jiayun/DevWorkbench) | explicit-ai | `ea50862` | 10 | 17.40 | 0.54 | 5.83 | 3.35 | +| [garrytan/gstack](https://github.com/garrytan/gstack) | explicit-ai | `6cc094c` | 8 | 37.40 | 0.21 | 1.97 | 0.42 | +| [robinebers/openusage](https://github.com/robinebers/openusage) | explicit-ai | `857f537` | 3 | 14.00 | 0.10 | 0.63 | 0.13 | +| [redwoodjs/agent-ci](https://github.com/redwoodjs/agent-ci) | explicit-ai | `4de00d6` | 3 | 3.60 | 0.04 | 0.42 | 0.35 | +| [modem-dev/hunk](https://github.com/modem-dev/hunk) | explicit-ai | `b37663f` | 1 | 3.00 | 0.02 | 0.22 | 0.07 | +| [cloudflare/vinext](https://github.com/cloudflare/vinext) | explicit-ai | `28980b0` | 2 | 4.20 | 0.00 | 0.07 | 0.03 | +| [vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 6 | 19.20 | 0.02 | 0.52 | 0.16 | +| [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 29 | 84.80 | 0.02 | 0.34 | 0.12 | +| [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 7 | 13.80 | 0.01 | 0.17 | 0.09 | +| [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | `99d1741` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | + +## defensive.empty-catch + +- Rank: **#2** of 11 +- Signal score: **0.67 / 1.00** +- Family / severity / scope: `defensive` / `strong` / `file` +- Best metric: findings / file (0.93) + +### Cohort summary + +| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | +|---|---:|---:|---:|---:|---:|---:| +| explicit-ai | 6/6 (100%) | 10.00 | 42.30 | 0.12 | 1.23 | 0.31 | +| mature-oss | 5/5 (100%) | 13.00 | 45.10 | 0.04 | 1.21 | 0.35 | + +### AUROC by normalized metric + +- score / 
file: 0.87 +- score / KLOC: 0.63 +- score / function: 0.50 +- findings / file: 0.93 +- findings / KLOC: 0.57 +- findings / function: 0.50 + +### Repo results + +| Repo | Cohort | Ref | Findings | Repo score | Score / file | Score / KLOC | Findings / KLOC | +|---|---|---|---:|---:|---:|---:|---:| +| [garrytan/gstack](https://github.com/garrytan/gstack) | explicit-ai | `6cc094c` | 55 | 301.30 | 1.71 | 15.89 | 2.90 | +| [redwoodjs/agent-ci](https://github.com/redwoodjs/agent-ci) | explicit-ai | `4de00d6` | 18 | 69.30 | 0.74 | 8.18 | 2.12 | +| [modem-dev/hunk](https://github.com/modem-dev/hunk) | explicit-ai | `b37663f` | 4 | 18.40 | 0.11 | 1.36 | 0.29 | +| [cloudflare/vinext](https://github.com/cloudflare/vinext) | explicit-ai | `28980b0` | 16 | 66.20 | 0.06 | 1.11 | 0.27 | +| [robinebers/openusage](https://github.com/robinebers/openusage) | explicit-ai | `857f537` | 4 | 17.50 | 0.13 | 0.79 | 0.18 | +| [jiayun/DevWorkbench](https://github.com/jiayun/DevWorkbench) | explicit-ai | `ea50862` | 1 | 1.90 | 0.06 | 0.64 | 0.33 | +| [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | `99d1741` | 11 | 43.90 | 0.08 | 2.15 | 0.54 | +| [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 39 | 133.90 | 0.07 | 1.65 | 0.48 | +| [vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 13 | 45.10 | 0.04 | 1.21 | 0.35 | +| [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 4 | 10.80 | 0.02 | 0.53 | 0.20 | +| [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 21 | 71.20 | 0.02 | 0.28 | 0.08 | + +## structure.pass-through-wrappers + +- Rank: **#3** of 11 +- Signal score: **0.67 / 1.00** +- Family / severity / scope: `structure` / `strong` / `file` +- Best metric: findings / file (0.85) + +### Cohort summary + +| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | 
+|---|---:|---:|---:|---:|---:|---:| +| explicit-ai | 5/6 (83%) | 5.50 | 13.00 | 0.08 | 1.12 | 0.35 | +| mature-oss | 4/5 (80%) | 13.00 | 41.00 | 0.02 | 0.39 | 0.15 | + +### AUROC by normalized metric + +- score / file: 0.85 +- score / KLOC: 0.65 +- score / function: 0.50 +- findings / file: 0.85 +- findings / KLOC: 0.65 +- findings / function: 0.50 + +### Repo results + +| Repo | Cohort | Ref | Findings | Repo score | Score / file | Score / KLOC | Findings / KLOC | +|---|---|---|---:|---:|---:|---:|---:| +| [jiayun/DevWorkbench](https://github.com/jiayun/DevWorkbench) | explicit-ai | `ea50862` | 1 | 5.00 | 0.16 | 1.67 | 0.33 | +| [cloudflare/vinext](https://github.com/cloudflare/vinext) | explicit-ai | `28980b0` | 29 | 85.00 | 0.08 | 1.43 | 0.49 | +| [modem-dev/hunk](https://github.com/modem-dev/hunk) | explicit-ai | `b37663f` | 6 | 19.00 | 0.11 | 1.40 | 0.44 | +| [garrytan/gstack](https://github.com/garrytan/gstack) | explicit-ai | `6cc094c` | 7 | 16.00 | 0.09 | 0.84 | 0.37 | +| [robinebers/openusage](https://github.com/robinebers/openusage) | explicit-ai | `857f537` | 5 | 10.00 | 0.07 | 0.45 | 0.22 | +| [redwoodjs/agent-ci](https://github.com/redwoodjs/agent-ci) | explicit-ai | `4de00d6` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 25 | 65.00 | 0.05 | 1.74 | 0.67 | +| [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 24 | 62.00 | 0.03 | 0.77 | 0.30 | +| [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 3 | 8.00 | 0.02 | 0.39 | 0.15 | +| [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 13 | 41.00 | 0.01 | 0.16 | 0.05 | +| [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | `99d1741` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | + +## defensive.error-obscuring + +- Rank: **#4** of 11 +- Signal score: **0.66 / 1.00** +- Family / severity / scope: `defensive` / `strong` / `file` +- 
Best metric: findings / file (0.83) + +### Cohort summary + +| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | +|---|---:|---:|---:|---:|---:|---:| +| explicit-ai | 5/6 (83%) | 4.50 | 9.40 | 0.06 | 0.82 | 0.38 | +| mature-oss | 5/5 (100%) | 5.00 | 14.40 | 0.02 | 0.39 | 0.13 | + +### AUROC by normalized metric + +- score / file: 0.80 +- score / KLOC: 0.60 +- score / function: 0.50 +- findings / file: 0.83 +- findings / KLOC: 0.70 +- findings / function: 0.50 + +### Repo results + +| Repo | Cohort | Ref | Findings | Repo score | Score / file | Score / KLOC | Findings / KLOC | +|---|---|---|---:|---:|---:|---:|---:| +| [garrytan/gstack](https://github.com/garrytan/gstack) | explicit-ai | `6cc094c` | 19 | 49.40 | 0.28 | 2.61 | 1.00 | +| [cloudflare/vinext](https://github.com/cloudflare/vinext) | explicit-ai | `28980b0` | 24 | 69.40 | 0.06 | 1.17 | 0.40 | +| [modem-dev/hunk](https://github.com/modem-dev/hunk) | explicit-ai | `b37663f` | 6 | 13.00 | 0.08 | 0.96 | 0.44 | +| [redwoodjs/agent-ci](https://github.com/redwoodjs/agent-ci) | explicit-ai | `4de00d6` | 3 | 5.80 | 0.06 | 0.68 | 0.35 | +| [robinebers/openusage](https://github.com/robinebers/openusage) | explicit-ai | `857f537` | 2 | 4.20 | 0.03 | 0.19 | 0.09 | +| [jiayun/DevWorkbench](https://github.com/jiayun/DevWorkbench) | explicit-ai | `ea50862` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 26 | 80.70 | 0.04 | 1.00 | 0.32 | +| [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 4 | 11.50 | 0.02 | 0.56 | 0.20 | +| [vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 5 | 14.40 | 0.01 | 0.39 | 0.13 | +| [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 28 | 77.20 | 0.02 | 0.31 | 0.11 | +| [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | `99d1741` 
| 1 | 5.00 | 0.01 | 0.25 | 0.05 | + +## tests.duplicate-mock-setup + +- Rank: **#5** of 11 +- Signal score: **0.63 / 1.00** +- Family / severity / scope: `tests` / `medium` / `file` +- Best metric: findings / file (0.70) + +### Cohort summary + +| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | +|---|---:|---:|---:|---:|---:|---:| +| explicit-ai | 3/6 (50%) | 1.50 | 4.50 | 0.04 | 0.53 | 0.15 | +| mature-oss | 1/5 (20%) | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | + +### AUROC by normalized metric + +- score / file: 0.70 +- score / KLOC: 0.70 +- score / function: 0.50 +- findings / file: 0.70 +- findings / KLOC: 0.70 +- findings / function: 0.50 + +### Repo results + +| Repo | Cohort | Ref | Findings | Repo score | Score / file | Score / KLOC | Findings / KLOC | +|---|---|---|---:|---:|---:|---:|---:| +| [robinebers/openusage](https://github.com/robinebers/openusage) | explicit-ai | `857f537` | 25 | 112.00 | 0.81 | 5.03 | 1.12 | +| [cloudflare/vinext](https://github.com/cloudflare/vinext) | explicit-ai | `28980b0` | 18 | 90.00 | 0.08 | 1.51 | 0.30 | +| [redwoodjs/agent-ci](https://github.com/redwoodjs/agent-ci) | explicit-ai | `4de00d6` | 3 | 9.00 | 0.10 | 1.06 | 0.35 | +| [garrytan/gstack](https://github.com/garrytan/gstack) | explicit-ai | `6cc094c` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [jiayun/DevWorkbench](https://github.com/jiayun/DevWorkbench) | explicit-ai | `ea50862` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [modem-dev/hunk](https://github.com/modem-dev/hunk) | explicit-ai | `b37663f` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 6 | 22.50 | 0.01 | 0.09 | 0.02 | +| [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | `99d1741` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| 
[vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | + +## comments.placeholder-comments + +- Rank: **#6** of 11 +- Signal score: **0.50 / 1.00** +- Family / severity / scope: `comments` / `weak` / `file` +- Best metric: findings / file (0.50) + +### Cohort summary + +| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | +|---|---:|---:|---:|---:|---:|---:| +| explicit-ai | 0/6 (0%) | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | +| mature-oss | 0/5 (0%) | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | + +### AUROC by normalized metric + +- score / file: 0.50 +- score / KLOC: 0.50 +- score / function: 0.50 +- findings / file: 0.50 +- findings / KLOC: 0.50 +- findings / function: 0.50 + +### Repo results + +| Repo | Cohort | Ref | Findings | Repo score | Score / file | Score / KLOC | Findings / KLOC | +|---|---|---|---:|---:|---:|---:|---:| +| [cloudflare/vinext](https://github.com/cloudflare/vinext) | explicit-ai | `28980b0` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [garrytan/gstack](https://github.com/garrytan/gstack) | explicit-ai | `6cc094c` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [jiayun/DevWorkbench](https://github.com/jiayun/DevWorkbench) | explicit-ai | `ea50862` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [modem-dev/hunk](https://github.com/modem-dev/hunk) | explicit-ai | `b37663f` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [redwoodjs/agent-ci](https://github.com/redwoodjs/agent-ci) | explicit-ai | `4de00d6` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [robinebers/openusage](https://github.com/robinebers/openusage) | explicit-ai | `857f537` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | 
`99d1741` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | + +## defensive.async-noise + +- Rank: **#7** of 11 +- Signal score: **0.41 / 1.00** +- Family / severity / scope: `defensive` / `medium` / `file` +- Best metric: findings / function (0.50) + +### Cohort summary + +| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | +|---|---:|---:|---:|---:|---:|---:| +| explicit-ai | 3/6 (50%) | 0.50 | 0.38 | 0.00 | 0.03 | 0.03 | +| mature-oss | 4/5 (80%) | 6.00 | 9.00 | 0.00 | 0.07 | 0.10 | + +### AUROC by normalized metric + +- score / file: 0.42 +- score / KLOC: 0.35 +- score / function: 0.50 +- findings / file: 0.42 +- findings / KLOC: 0.28 +- findings / function: 0.50 + +### Repo results + +| Repo | Cohort | Ref | Findings | Repo score | Score / file | Score / KLOC | Findings / KLOC | +|---|---|---|---:|---:|---:|---:|---:| +| [garrytan/gstack](https://github.com/garrytan/gstack) | explicit-ai | `6cc094c` | 2 | 4.50 | 0.03 | 0.24 | 0.11 | +| [cloudflare/vinext](https://github.com/cloudflare/vinext) | explicit-ai | `28980b0` | 4 | 6.00 | 0.01 | 0.10 | 0.07 | +| [modem-dev/hunk](https://github.com/modem-dev/hunk) | explicit-ai | `b37663f` | 1 | 0.75 | 0.00 | 0.06 | 0.07 | +| [jiayun/DevWorkbench](https://github.com/jiayun/DevWorkbench) | explicit-ai | `ea50862` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [redwoodjs/agent-ci](https://github.com/redwoodjs/agent-ci) | explicit-ai | `4de00d6` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [robinebers/openusage](https://github.com/robinebers/openusage) | explicit-ai | `857f537` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| 
[vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 8 | 11.25 | 0.01 | 0.30 | 0.21 | +| [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 11 | 18.00 | 0.01 | 0.22 | 0.14 | +| [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 2 | 1.50 | 0.00 | 0.07 | 0.10 | +| [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 6 | 9.00 | 0.00 | 0.04 | 0.02 | +| [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | `99d1741` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | + +## structure.barrel-density + +- Rank: **#8** of 11 +- Signal score: **0.35 / 1.00** +- Family / severity / scope: `structure` / `medium` / `file` +- Best metric: findings / function (0.50) + +### Cohort summary + +| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | +|---|---:|---:|---:|---:|---:|---:| +| explicit-ai | 3/6 (50%) | 0.50 | 1.00 | 0.00 | 0.06 | 0.03 | +| mature-oss | 5/5 (100%) | 8.00 | 19.00 | 0.02 | 0.44 | 0.15 | + +### AUROC by normalized metric + +- score / file: 0.33 +- score / KLOC: 0.20 +- score / function: 0.50 +- findings / file: 0.33 +- findings / KLOC: 0.23 +- findings / function: 0.50 + +### Repo results + +| Repo | Cohort | Ref | Findings | Repo score | Score / file | Score / KLOC | Findings / KLOC | +|---|---|---|---:|---:|---:|---:|---:| +| [jiayun/DevWorkbench](https://github.com/jiayun/DevWorkbench) | explicit-ai | `ea50862` | 1 | 3.00 | 0.09 | 1.00 | 0.33 | +| [redwoodjs/agent-ci](https://github.com/redwoodjs/agent-ci) | explicit-ai | `4de00d6` | 1 | 2.00 | 0.02 | 0.24 | 0.12 | +| [cloudflare/vinext](https://github.com/cloudflare/vinext) | explicit-ai | `28980b0` | 3 | 7.00 | 0.01 | 0.12 | 0.05 | +| [garrytan/gstack](https://github.com/garrytan/gstack) | explicit-ai | `6cc094c` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [modem-dev/hunk](https://github.com/modem-dev/hunk) | 
explicit-ai | `b37663f` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [robinebers/openusage](https://github.com/robinebers/openusage) | explicit-ai | `857f537` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 27 | 68.50 | 0.04 | 0.85 | 0.33 | +| [vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 8 | 19.00 | 0.02 | 0.51 | 0.21 | +| [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 3 | 9.00 | 0.02 | 0.44 | 0.15 | +| [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 33 | 83.00 | 0.02 | 0.33 | 0.13 | +| [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | `99d1741` | 1 | 3.00 | 0.01 | 0.15 | 0.05 | + +## structure.duplicate-function-signatures + +- Rank: **#9** of 11 +- Signal score: **0.32 / 1.00** +- Family / severity / scope: `structure` / `medium` / `file` +- Best metric: findings / file (0.40) + +### Cohort summary + +| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | +|---|---:|---:|---:|---:|---:|---:| +| explicit-ai | 2/6 (33%) | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | +| mature-oss | 4/5 (80%) | 12.00 | 25.75 | 0.04 | 0.94 | 0.35 | + +### AUROC by normalized metric + +- score / file: 0.40 +- score / KLOC: 0.30 +- score / function: 0.27 +- findings / file: 0.40 +- findings / KLOC: 0.27 +- findings / function: 0.30 + +### Repo results + +| Repo | Cohort | Ref | Findings | Repo score | Score / file | Score / KLOC | Findings / KLOC | +|---|---|---|---:|---:|---:|---:|---:| +| [cloudflare/vinext](https://github.com/cloudflare/vinext) | explicit-ai | `28980b0` | 50 | 143.25 | 0.13 | 2.41 | 0.84 | +| [robinebers/openusage](https://github.com/robinebers/openusage) | explicit-ai | `857f537` | 5 | 11.25 | 0.08 | 0.51 | 0.22 | +| [garrytan/gstack](https://github.com/garrytan/gstack) | explicit-ai | `6cc094c` | 0 | 0.00 | 
0.00 | 0.00 | 0.00 | +| [jiayun/DevWorkbench](https://github.com/jiayun/DevWorkbench) | explicit-ai | `ea50862` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [modem-dev/hunk](https://github.com/modem-dev/hunk) | explicit-ai | `b37663f` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [redwoodjs/agent-ci](https://github.com/redwoodjs/agent-ci) | explicit-ai | `4de00d6` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 12 | 25.75 | 0.05 | 1.26 | 0.59 | +| [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 131 | 309.00 | 0.07 | 1.23 | 0.52 | +| [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 28 | 76.00 | 0.04 | 0.94 | 0.35 | +| [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | `99d1741` | 6 | 7.50 | 0.01 | 0.37 | 0.29 | +| [vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | + +## structure.directory-fanout-hotspot + +- Rank: **#10** of 11 +- Signal score: **0.22 / 1.00** +- Family / severity / scope: `structure` / `medium` / `directory` +- Best metric: findings / file (0.50) + +### Cohort summary + +| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | +|---|---:|---:|---:|---:|---:|---:| +| explicit-ai | 6/6 (100%) | 1.50 | 6.10 | 0.05 | 0.54 | 0.17 | +| mature-oss | 5/5 (100%) | 21.00 | 76.98 | 0.04 | 1.13 | 0.31 | + +### AUROC by normalized metric + +- score / file: 0.47 +- score / KLOC: 0.13 +- score / function: 0.10 +- findings / file: 0.50 +- findings / KLOC: 0.10 +- findings / function: 0.03 + +### Repo results + +| Repo | Cohort | Ref | Findings | Repo score | Score / file | Score / KLOC | Findings / KLOC | +|---|---|---|---:|---:|---:|---:|---:| +| [jiayun/DevWorkbench](https://github.com/jiayun/DevWorkbench) | explicit-ai | `ea50862` | 1 | 4.83 | 0.15 | 1.62 | 0.33 | +| 
[cloudflare/vinext](https://github.com/cloudflare/vinext) | explicit-ai | `28980b0` | 15 | 56.28 | 0.05 | 0.95 | 0.25 | +| [modem-dev/hunk](https://github.com/modem-dev/hunk) | explicit-ai | `b37663f` | 3 | 9.72 | 0.06 | 0.72 | 0.22 | +| [redwoodjs/agent-ci](https://github.com/redwoodjs/agent-ci) | explicit-ai | `4de00d6` | 1 | 3.13 | 0.03 | 0.37 | 0.12 | +| [robinebers/openusage](https://github.com/robinebers/openusage) | explicit-ai | `857f537` | 2 | 7.36 | 0.05 | 0.33 | 0.09 | +| [garrytan/gstack](https://github.com/garrytan/gstack) | explicit-ai | `6cc094c` | 1 | 3.00 | 0.02 | 0.16 | 0.05 | +| [vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 21 | 76.98 | 0.06 | 2.07 | 0.56 | +| [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | `99d1741` | 7 | 25.68 | 0.04 | 1.26 | 0.34 | +| [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 77 | 285.92 | 0.07 | 1.13 | 0.31 | +| [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 6 | 22.79 | 0.04 | 1.11 | 0.29 | +| [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 24 | 86.61 | 0.04 | 1.07 | 0.30 | + +## structure.over-fragmentation + +- Rank: **#11** of 11 +- Signal score: **0.17 / 1.00** +- Family / severity / scope: `structure` / `strong` / `directory` +- Best metric: findings / file (0.18) + +### Cohort summary + +| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | +|---|---:|---:|---:|---:|---:|---:| +| explicit-ai | 1/6 (17%) | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | +| mature-oss | 4/5 (80%) | 2.00 | 13.93 | 0.01 | 0.14 | 0.02 | + +### AUROC by normalized metric + +- score / file: 0.18 +- score / KLOC: 0.18 +- score / function: 0.15 +- findings / file: 0.18 +- findings / KLOC: 0.18 +- findings / function: 0.15 + +### Repo results + +| Repo | Cohort | Ref | Findings | Repo score | Score / file | Score 
/ KLOC | Findings / KLOC | +|---|---|---|---:|---:|---:|---:|---:| +| [cloudflare/vinext](https://github.com/cloudflare/vinext) | explicit-ai | `28980b0` | 3 | 20.12 | 0.02 | 0.34 | 0.05 | +| [garrytan/gstack](https://github.com/garrytan/gstack) | explicit-ai | `6cc094c` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [jiayun/DevWorkbench](https://github.com/jiayun/DevWorkbench) | explicit-ai | `ea50862` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [modem-dev/hunk](https://github.com/modem-dev/hunk) | explicit-ai | `b37663f` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [redwoodjs/agent-ci](https://github.com/redwoodjs/agent-ci) | explicit-ai | `4de00d6` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [robinebers/openusage](https://github.com/robinebers/openusage) | explicit-ai | `857f537` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 8 | 54.02 | 0.04 | 1.45 | 0.21 | +| [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | `99d1741` | 2 | 13.93 | 0.02 | 0.68 | 0.10 | +| [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 5 | 34.86 | 0.01 | 0.14 | 0.02 | +| [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 1 | 6.50 | 0.00 | 0.08 | 0.01 | +| [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | diff --git a/scripts/benchmark-fetch.ts b/scripts/benchmark-fetch.ts index f006d19..61dc69c 100644 --- a/scripts/benchmark-fetch.ts +++ b/scripts/benchmark-fetch.ts @@ -1,69 +1,15 @@ -import { spawnSync } from "node:child_process"; -import { access, mkdir } from "node:fs/promises"; -import path from "node:path"; import { getOption } from "./lib/get-option"; +import { ensurePinnedCheckouts } from "../src/benchmarks/checkouts"; import { DEFAULT_BENCHMARK_SET_PATH, loadBenchmarkSet, resolveProjectPath, } from "../src/benchmarks/manifest"; -function run(command: string, args: string[], cwd?: string): 
string { - const result = spawnSync(command, args, { - cwd, - encoding: "utf8", - stdio: ["ignore", "pipe", "pipe"], - }); - - if (result.status !== 0) { - throw new Error( - [`Command failed: ${command} ${args.join(" ")}`, result.stdout, result.stderr] - .filter(Boolean) - .join("\n"), - ); - } - - return result.stdout.trim(); -} - -async function pathExists(targetPath: string): Promise { - try { - await access(targetPath); - return true; - } catch { - return false; - } -} - const manifestPath = getOption(process.argv.slice(2), "--manifest", DEFAULT_BENCHMARK_SET_PATH); const benchmarkSet = await loadBenchmarkSet(manifestPath); const checkoutsDir = resolveProjectPath(benchmarkSet.artifacts.checkoutsDir); -await mkdir(checkoutsDir, { recursive: true }); - -for (const repo of benchmarkSet.repos) { - const checkoutPath = path.join(checkoutsDir, repo.id); - const gitPath = path.join(checkoutPath, ".git"); - - console.log(`\n==> ${repo.id} (${repo.repo})`); - - if (!(await pathExists(gitPath))) { - console.log(`cloning ${repo.url}`); - run("git", ["clone", "--filter=blob:none", "--no-checkout", repo.url, checkoutPath]); - } - - run("git", ["remote", "set-url", "origin", repo.url], checkoutPath); - run("git", ["fetch", "--force", "--prune", "--filter=blob:none", "origin"], checkoutPath); - run("git", ["checkout", "--force", "--detach", repo.ref], checkoutPath); - run("git", ["reset", "--hard", repo.ref], checkoutPath); - run("git", ["clean", "-fdx"], checkoutPath); - - const actualRef = run("git", ["rev-parse", "HEAD"], checkoutPath); - if (actualRef !== repo.ref) { - throw new Error(`Pinned ref mismatch for ${repo.id}: expected ${repo.ref}, got ${actualRef}`); - } - - console.log(`ready at ${actualRef.slice(0, 7)}`); -} +await ensurePinnedCheckouts(checkoutsDir, benchmarkSet.repos); console.log(`\nPinned benchmark checkouts are ready in ${checkoutsDir}`); diff --git a/scripts/benchmark-rule-signals.ts b/scripts/benchmark-rule-signals.ts new file mode 100644 index 
0000000..106d32f --- /dev/null +++ b/scripts/benchmark-rule-signals.ts @@ -0,0 +1,153 @@ +/** + * End-to-end entrypoint for the per-rule signal benchmark. + * + * The job: + * 1. ensures the pinned mini cohort exists locally, + * 2. runs each built-in rule in isolation against that cohort, + * 3. writes JSON + markdown artifacts, and + * 4. refreshes the short benchmark snippet inside every built-in rule README. + */ +import { mkdir, readFile, writeFile } from "node:fs/promises"; +import path from "node:path"; +import packageJson from "../package.json"; +import { getOption } from "./lib/get-option"; +import { ensurePinnedCheckouts, readHeadRef } from "../src/benchmarks/checkouts"; +import { + createRuleBenchmarkRegistry, + createRuleSignalBenchmarkSummary, + ruleIdToSlug, + type RuleSignalBenchmarkRun, + type RuleSignalBenchmarkSummary, +} from "../src/benchmarks/rule-signal"; +import { + renderRuleSignalReadmeSection, + upsertRuleSignalReadmeSection, +} from "../src/benchmarks/rule-signal-readme"; +import { renderRuleSignalBenchmarkReport } from "../src/benchmarks/rule-signal-report"; +import { loadBenchmarkSet, resolveProjectPath } from "../src/benchmarks/manifest"; +import type { BenchmarkSet } from "../src/benchmarks/types"; +import { DEFAULT_CONFIG } from "../src/config"; +import { analyzeRepository } from "../src/core/engine"; +import type { RulePlugin } from "../src/core/types"; +import { createDefaultRegistry } from "../src/default-registry"; + +const DEFAULT_RULE_SIGNAL_SET_PATH = path.resolve( + process.cwd(), + "benchmarks/sets/rule-signal-mini.json", +); + +/** + * Runs one rule in isolation across the full pinned mini cohort. 
/**
 * Runs one rule in isolation across the full pinned mini cohort.
 *
 * A registry for just this rule is derived from `baseRegistry`, then every repo
 * in `benchmarkSet.repos` is scanned with `DEFAULT_CONFIG`. Each checkout's HEAD
 * is re-read and compared with the pinned ref before it is scanned.
 *
 * @param rule - Rule plugin to benchmark on its own.
 * @param checkoutsDir - Directory that holds one checkout per repo id.
 * @param benchmarkSet - Pinned manifest whose repos are scanned in order.
 * @param baseRegistry - Default registry the single-rule registry is built from.
 * @returns The rule's metadata plus one `{ spec, result }` entry per repo.
 * @throws Error when a checkout's HEAD differs from its pinned ref.
 */
async function analyzeRuleAcrossSet(
  rule: RulePlugin,
  checkoutsDir: string,
  benchmarkSet: BenchmarkSet,
  baseRegistry: ReturnType<typeof createDefaultRegistry>,
): Promise<RuleSignalBenchmarkRun> {
  console.log(`\n## ${rule.id}`);

  const registry = createRuleBenchmarkRegistry(baseRegistry, rule.id);
  const analyses = [];

  for (const repo of benchmarkSet.repos) {
    const checkoutPath = path.join(checkoutsDir, repo.id);

    // Guard against a checkout that moved since `ensurePinnedCheckouts` ran.
    const actualRef = readHeadRef(checkoutPath);
    if (actualRef !== repo.ref) {
      throw new Error(`Pinned ref mismatch for ${repo.id}: expected ${repo.ref}, got ${actualRef}`);
    }

    console.log(`scanning ${repo.id} @ ${actualRef.slice(0, 7)}`);
    const result = await analyzeRepository(checkoutPath, DEFAULT_CONFIG, registry);
    analyses.push({ spec: repo, result });
  }

  return {
    rule: {
      id: rule.id,
      family: rule.family,
      severity: rule.severity,
      scope: rule.scope,
      // Copy so the summary does not share the plugin's own array.
      requires: [...rule.requires],
    },
    analyses,
  };
}

/**
 * Runs the isolated benchmark for every built-in rule in registry order.
 *
 * Rules are processed one after another, so the console output stays grouped
 * per rule.
 *
 * @param checkoutsDir - Directory that holds one checkout per repo id.
 * @param benchmarkSet - Pinned manifest to scan for each rule.
 * @returns One run per rule, in the order returned by the default registry.
 */
async function collectRuleSignalRuns(
  checkoutsDir: string,
  benchmarkSet: BenchmarkSet,
): Promise<RuleSignalBenchmarkRun[]> {
  const baseRegistry = createDefaultRegistry();
  const rules = baseRegistry.getRules();
  const runs: RuleSignalBenchmarkRun[] = [];

  for (const rule of rules) {
    runs.push(await analyzeRuleAcrossSet(rule, checkoutsDir, benchmarkSet, baseRegistry));
  }

  return runs;
}

/**
 * Writes the generated JSON summary and markdown report to disk.
 *
 * Parent directories are created when missing. Both files end with a single
 * trailing newline.
 *
 * @param summaryPath - Destination of the pretty-printed JSON summary.
 * @param reportPath - Destination of the markdown report.
 * @param summary - Summary object to serialize.
 * @param report - Rendered markdown report, without a trailing newline.
 */
async function writeRuleSignalArtifacts(
  summaryPath: string,
  reportPath: string,
  summary: RuleSignalBenchmarkSummary,
  report: string,
): Promise<void> {
  await mkdir(path.dirname(summaryPath), { recursive: true });
  await writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`);
  await mkdir(path.dirname(reportPath), { recursive: true });
  await writeFile(reportPath, `${report}\n`);
}
/**
 * Refreshes the generated benchmark section in every built-in rule README.
 *
 * For each rule in `summary.rules`, the README at
 * `src/rules/<slug>/README.md` is read, the rendered section is upserted, and
 * the file is written back only when its content changed.
 *
 * @param summary - Benchmark summary whose rules drive the README updates.
 */
async function updateRuleReadmes(summary: RuleSignalBenchmarkSummary): Promise<void> {
  for (const rule of summary.rules) {
    const readmePath = resolveProjectPath(`src/rules/${ruleIdToSlug(rule.ruleId)}/README.md`);
    const current = await readFile(readmePath, "utf8");
    const next = upsertRuleSignalReadmeSection(
      current,
      renderRuleSignalReadmeSection(summary, rule),
    );

    // Skip the write when nothing changed.
    if (next !== current) {
      await writeFile(readmePath, next);
    }
  }
}

/**
 * Coordinates the full rule-signal benchmark refresh.
 *
 * Steps: load the manifest (`--manifest` overrides the default path), ensure
 * the pinned checkouts exist, run every rule in isolation, then write the JSON
 * summary, the markdown report, and the per-rule README sections.
 */
async function main(): Promise<void> {
  const manifestPath = getOption(process.argv.slice(2), "--manifest", DEFAULT_RULE_SIGNAL_SET_PATH);
  const benchmarkSet = await loadBenchmarkSet(manifestPath);
  const checkoutsDir = resolveProjectPath(benchmarkSet.artifacts.checkoutsDir);
  const summaryPath = resolveProjectPath(benchmarkSet.artifacts.snapshotPath);
  const reportPath = resolveProjectPath(benchmarkSet.artifacts.reportPath);
  // The summary records paths relative to the working directory, not absolute ones.
  const manifestProjectPath = path.relative(process.cwd(), manifestPath);

  await ensurePinnedCheckouts(checkoutsDir, benchmarkSet.repos);

  const runs = await collectRuleSignalRuns(checkoutsDir, benchmarkSet);
  const summary = createRuleSignalBenchmarkSummary(benchmarkSet, runs, packageJson.version, {
    manifestPath: manifestProjectPath,
    summaryPath: benchmarkSet.artifacts.snapshotPath,
    reportPath: benchmarkSet.artifacts.reportPath,
  });
  const report = renderRuleSignalBenchmarkReport(benchmarkSet, summary);

  await writeRuleSignalArtifacts(summaryPath, reportPath, summary, report);
  await updateRuleReadmes(summary);

  console.log(`\nWrote rule signal summary to ${summaryPath}`);
  console.log(`Wrote rule signal report to ${reportPath}`);
  console.log("Updated rule README benchmark sections.");
}

await main();
"node:child_process"; import { access, mkdir, writeFile } from "node:fs/promises"; import path from "node:path"; import packageJson from "../package.json"; import { getOption } from "./lib/get-option"; +import { readHeadRef } from "../src/benchmarks/checkouts"; import { DEFAULT_BENCHMARK_SET_PATH, loadBenchmarkSet, @@ -21,19 +21,6 @@ async function assertExists(targetPath: string, message: string): Promise } } -function readHeadRef(checkoutPath: string): string { - const result = spawnSync("git", ["rev-parse", "HEAD"], { - cwd: checkoutPath, - encoding: "utf8", - }); - - if (result.status !== 0) { - throw new Error(`Unable to read HEAD for ${checkoutPath}: ${result.stderr}`); - } - - return result.stdout.trim(); -} - const manifestPath = getOption(process.argv.slice(2), "--manifest", DEFAULT_BENCHMARK_SET_PATH); const benchmarkSet = await loadBenchmarkSet(manifestPath); const checkoutsDir = resolveProjectPath(benchmarkSet.artifacts.checkoutsDir); diff --git a/src/benchmarks/checkouts.ts b/src/benchmarks/checkouts.ts new file mode 100644 index 0000000..e105301 --- /dev/null +++ b/src/benchmarks/checkouts.ts @@ -0,0 +1,90 @@ +/** + * Shared git checkout helpers for pinned benchmark workflows. + * + * Both the main pinned benchmark and the per-rule signal benchmark need to + * materialize exact upstream SHAs locally before scanning. Keeping the git + * plumbing here avoids small behavior drift between the CLI entrypoints. + */ +import { spawnSync } from "node:child_process"; +import { access, mkdir } from "node:fs/promises"; +import path from "node:path"; +import type { BenchmarkRepoSpec } from "./types"; + +/** + * Runs a git command and returns trimmed stdout. + * + * Benchmarks are meant to be reproducible, so failures should be loud and + * include both stdout and stderr for debugging. 
/**
 * Runs a command synchronously and returns its trimmed stdout.
 *
 * Benchmarks are meant to be reproducible, so failures are loud: the thrown
 * error carries the command line, the spawn error (for example `ENOENT` when
 * the binary is missing), the terminating signal, and captured stdout/stderr.
 *
 * @param command - Executable to spawn.
 * @param args - Arguments passed to the executable.
 * @param cwd - Optional working directory for the child process.
 * @returns The child's stdout with surrounding whitespace removed.
 * @throws Error when the process cannot be spawned, is killed by a signal, or
 *   exits with a non-zero status.
 */
function run(command: string, args: string[], cwd?: string): string {
  const result = spawnSync(command, args, {
    cwd,
    encoding: "utf8",
    stdio: ["ignore", "pipe", "pipe"],
  });

  // `status` is null when the child never ran or was killed, so the spawn
  // error and the signal are the only place the real cause is reported.
  if (result.error !== undefined || result.status !== 0) {
    throw new Error(
      [
        `Command failed: ${command} ${args.join(" ")}`,
        result.error?.message,
        result.signal ? `Terminated by signal ${result.signal}` : undefined,
        result.stdout,
        result.stderr,
      ]
        .filter(Boolean)
        .join("\n"),
    );
  }

  return result.stdout.trim();
}

/**
 * Checks whether a path exists without treating a missing path as exceptional.
 *
 * @param targetPath - Path to probe.
 * @returns `true` when `access` succeeds, `false` when it rejects for any reason.
 */
function pathExists(targetPath: string): Promise<boolean> {
  return access(targetPath).then(
    () => true,
    () => false,
  );
}

/**
 * Reads the currently checked out commit SHA for an existing local checkout.
 *
 * @param checkoutPath - Directory of the git checkout.
 * @returns Trimmed output of `git rev-parse HEAD`.
 * @throws Error when the git command fails.
 */
export function readHeadRef(checkoutPath: string): string {
  return run("git", ["rev-parse", "HEAD"], checkoutPath);
}
/**
 * Ensures every repo in a pinned benchmark set exists locally at its exact ref.
 *
 * Existing clones are reused, but their origin URL, fetched refs, working tree,
 * and HEAD are all reset so reruns stay deterministic. Repos are processed one
 * at a time, which keeps the progress log grouped per repo.
 *
 * @param checkoutsDir - Directory that receives one checkout per `repo.id`.
 * @param repos - Repos to materialize, each pinned to `repo.ref`.
 * @throws Error when a git command fails or HEAD does not equal `repo.ref`
 *   after checkout.
 */
export async function ensurePinnedCheckouts(
  checkoutsDir: string,
  repos: BenchmarkRepoSpec[],
): Promise<void> {
  await mkdir(checkoutsDir, { recursive: true });

  for (const repo of repos) {
    const checkoutPath = path.join(checkoutsDir, repo.id);
    const gitPath = path.join(checkoutPath, ".git");

    console.log(`\n==> ${repo.id} (${repo.repo})`);

    // Clone only when there is no `.git` entry yet.
    if (!(await pathExists(gitPath))) {
      console.log(`cloning ${repo.url}`);
      run("git", ["clone", "--filter=blob:none", "--no-checkout", repo.url, checkoutPath]);
    }

    // Re-point origin, refresh refs, then force the tree to the pinned ref and
    // remove untracked and ignored files.
    run("git", ["remote", "set-url", "origin", repo.url], checkoutPath);
    run("git", ["fetch", "--force", "--prune", "--filter=blob:none", "origin"], checkoutPath);
    run("git", ["checkout", "--force", "--detach", repo.ref], checkoutPath);
    run("git", ["reset", "--hard", repo.ref], checkoutPath);
    run("git", ["clean", "-fdx"], checkoutPath);

    // The comparison is exact string equality against `rev-parse HEAD`.
    const actualRef = readHeadRef(checkoutPath);
    if (actualRef !== repo.ref) {
      throw new Error(`Pinned ref mismatch for ${repo.id}: expected ${repo.ref}, got ${actualRef}`);
    }

    console.log(`ready at ${actualRef.slice(0, 7)}`);
  }
}

// The helper below belongs to src/benchmarks/rule-signal-readme.ts, the next
// file in this patch.

/**
 * Formats numeric values consistently for README snippets.
 *
 * @param value - Number to format, or `null` when no value is available.
 * @param digits - Number of decimal places (default 2).
 * @returns `"n/a"` for `null`, otherwise the fixed-point representation.
 */
function formatMetric(value: number | null, digits = 2): string {
  return value === null ? "n/a" : value.toFixed(digits);
}
/** Formats hit counts as `x/y` for compact bullet points. */
function formatHitRate(hitCount: number, repoCount: number): string {
  return `${hitCount}/${repoCount}`;
}

/**
 * Builds the relative link from a rule README to that rule's section in the
 * shared report.
 *
 * The fragment is the rule id with every `.` removed, for example
 * `structure.over-fragmentation` becomes `structureover-fragmentation`.
 * NOTE(review): presumably this mirrors how the markdown host slugifies the
 * `## <rule id>` headings; confirm against the renderer in use.
 */
function renderReportLink(
  summary: RuleSignalBenchmarkSummary,
  rule: RuleSignalRuleSummary,
): string {
  // A global regex replace is equivalent to `replaceAll(".", "")` and does not
  // need the ES2021 string lib.
  const anchor = rule.ruleId.replace(/\./g, "");
  return `../../../${summary.artifacts.reportPath}#${anchor}`;
}

/** Maps normalized-metric keys to the labels used in the docs. */
function formatMetricName(
  metricKey: NonNullable<RuleSignalRuleSummary["bestMetric"]>,
): string {
  return {
    scorePerFile: "score / file",
    scorePerKloc: "score / KLOC",
    scorePerFunction: "score / function",
    findingsPerFile: "findings / file",
    findingsPerKloc: "findings / KLOC",
    findingsPerFunction: "findings / function",
  }[metricKey];
}

/**
 * Renders the markdown snippet embedded into one rule README.
 *
 * @param summary - Full benchmark summary. Supplies the artifact paths and the
 *   total rule count.
 * @param rule - Summary entry for the rule whose README is being rendered.
 * @returns A `## Benchmark signal` section without a trailing newline.
 */
export function renderRuleSignalReadmeSection(
  summary: RuleSignalBenchmarkSummary,
  rule: RuleSignalRuleSummary,
): string {
  const ai = rule.cohorts["explicit-ai"];
  const oss = rule.cohorts["mature-oss"];
  // A rule with no best metric renders as "n/a" instead of an empty label.
  const bestMetric =
    rule.bestMetric === null
      ? "n/a"
      : `${formatMetricName(rule.bestMetric)} (${formatMetric(rule.bestMetricAuc)})`;

  return [
    "## Benchmark signal",
    "",
    `Small pinned rule benchmark ([manifest](../../../${summary.artifacts.manifestPath})):`,
    "",
    `- Signal rank: **#${rule.rank} of ${summary.rules.length}**`,
    `- Signal score: **${formatMetric(rule.signalScore)} / 1.00**`,
    `- Best separating metric: **${bestMetric}**`,
    `- Hit rate: **${formatHitRate(ai.hitCount, ai.repoCount)} AI repos** vs **${formatHitRate(oss.hitCount, oss.repoCount)} mature OSS repos**`,
    `- Full results: [rule signal report](${renderReportLink(summary, rule)})`,
  ].join("\n");
}
+ *
+ * Re-running the benchmark replaces that section in place: reruns are idempotent and
+ * README content after the section (from the next `#`/`##` heading on) is preserved.
+ */
+export function upsertRuleSignalReadmeSection(readme: string, section: string): string {
+  const lines = readme.trimEnd().split("\n");
+  const start = lines.findIndex((line) => line.trimEnd() === "## Benchmark signal");
+  if (start < 0) {
+    return `${lines.join("\n")}\n\n${section.trim()}\n`;
+  }
+  // Drop blank lines left before the heading; otherwise each rerun adds another one.
+  const offset = lines.slice(start + 1).findIndex((line) => /^#{1,2}\s/.test(line));
+  const before = lines.slice(0, start).join("\n").trimEnd();
+  const after = offset < 0 ? "" : lines.slice(start + 1 + offset).join("\n");
+  return `${[before, section.trim(), after].filter((part) => part !== "").join("\n\n")}\n`;
+}
diff --git a/src/benchmarks/rule-signal-report.ts b/src/benchmarks/rule-signal-report.ts
new file mode 100644
index 0000000..1049be3
--- /dev/null
+++ b/src/benchmarks/rule-signal-report.ts
@@ -0,0 +1,140 @@
+/**
+ * Renders the markdown report for the per-rule signal benchmark.
+ *
+ * The JSON summary is the machine-readable artifact; this file turns the same
+ * data into a maintainer-facing leaderboard and per-rule drilldown.
+ */
+import type { NormalizedMetrics } from "../core/types";
+import type { BenchmarkSet } from "./types";
+import type { RuleSignalBenchmarkSummary, RuleSignalRuleSummary } from "./rule-signal";
+
+/** Formats numeric benchmark values consistently for markdown output. */
+function formatMetric(value: number | null, digits = 2): string {
+  return value === null ? "n/a" : value.toFixed(digits);
+}
+
+/** Formats hit-rate percentages for compact tables. */
+function formatPercent(value: number): string {
+  return `${(value * 100).toFixed(0)}%`;
+}
+
+/** Maps normalized-metric keys to the human labels used in reports.
*/ +function formatMetricName(metricKey: keyof NormalizedMetrics): string { + return { + scorePerFile: "score / file", + scorePerKloc: "score / KLOC", + scorePerFunction: "score / function", + findingsPerFile: "findings / file", + findingsPerKloc: "findings / KLOC", + findingsPerFunction: "findings / function", + }[metricKey]; +} + +/** Shortens full SHAs for table display. */ +function shortRef(ref: string): string { + return ref.slice(0, 7); +} + +/** Renders the top-level ranking table across every isolated rule run. */ +function renderLeaderboard(summary: RuleSignalBenchmarkSummary): string[] { + return [ + "| Rank | Rule | Signal score | AI hit rate | OSS hit rate | Best metric | Best AUROC |", + "|---:|---|---:|---:|---:|---|---:|", + ...summary.rules.map((rule) => { + const ai = rule.cohorts["explicit-ai"]; + const oss = rule.cohorts["mature-oss"]; + return `| ${rule.rank} | \`${rule.ruleId}\` | **${formatMetric(rule.signalScore)}** | ${ai.hitCount}/${ai.repoCount} (${formatPercent(ai.hitRate)}) | ${oss.hitCount}/${oss.repoCount} (${formatPercent(oss.hitRate)}) | ${rule.bestMetric ? formatMetricName(rule.bestMetric) : "n/a"} | ${formatMetric(rule.bestMetricAuc)} |`; + }), + ]; +} + +/** Renders side-by-side cohort medians for one isolated rule. 
*/ +function renderCohortTable(rule: RuleSignalRuleSummary): string[] { + const ai = rule.cohorts["explicit-ai"]; + const oss = rule.cohorts["mature-oss"]; + + return [ + "| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC |", + "|---|---:|---:|---:|---:|---:|---:|", + `| explicit-ai | ${ai.hitCount}/${ai.repoCount} (${formatPercent(ai.hitRate)}) | ${formatMetric(ai.findingCountMedian)} | ${formatMetric(ai.repoScoreMedian)} | ${formatMetric(ai.medians.scorePerFile)} | ${formatMetric(ai.medians.scorePerKloc)} | ${formatMetric(ai.medians.findingsPerKloc)} |`, + `| mature-oss | ${oss.hitCount}/${oss.repoCount} (${formatPercent(oss.hitRate)}) | ${formatMetric(oss.findingCountMedian)} | ${formatMetric(oss.repoScoreMedian)} | ${formatMetric(oss.medians.scorePerFile)} | ${formatMetric(oss.medians.scorePerKloc)} | ${formatMetric(oss.medians.findingsPerKloc)} |`, + ]; +} + +/** Renders the per-repo isolated results for a single rule. */ +function renderRepoTable(rule: RuleSignalRuleSummary): string[] { + return [ + "| Repo | Cohort | Ref | Findings | Repo score | Score / file | Score / KLOC | Findings / KLOC |", + "|---|---|---|---:|---:|---:|---:|---:|", + ...rule.repos.map( + (repo) => + `| [${repo.repo}](https://github.com/${repo.repo}) | ${repo.cohort} | \`${shortRef(repo.ref)}\` | ${repo.summary.findingCount} | ${formatMetric(repo.summary.repoScore)} | ${formatMetric(repo.summary.normalized.scorePerFile)} | ${formatMetric(repo.summary.normalized.scorePerKloc)} | ${formatMetric(repo.summary.normalized.findingsPerKloc)} |`, + ), + ]; +} + +/** + * Renders the full drilldown section for one rule, including cohort medians, + * per-metric AUROC values, and the repo-by-repo table. 
+ */ +function renderRuleSection(rule: RuleSignalRuleSummary, totalRules: number): string[] { + const metricAucs = Object.entries(rule.metricAucs) + .map( + ([metricKey, auc]) => + `- ${formatMetricName(metricKey as keyof NormalizedMetrics)}: ${formatMetric(auc)}`, + ) + .join("\n"); + + return [ + `## ${rule.ruleId}`, + "", + `- Rank: **#${rule.rank}** of ${totalRules}`, + `- Signal score: **${formatMetric(rule.signalScore)} / 1.00**`, + `- Family / severity / scope: \`${rule.family}\` / \`${rule.severity}\` / \`${rule.scope}\``, + `- Best metric: ${rule.bestMetric ? `${formatMetricName(rule.bestMetric)} (${formatMetric(rule.bestMetricAuc)})` : "n/a"}`, + "", + "### Cohort summary", + "", + ...renderCohortTable(rule), + "", + "### AUROC by normalized metric", + "", + metricAucs, + "", + "### Repo results", + "", + ...renderRepoTable(rule), + ]; +} + +/** + * Builds the human-readable markdown report for the per-rule signal benchmark. + */ +export function renderRuleSignalBenchmarkReport( + set: BenchmarkSet, + summary: RuleSignalBenchmarkSummary, +): string { + const lines = [ + `# Per-rule signal benchmark: ${set.name}`, + "", + `Date: ${summary.generatedAt.slice(0, 10)}`, + `Analyzer version: ${summary.analyzerVersion}`, + `Manifest: \`${summary.artifacts.manifestPath}\``, + `Summary: \`${summary.artifacts.summaryPath}\``, + `Report: \`${summary.artifacts.reportPath}\``, + "", + "## Goal", + "", + set.description, + "", + "Signal score = average AUROC across the six normalized metrics when each rule runs in isolation against this pinned mini cohort. 
1.00 means perfect AI-over-OSS separation, while 0.50 means no better than random ordering.", + "", + "## Leaderboard", + "", + ...renderLeaderboard(summary), + "", + ...summary.rules.flatMap((rule) => ["", ...renderRuleSection(rule, summary.rules.length)]), + ]; + + return lines.join("\n").trim(); +} diff --git a/src/benchmarks/rule-signal.ts b/src/benchmarks/rule-signal.ts new file mode 100644 index 0000000..bb9ba9f --- /dev/null +++ b/src/benchmarks/rule-signal.ts @@ -0,0 +1,389 @@ +/** + * Builds the data model for the per-rule signal benchmark. + * + * This benchmark runs each built-in rule in isolation against a smaller pinned + * AI-vs-OSS cohort so we can answer: "which rules actually separate the two + * cohorts well on their own?" + */ +import type { AnalysisSummary, NormalizedMetrics, RulePlugin, Scope } from "../core/types"; +import { Registry } from "../core/registry"; +import { buildMedianMetrics, median } from "./metrics"; +import { + NORMALIZED_METRIC_KEYS, + type BenchmarkCohort, + type BenchmarkSet, + type BenchmarkedAnalysis, +} from "./types"; + +/** + * Fact ids that are always available from the engine without discovering a + * provider. They form the roots of the dependency walk when we build an + * isolated registry for one rule. + */ +const BASE_FACT_IDS = new Set([ + "file.record", + "file.text", + "file.lineCount", + "file.logicalLineCount", + "directory.record", + "repo.files", + "repo.directories", +]); + +/** Single isolated benchmark result for one repo under one rule. */ +export interface RuleSignalRepoSnapshot { + id: string; + repo: string; + cohort: BenchmarkCohort; + ref: string; + summary: AnalysisSummary; +} + +/** Aggregated isolated-rule stats for one cohort. 
*/ +export interface RuleSignalCohortSummary { + repoCount: number; + hitCount: number; + hitRate: number; + repoScoreMedian: number | null; + findingCountMedian: number | null; + medians: NormalizedMetrics; +} + +/** AUROC scores for each normalized metric when a rule runs alone. */ +export interface RuleSignalMetricAucs extends NormalizedMetrics {} + +/** Ranked isolated-rule summary used by JSON output, markdown, and rule READMEs. */ +export interface RuleSignalRuleSummary { + rank: number; + ruleId: string; + ruleSlug: string; + family: string; + severity: RulePlugin["severity"]; + scope: Scope; + requires: string[]; + signalScore: number | null; + bestMetric: keyof NormalizedMetrics | null; + bestMetricAuc: number | null; + metricAucs: RuleSignalMetricAucs; + cohorts: Record; + repos: RuleSignalRepoSnapshot[]; +} + +/** Paths recorded in the generated summary so downstream docs can link back to them. */ +export interface RuleSignalBenchmarkArtifacts { + manifestPath: string; + summaryPath: string; + reportPath: string; +} + +/** Top-level JSON payload written by `bun run benchmark:rules`. */ +export interface RuleSignalBenchmarkSummary { + schemaVersion: 1; + benchmarkSetId: string; + benchmarkSetName: string; + generatedAt: string; + analyzerVersion: string; + artifacts: RuleSignalBenchmarkArtifacts; + rules: RuleSignalRuleSummary[]; +} + +/** One isolated run worth of inputs before we aggregate and rank it. */ +export interface RuleSignalBenchmarkRun { + rule: Pick; + analyses: BenchmarkedAnalysis[]; +} + +interface RuleSignalRuleDraft extends Omit {} + +/** Computes a simple arithmetic mean, returning null for empty input. */ +function mean(values: number[]): number | null { + return values.length === 0 + ? null + : values.reduce((total, value) => total + value, 0) / values.length; +} + +/** Keeps only the per-repo fields needed for signal benchmarking outputs. 
*/ +function buildRepoSnapshot({ spec, result }: BenchmarkedAnalysis): RuleSignalRepoSnapshot { + return { + id: spec.id, + repo: spec.repo, + cohort: spec.cohort, + ref: spec.ref, + summary: result.summary, + }; +} + +/** + * Builds cohort-level medians and hit rates for one rule. + * + * A "hit" means the isolated rule emitted at least one finding for that repo. + */ +function buildCohortSummary(repos: RuleSignalRepoSnapshot[]): RuleSignalCohortSummary { + const hitRepos = repos.filter((repo) => repo.summary.findingCount > 0); + + return { + repoCount: repos.length, + hitCount: hitRepos.length, + hitRate: repos.length === 0 ? 0 : hitRepos.length / repos.length, + repoScoreMedian: median(repos.map((repo) => repo.summary.repoScore)), + findingCountMedian: median(repos.map((repo) => repo.summary.findingCount)), + medians: buildMedianMetrics(repos), + }; +} + +/** + * Computes AUROC for one metric, treating larger values as "more AI-like". + * + * 1.00 means every positive outranks every negative, 0.50 means no separation, + * and values below 0.50 mean the metric is separating in the wrong direction. + */ +export function computeAuroc(positiveValues: number[], negativeValues: number[]): number | null { + if (positiveValues.length === 0 || negativeValues.length === 0) { + return null; + } + + let wins = 0; + + for (const positive of positiveValues) { + for (const negative of negativeValues) { + if (positive > negative) { + wins += 1; + } else if (positive === negative) { + wins += 0.5; + } + } + } + + return wins / (positiveValues.length * negativeValues.length); +} + +/** + * Calculates per-metric AUROC values for one isolated rule run. + * + * Null normalized values are treated as zero so repos with degenerate totals + * still participate in the ordering instead of disappearing from comparison. 
+ */ +function toAucMetricMap(repos: RuleSignalRepoSnapshot[]): RuleSignalMetricAucs { + const aiRepos = repos.filter((repo) => repo.cohort === "explicit-ai"); + const ossRepos = repos.filter((repo) => repo.cohort === "mature-oss"); + + const entries = NORMALIZED_METRIC_KEYS.map((metricKey) => { + const aiValues = aiRepos.map((repo) => repo.summary.normalized[metricKey] ?? 0); + const ossValues = ossRepos.map((repo) => repo.summary.normalized[metricKey] ?? 0); + return [metricKey, computeAuroc(aiValues, ossValues)]; + }); + + return Object.fromEntries(entries) as RuleSignalMetricAucs; +} + +/** + * Picks the normalized metric with the strongest separation for a rule. + * + * Ties are broken lexicographically so output is deterministic across reruns. + */ +function findBestMetric(metricAucs: RuleSignalMetricAucs): { + metric: keyof NormalizedMetrics | null; + auc: number | null; +} { + let bestMetric: keyof NormalizedMetrics | null = null; + let bestAuc: number | null = null; + + for (const metricKey of NORMALIZED_METRIC_KEYS) { + const auc = metricAucs[metricKey]; + if (auc === null) { + continue; + } + + if ( + bestAuc === null || + auc > bestAuc || + (auc === bestAuc && bestMetric !== null && metricKey < bestMetric) + ) { + bestMetric = metricKey; + bestAuc = auc; + } + } + + return { metric: bestMetric, auc: bestAuc }; +} + +/** Sort helper that places larger numeric values first and nulls last. */ +function compareNullableDescending(left: number | null, right: number | null): number { + if (left === right) { + return 0; + } + + if (left === null) { + return 1; + } + + if (right === null) { + return -1; + } + + return right - left; +} + +/** Converts `family.rule-name` ids into the directory slug used under `src/rules/`. */ +export function ruleIdToSlug(ruleId: string): string { + const parts = ruleId.split("."); + return parts[parts.length - 1] ?? ruleId; +} + +/** + * Builds a minimal registry for one rule. 
+ * + * The goal is isolation: we include only the target rule plus the fact + * providers it transitively depends on, while keeping the normal language + * plugins. That keeps unrelated rules/providers from influencing the run. + */ +export function createRuleBenchmarkRegistry(baseRegistry: Registry, ruleId: string): Registry { + const targetRule = baseRegistry.getRules().find((rule) => rule.id === ruleId); + if (!targetRule) { + throw new Error(`Unknown rule: ${ruleId}`); + } + + const providerByFact = new Map[number]>(); + for (const provider of baseRegistry.getFactProviders()) { + for (const factId of provider.provides) { + if (!providerByFact.has(factId)) { + providerByFact.set(factId, provider); + } + } + } + + const requiredProviderIds = new Set(); + const visitedFacts = new Set(); + + /** Walks fact dependencies backward from the rule to the providers we need. */ + const requireFact = (factId: string): void => { + if (visitedFacts.has(factId) || BASE_FACT_IDS.has(factId)) { + return; + } + + visitedFacts.add(factId); + const provider = providerByFact.get(factId); + if (!provider) { + throw new Error(`No fact provider produces required fact ${factId} for rule ${ruleId}`); + } + + requiredProviderIds.add(provider.id); + for (const dependency of provider.requires) { + requireFact(dependency); + } + }; + + for (const factId of targetRule.requires) { + requireFact(factId); + } + + const registry = new Registry(); + for (const language of baseRegistry.getLanguages()) { + registry.registerLanguage(language); + } + + for (const provider of baseRegistry.getFactProviders()) { + if (requiredProviderIds.has(provider.id)) { + registry.registerFactProvider(provider); + } + } + + registry.registerRule(targetRule); + return registry; +} + +/** + * Aggregates isolated rule runs into a ranked benchmark summary. + * + * `signalScore` is the mean AUROC across all six normalized metrics. Rules are + * then ranked by that score, with deterministic tie-breaks for repeatable docs. 
+ */ +export function createRuleSignalBenchmarkSummary( + set: BenchmarkSet, + runs: RuleSignalBenchmarkRun[], + analyzerVersion: string, + artifacts: RuleSignalBenchmarkArtifacts, + generatedAt = new Date().toISOString(), +): RuleSignalBenchmarkSummary { + const drafts: RuleSignalRuleDraft[] = runs.map(({ rule, analyses }) => { + const repos = analyses + .map(buildRepoSnapshot) + .sort( + (left, right) => + left.cohort.localeCompare(right.cohort) || + compareNullableDescending( + left.summary.normalized.scorePerKloc, + right.summary.normalized.scorePerKloc, + ) || + left.repo.localeCompare(right.repo), + ); + + const metricAucs = toAucMetricMap(repos); + const signalScore = mean( + NORMALIZED_METRIC_KEYS.flatMap((metricKey) => { + const auc = metricAucs[metricKey]; + return auc === null ? [] : [auc]; + }), + ); + const { metric: bestMetric, auc: bestMetricAuc } = findBestMetric(metricAucs); + const aiRepos = repos.filter((repo) => repo.cohort === "explicit-ai"); + const ossRepos = repos.filter((repo) => repo.cohort === "mature-oss"); + + return { + ruleId: rule.id, + ruleSlug: ruleIdToSlug(rule.id), + family: rule.family, + severity: rule.severity, + scope: rule.scope, + requires: [...rule.requires], + signalScore, + bestMetric, + bestMetricAuc, + metricAucs, + cohorts: { + "explicit-ai": buildCohortSummary(aiRepos), + "mature-oss": buildCohortSummary(ossRepos), + }, + repos, + }; + }); + + const sorted = drafts.sort((left, right) => { + const signalCompare = compareNullableDescending(left.signalScore, right.signalScore); + if (signalCompare !== 0) { + return signalCompare; + } + + const bestMetricCompare = compareNullableDescending(left.bestMetricAuc, right.bestMetricAuc); + if (bestMetricCompare !== 0) { + return bestMetricCompare; + } + + const aiHitRateCompare = + right.cohorts["explicit-ai"].hitRate - left.cohorts["explicit-ai"].hitRate; + if (aiHitRateCompare !== 0) { + return aiHitRateCompare; + } + + const ossHitRateCompare = + 
left.cohorts["mature-oss"].hitRate - right.cohorts["mature-oss"].hitRate; + if (ossHitRateCompare !== 0) { + return ossHitRateCompare; + } + + return left.ruleId.localeCompare(right.ruleId); + }); + + return { + schemaVersion: 1, + benchmarkSetId: set.id, + benchmarkSetName: set.name, + generatedAt, + analyzerVersion, + artifacts, + rules: sorted.map((draft, index) => ({ + ...draft, + rank: index + 1, + })), + }; +} diff --git a/src/rules/async-noise/README.md b/src/rules/async-noise/README.md index 097399b..133a477 100644 --- a/src/rules/async-noise/README.md +++ b/src/rules/async-noise/README.md @@ -46,3 +46,13 @@ async function getJson(url: string) { Redundant `return await` sites add `1.5` each. Plain async pass-through wrappers add `0.75` each. The total file contribution is capped at `4`. + +## Benchmark signal + +Small pinned rule benchmark ([manifest](../../../benchmarks/sets/rule-signal-mini.json)): + +- Signal rank: **#7 of 11** +- Signal score: **0.41 / 1.00** +- Best separating metric: **findings / function (0.50)** +- Hit rate: **3/6 AI repos** vs **4/5 mature OSS repos** +- Full results: [rule signal report](../../../reports/rule-signal-mini.md#defensiveasync-noise) diff --git a/src/rules/barrel-density/README.md b/src/rules/barrel-density/README.md index 70d0123..adefaa9 100644 --- a/src/rules/barrel-density/README.md +++ b/src/rules/barrel-density/README.md @@ -39,3 +39,13 @@ export { type Store } from "./types"; ## Scoring The score starts at `1` and adds `0.5` per re-export statement, capped at `3`. 
+ +## Benchmark signal + +Small pinned rule benchmark ([manifest](../../../benchmarks/sets/rule-signal-mini.json)): + +- Signal rank: **#8 of 11** +- Signal score: **0.35 / 1.00** +- Best separating metric: **findings / function (0.50)** +- Hit rate: **3/6 AI repos** vs **5/5 mature OSS repos** +- Full results: [rule signal report](../../../reports/rule-signal-mini.md#structurebarrel-density) diff --git a/src/rules/directory-fanout-hotspot/README.md b/src/rules/directory-fanout-hotspot/README.md index b3cf0d8..d1637eb 100644 --- a/src/rules/directory-fanout-hotspot/README.md +++ b/src/rules/directory-fanout-hotspot/README.md @@ -51,3 +51,13 @@ Asset-like buckets and test-matrix directories are intentionally suppressed beca The rule starts at `2` and adds a bounded amount based on how far the directory is above the computed threshold. The total directory contribution stays capped at `6`. + +## Benchmark signal + +Small pinned rule benchmark ([manifest](../../../benchmarks/sets/rule-signal-mini.json)): + +- Signal rank: **#10 of 11** +- Signal score: **0.22 / 1.00** +- Best separating metric: **findings / file (0.50)** +- Hit rate: **6/6 AI repos** vs **5/5 mature OSS repos** +- Full results: [rule signal report](../../../reports/rule-signal-mini.md#structuredirectory-fanout-hotspot) diff --git a/src/rules/duplicate-function-signatures/README.md b/src/rules/duplicate-function-signatures/README.md index 9edc7a6..bfd1e5b 100644 --- a/src/rules/duplicate-function-signatures/README.md +++ b/src/rules/duplicate-function-signatures/README.md @@ -53,3 +53,13 @@ Pass-through wrappers are excluded, and a duplicate that only appears in 2 files ## Scoring Each duplicate cluster adds `1.25 + 0.5 * (fileCount - 3)` for the current file, capped at `6`. 
+ +## Benchmark signal + +Small pinned rule benchmark ([manifest](../../../benchmarks/sets/rule-signal-mini.json)): + +- Signal rank: **#9 of 11** +- Signal score: **0.32 / 1.00** +- Best separating metric: **findings / file (0.40)** +- Hit rate: **2/6 AI repos** vs **4/5 mature OSS repos** +- Full results: [rule signal report](../../../reports/rule-signal-mini.md#structureduplicate-function-signatures) diff --git a/src/rules/duplicate-mock-setup/README.md b/src/rules/duplicate-mock-setup/README.md index dfb0a0a..d2ca57f 100644 --- a/src/rules/duplicate-mock-setup/README.md +++ b/src/rules/duplicate-mock-setup/README.md @@ -42,3 +42,13 @@ Generic mock declarations and cleanup-only statements do not contribute to this ## Scoring Each duplicate setup cluster adds `1 + 0.5 * (fileCount - 2)` for the current file, capped at `5`. + +## Benchmark signal + +Small pinned rule benchmark ([manifest](../../../benchmarks/sets/rule-signal-mini.json)): + +- Signal rank: **#5 of 11** +- Signal score: **0.63 / 1.00** +- Best separating metric: **findings / file (0.70)** +- Hit rate: **3/6 AI repos** vs **1/5 mature OSS repos** +- Full results: [rule signal report](../../../reports/rule-signal-mini.md#testsduplicate-mock-setup) diff --git a/src/rules/empty-catch/README.md b/src/rules/empty-catch/README.md index 0211a87..c5ae4ca 100644 --- a/src/rules/empty-catch/README.md +++ b/src/rules/empty-catch/README.md @@ -48,3 +48,13 @@ export function loadTheme() { Each flagged catch uses the shared try/catch scoring helper, then the file total is capped at `8`. Boundary-oriented catches are downweighted instead of fully ignored. 
+ +## Benchmark signal + +Small pinned rule benchmark ([manifest](../../../benchmarks/sets/rule-signal-mini.json)): + +- Signal rank: **#2 of 11** +- Signal score: **0.67 / 1.00** +- Best separating metric: **findings / file (0.93)** +- Hit rate: **6/6 AI repos** vs **5/5 mature OSS repos** +- Full results: [rule signal report](../../../reports/rule-signal-mini.md#defensiveempty-catch) diff --git a/src/rules/error-obscuring/README.md b/src/rules/error-obscuring/README.md index 666d7af..56c6604 100644 --- a/src/rules/error-obscuring/README.md +++ b/src/rules/error-obscuring/README.md @@ -54,3 +54,13 @@ export function readConfig(raw: string) { Each flagged catch uses the shared try/catch scoring helper, then the file total is capped at `8`. Generic rethrows are still noisy, but scored slightly lower than silent default-return patterns. + +## Benchmark signal + +Small pinned rule benchmark ([manifest](../../../benchmarks/sets/rule-signal-mini.json)): + +- Signal rank: **#4 of 11** +- Signal score: **0.66 / 1.00** +- Best separating metric: **findings / file (0.83)** +- Hit rate: **5/6 AI repos** vs **5/5 mature OSS repos** +- Full results: [rule signal report](../../../reports/rule-signal-mini.md#defensiveerror-obscuring) diff --git a/src/rules/error-swallowing/README.md b/src/rules/error-swallowing/README.md index 38e782c..fc29261 100644 --- a/src/rules/error-swallowing/README.md +++ b/src/rules/error-swallowing/README.md @@ -42,3 +42,13 @@ export async function syncUser(id: string) { ## Scoring Each flagged catch uses the shared try/catch scoring helper, then the file total is capped at `8`. 
+ +## Benchmark signal + +Small pinned rule benchmark ([manifest](../../../benchmarks/sets/rule-signal-mini.json)): + +- Signal rank: **#1 of 11** +- Signal score: **0.72 / 1.00** +- Best separating metric: **findings / file (0.87)** +- Hit rate: **6/6 AI repos** vs **3/5 mature OSS repos** +- Full results: [rule signal report](../../../reports/rule-signal-mini.md#defensiveerror-swallowing) diff --git a/src/rules/over-fragmentation/README.md b/src/rules/over-fragmentation/README.md index 57cf0a5..7e0e5cb 100644 --- a/src/rules/over-fragmentation/README.md +++ b/src/rules/over-fragmentation/README.md @@ -51,3 +51,13 @@ Asset buckets and test-heavy directories are suppressed, and a directory full of The score is `4 + tinyRatio * 3 + ceremonyRatio * 2`. That weights tiny-file prevalence most heavily and adds extra pressure when wrappers and barrels make up a large share of the directory. + +## Benchmark signal + +Small pinned rule benchmark ([manifest](../../../benchmarks/sets/rule-signal-mini.json)): + +- Signal rank: **#11 of 11** +- Signal score: **0.17 / 1.00** +- Best separating metric: **findings / file (0.18)** +- Hit rate: **1/6 AI repos** vs **4/5 mature OSS repos** +- Full results: [rule signal report](../../../reports/rule-signal-mini.md#structureover-fragmentation) diff --git a/src/rules/pass-through-wrappers/README.md b/src/rules/pass-through-wrappers/README.md index 9e5bd49..2b1a06d 100644 --- a/src/rules/pass-through-wrappers/README.md +++ b/src/rules/pass-through-wrappers/README.md @@ -43,3 +43,13 @@ export function getJson(url: string) { ## Scoring Each wrapper adds `2` points, capped at `5` for the file. 
+ +## Benchmark signal + +Small pinned rule benchmark ([manifest](../../../benchmarks/sets/rule-signal-mini.json)): + +- Signal rank: **#3 of 11** +- Signal score: **0.67 / 1.00** +- Best separating metric: **findings / file (0.85)** +- Hit rate: **5/6 AI repos** vs **4/5 mature OSS repos** +- Full results: [rule signal report](../../../reports/rule-signal-mini.md#structurepass-through-wrappers) diff --git a/src/rules/placeholder-comments/README.md b/src/rules/placeholder-comments/README.md index 2a29ca2..39840e2 100644 --- a/src/rules/placeholder-comments/README.md +++ b/src/rules/placeholder-comments/README.md @@ -50,3 +50,13 @@ export function legacyMode() { ## Scoring Each matching comment adds `0.75` to the file score, capped at `1.5`. + +## Benchmark signal + +Small pinned rule benchmark ([manifest](../../../benchmarks/sets/rule-signal-mini.json)): + +- Signal rank: **#6 of 11** +- Signal score: **0.50 / 1.00** +- Best separating metric: **findings / file (0.50)** +- Hit rate: **0/6 AI repos** vs **0/5 mature OSS repos** +- Full results: [rule signal report](../../../reports/rule-signal-mini.md#commentsplaceholder-comments) diff --git a/tests/rule-signal-benchmark.test.ts b/tests/rule-signal-benchmark.test.ts new file mode 100644 index 0000000..48bcc90 --- /dev/null +++ b/tests/rule-signal-benchmark.test.ts @@ -0,0 +1,206 @@ +import { describe, expect, test } from "bun:test"; +import { DEFAULT_CONFIG } from "../src/config"; +import type { AnalysisResult, AnalysisSummary, NormalizedMetrics } from "../src/core/types"; +import { + computeAuroc, + createRuleSignalBenchmarkSummary, + type RuleSignalBenchmarkRun, +} from "../src/benchmarks/rule-signal"; +import { + renderRuleSignalReadmeSection, + upsertRuleSignalReadmeSection, +} from "../src/benchmarks/rule-signal-readme"; +import { renderRuleSignalBenchmarkReport } from "../src/benchmarks/rule-signal-report"; +import type { BenchmarkSet, BenchmarkedAnalysis } from "../src/benchmarks/types"; + +function 
metrics(value: number): NormalizedMetrics { + return { + scorePerFile: value, + scorePerKloc: value, + scorePerFunction: value, + findingsPerFile: value, + findingsPerKloc: value, + findingsPerFunction: value, + }; +} + +function buildSummary(value: number, findingCount: number): AnalysisSummary { + return { + fileCount: 10, + directoryCount: 3, + findingCount, + repoScore: value * 10, + physicalLineCount: 1000, + logicalLineCount: 1000, + functionCount: 100, + normalized: metrics(value), + }; +} + +function analysis(resultSummary: AnalysisSummary): AnalysisResult { + return { + rootDir: "/tmp/fake", + config: DEFAULT_CONFIG, + summary: resultSummary, + files: [], + directories: [], + findings: [], + fileScores: [], + directoryScores: [], + repoScore: resultSummary.repoScore, + }; +} + +function bench( + spec: BenchmarkSet["repos"][number], + value: number, + findingCount: number, +): BenchmarkedAnalysis { + return { + spec, + result: analysis(buildSummary(value, findingCount)), + }; +} + +function buildBenchmarkSet(): BenchmarkSet { + return { + schemaVersion: 1, + id: "fixture-rule-signal", + name: "Fixture rule signal benchmark", + description: "Small benchmark used for rule-signal unit coverage.", + artifacts: { + checkoutsDir: "benchmarks/.cache/checkouts/fixture-rule-signal", + snapshotPath: "benchmarks/results/fixture-rule-signal.json", + reportPath: "reports/fixture-rule-signal.md", + }, + repos: [ + { + id: "ai-one", + repo: "fixtures/ai-one", + url: "https://example.invalid/ai-one.git", + cohort: "explicit-ai", + ref: "1111111", + createdAt: "2026-01-01T00:00:00Z", + stars: 0, + provenance: "Fixture AI repo.", + }, + { + id: "ai-two", + repo: "fixtures/ai-two", + url: "https://example.invalid/ai-two.git", + cohort: "explicit-ai", + ref: "2222222", + createdAt: "2026-01-01T00:00:00Z", + stars: 0, + provenance: "Fixture AI repo.", + }, + { + id: "oss-one", + repo: "fixtures/oss-one", + url: "https://example.invalid/oss-one.git", + cohort: "mature-oss", + 
ref: "3333333", + createdAt: "2020-01-01T00:00:00Z", + stars: 0, + provenance: "Fixture OSS repo.", + }, + { + id: "oss-two", + repo: "fixtures/oss-two", + url: "https://example.invalid/oss-two.git", + cohort: "mature-oss", + ref: "4444444", + createdAt: "2020-01-01T00:00:00Z", + stars: 0, + provenance: "Fixture OSS repo.", + }, + ], + pairings: [], + }; +} + +function buildRuns(set: BenchmarkSet): RuleSignalBenchmarkRun[] { + const [aiOne, aiTwo, ossOne, ossTwo] = set.repos; + + return [ + { + rule: { + id: "defensive.empty-catch", + family: "defensive", + severity: "strong", + scope: "file", + requires: ["file.tryCatchSummaries"], + }, + analyses: [ + bench(aiOne!, 3, 3), + bench(aiTwo!, 2, 2), + bench(ossOne!, 0, 0), + bench(ossTwo!, 1, 1), + ], + }, + { + rule: { + id: "comments.placeholder-comments", + family: "comments", + severity: "weak", + scope: "file", + requires: ["file.commentSummaries"], + }, + analyses: [ + bench(aiOne!, 0, 0), + bench(aiTwo!, 1, 1), + bench(ossOne!, 0, 0), + bench(ossTwo!, 1, 1), + ], + }, + ]; +} + +describe("rule signal benchmark support", () => { + test("computes AUROC with tie handling", () => { + expect(computeAuroc([3, 2], [0, 1])).toBe(1); + expect(computeAuroc([1], [1])).toBe(0.5); + expect(computeAuroc([], [1])).toBeNull(); + }); + + test("builds ranked rule summaries, renders report text, and updates rule README sections", () => { + const set = buildBenchmarkSet(); + const summary = createRuleSignalBenchmarkSummary( + set, + buildRuns(set), + "0.3.0", + { + manifestPath: "benchmarks/sets/rule-signal-mini.json", + summaryPath: "benchmarks/results/rule-signal-mini.json", + reportPath: "reports/rule-signal-mini.md", + }, + "2026-04-19T00:00:00Z", + ); + + expect(summary.rules).toHaveLength(2); + expect(summary.rules[0]?.ruleId).toBe("defensive.empty-catch"); + expect(summary.rules[0]?.rank).toBe(1); + expect(summary.rules[0]?.signalScore).toBe(1); + expect(summary.rules[0]?.cohorts["explicit-ai"].hitCount).toBe(2); + 
expect(summary.rules[0]?.cohorts["mature-oss"].hitCount).toBe(1); + expect(summary.rules[1]?.signalScore).toBe(0.5); + + const report = renderRuleSignalBenchmarkReport(set, summary); + expect(report).toContain("Per-rule signal benchmark: Fixture rule signal benchmark"); + expect(report).toContain("Signal score = average AUROC across the six normalized metrics"); + expect(report).toContain("`defensive.empty-catch`"); + expect(report).toContain("fixtures/ai-one"); + + const readmeSection = renderRuleSignalReadmeSection(summary, summary.rules[0]!); + expect(readmeSection).toContain("Signal rank: **#1 of 2**"); + expect(readmeSection).toContain("Best separating metric: **findings / file (1.00)**"); + expect(readmeSection).toContain("reports/rule-signal-mini.md#defensiveempty-catch"); + + const appended = upsertRuleSignalReadmeSection("# defensive.empty-catch\n", readmeSection); + expect(appended).toContain("## Benchmark signal"); + + const replaced = upsertRuleSignalReadmeSection(appended, "## Benchmark signal\n\nUpdated\n"); + expect(replaced).toContain("Updated"); + expect(replaced.match(/## Benchmark signal/g)?.length).toBe(1); + }); +}); From 84cc435a08802cb5f5bda536b8620bcd6a404ee6 Mon Sep 17 00:00:00 2001 From: Ben Vinegar Date: Sun, 19 Apr 2026 09:56:03 -0400 Subject: [PATCH 2/4] Prototype strong promise-catch signal rule --- autoresearch.checks.sh | 15 + autoresearch.ideas.md | 3 + autoresearch.md | 59 +++ autoresearch.sh | 4 + .../results/autoresearch-candidate-rule.json | 484 ++++++++++++++++++ reports/autoresearch-candidate-rule.md | 66 +++ scripts/benchmark-experimental-rule.ts | 198 +++++++ src/rules/promise-default-fallbacks/index.ts | 151 ++++++ tests/promise-default-fallbacks.test.ts | 101 ++++ 9 files changed, 1081 insertions(+) create mode 100755 autoresearch.checks.sh create mode 100644 autoresearch.ideas.md create mode 100644 autoresearch.md create mode 100755 autoresearch.sh create mode 100644 benchmarks/results/autoresearch-candidate-rule.json 
create mode 100644 reports/autoresearch-candidate-rule.md create mode 100644 scripts/benchmark-experimental-rule.ts create mode 100644 src/rules/promise-default-fallbacks/index.ts create mode 100644 tests/promise-default-fallbacks.test.ts diff --git a/autoresearch.checks.sh b/autoresearch.checks.sh new file mode 100755 index 0000000..d85f263 --- /dev/null +++ b/autoresearch.checks.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -euo pipefail + +tmpdir="$(mktemp -d)" +trap 'rm -rf "$tmpdir"' EXIT + +bun run format:check >"$tmpdir/format.log" 2>&1 || { + tail -50 "$tmpdir/format.log" + exit 1 +} + +bun test tests/promise-default-fallbacks.test.ts tests/rule-signal-benchmark.test.ts >"$tmpdir/tests.log" 2>&1 || { + tail -80 "$tmpdir/tests.log" + exit 1 +} diff --git a/autoresearch.ideas.md b/autoresearch.ideas.md new file mode 100644 index 0000000..3c0489c --- /dev/null +++ b/autoresearch.ideas.md @@ -0,0 +1,3 @@ +- Investigate a lower-overfit comment rule around formulaic doc-comments that restate function names with generic imperative verbs (`build`, `create`, `resolve`, `detect`, `check whether`). +- Investigate whether promise `.catch()` handlers that only log and implicitly return `undefined` form a strong separate rule or are too close to existing swallowing/obscuring rules. +- Investigate fallback-comment patterns (`fall through to`, `fall back to`) only if the AST-based promise-catch idea stalls; treat this as higher overfit risk. diff --git a/autoresearch.md b/autoresearch.md new file mode 100644 index 0000000..3db9769 --- /dev/null +++ b/autoresearch.md @@ -0,0 +1,59 @@ +# Autoresearch: new strong AI-signal rule on the full pinned benchmark + +## Objective + +Find a **new** `slop-scan` rule that is not just a restatement of an existing built-in rule and that scores **> 0.80 signal_score** on the full pinned `known-ai-vs-solid-oss` benchmark set. + +The rule should generalize as a real slop/code-quality smell, not as benchmark-specific trivia. 
We may use the full pinned benchmark set to evaluate candidate rules, but we must not hardcode repo ids, repo-specific strings, manifest membership, or benchmark-only exceptions. + +## Metrics + +- **Primary**: `signal_score` (unitless, higher is better) — average AUROC across the rule-signal normalized metrics on the full pinned benchmark set. +- **Secondary**: + - `best_metric_auc` + - `ai_hit_rate` + - `oss_hit_rate` + +## How to Run + +`./autoresearch.sh` + +That script runs `scripts/benchmark-experimental-rule.ts` against the full pinned benchmark cohort and writes: + +- `benchmarks/results/autoresearch-candidate-rule.json` +- `reports/autoresearch-candidate-rule.md` + +## Files in Scope + +- `src/rules/promise-default-fallbacks/index.ts` — current experimental candidate rule. +- `scripts/benchmark-experimental-rule.ts` — isolated benchmark runner for the candidate rule on the full pinned set. +- `tests/promise-default-fallbacks.test.ts` — focused behavioral coverage for the candidate rule. +- `src/facts/ts-helpers.ts` — only if the rule needs shared AST helpers. +- `src/benchmarks/rule-signal.ts` — only if benchmark summary math or isolated-rule wiring truly needs adjustment. +- `autoresearch.md` +- `autoresearch.sh` +- `autoresearch.checks.sh` +- `autoresearch.ideas.md` + +## Off Limits + +- `benchmarks/sets/known-ai-vs-solid-oss.json` repo membership, refs, provenance, or pairings. +- `benchmarks/.cache/**` pinned checkout contents. +- Hardcoding repo names, paths, benchmark fixture strings, or cohort-specific allow/deny lists into the rule. +- Editing existing built-in rules just to make the candidate look more unique. + +## Constraints + +- Do not cheat on the benchmark. +- Do not knowingly overfit to a single repo or a single weird generated subtree. +- Prefer explainable AST/code-smell rules over raw string matching. 
+- Keep the candidate distinct from existing rules such as `error-swallowing`, `error-obscuring`, `empty-catch`, `pass-through-wrappers`, and `placeholder-comments`. +- If the candidate changes analyzer behavior materially, keep focused tests passing. +- Generated benchmark artifacts are allowed for inspection, but benchmark manifests and pinned refs must stay fixed. + +## What's Been Tried + +- Initial corpus mining across the full pinned benchmark suggested that **promise `.catch()` handlers returning sentinel defaults** (`null`, `undefined`, `false`, `0`, `""`, `[]`, `{}`) are a promising signal. The first regex-based proxy over the full set showed roughly **0.89–0.91 AUROC** across the normalized metrics, but that proxy turned out to blur together true default returns and empty `() => {}` handlers. +- First honest AST baseline: `defensive.promise-default-fallbacks` scored **0.663** on the full set. It cleanly catches explicit default returns and log+default handlers, but it misses several high-signal empty promise-catch handlers in AI repos such as DevWorkbench, openusage, and agent-ci. +- The isolated benchmark harness also needed a function-count fix: isolated registries were computing `file.functionSummaries`, but the engine was dropping that fact before summary time. `scripts/benchmark-experimental-rule.ts` now has to recover function counts through `analyzeRepository` hooks so `scorePerFunction` and `findingsPerFunction` stay honest. +- Textual comment mining surfaced fallback-comment phrases (for example `fall through to` / `fall back to`) and formulaic doc-comment prefixes, but those look easier to overfit and are currently lower priority than the promise-catch idea. 
diff --git a/autoresearch.sh b/autoresearch.sh new file mode 100755 index 0000000..cf807f1 --- /dev/null +++ b/autoresearch.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail + +bun run scripts/benchmark-experimental-rule.ts diff --git a/benchmarks/results/autoresearch-candidate-rule.json b/benchmarks/results/autoresearch-candidate-rule.json new file mode 100644 index 0000000..a9eaa4d --- /dev/null +++ b/benchmarks/results/autoresearch-candidate-rule.json @@ -0,0 +1,484 @@ +{ + "schemaVersion": 1, + "benchmarkSetId": "known-ai-vs-solid-oss", + "benchmarkSetName": "Known AI repos vs older solid OSS repos", + "generatedAt": "2026-04-19T13:55:19.897Z", + "analyzerVersion": "0.3.0", + "artifacts": { + "manifestPath": "benchmarks/sets/known-ai-vs-solid-oss.json", + "summaryPath": "benchmarks/results/autoresearch-candidate-rule.json", + "reportPath": "reports/autoresearch-candidate-rule.md" + }, + "rules": [ + { + "ruleId": "defensive.promise-default-fallbacks", + "ruleSlug": "promise-default-fallbacks", + "family": "defensive", + "severity": "strong", + "scope": "file", + "requires": [ + "file.ast" + ], + "signalScore": 0.9670781893004116, + "bestMetric": "scorePerFunction", + "bestMetricAuc": 0.9876543209876543, + "metricAucs": { + "scorePerFile": 0.9506172839506173, + "scorePerKloc": 0.9753086419753086, + "scorePerFunction": 0.9876543209876543, + "findingsPerFile": 0.9629629629629629, + "findingsPerKloc": 0.9506172839506173, + "findingsPerFunction": 0.9753086419753086 + }, + "cohorts": { + "explicit-ai": { + "repoCount": 9, + "hitCount": 9, + "hitRate": 1, + "repoScoreMedian": 10, + "findingCountMedian": 3, + "medians": { + "scorePerFile": 0.10638297872340426, + "scorePerKloc": 1.180080245456691, + "scorePerFunction": 0.02982516283853274, + "findingsPerFile": 0.03125, + "findingsPerKloc": 0.3247984068395757, + "findingsPerFunction": 0.008302765936353722 + } + }, + "mature-oss": { + "repoCount": 9, + "hitCount": 5, + "hitRate": 0.5555555555555556, + 
"repoScoreMedian": 2, + "findingCountMedian": 1, + "medians": { + "scorePerFile": 0.0014170996693434106, + "scorePerKloc": 0.023810279691418777, + "scorePerFunction": 0.0014942099364960778, + "findingsPerFile": 0.0007085498346717053, + "findingsPerKloc": 0.011905139845709388, + "findingsPerFunction": 0.00018677624206200972 + } + } + }, + "repos": [ + { + "id": "devworkbench", + "repo": "jiayun/DevWorkbench", + "cohort": "explicit-ai", + "ref": "ea50862107c377b0cdd6e508073ad0330b367576", + "summary": { + "fileCount": 32, + "directoryCount": 7, + "findingCount": 1, + "repoScore": 8, + "physicalLineCount": 9408, + "logicalLineCount": 2986, + "functionCount": 147, + "normalized": { + "scorePerFile": 0.25, + "scorePerKloc": 2.6791694574681846, + "scorePerFunction": 0.05442176870748299, + "findingsPerFile": 0.03125, + "findingsPerKloc": 0.3348961821835231, + "findingsPerFunction": 0.006802721088435374 + } + } + }, + { + "id": "gstack", + "repo": "garrytan/gstack", + "cohort": "explicit-ai", + "ref": "6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", + "summary": { + "fileCount": 176, + "directoryCount": 19, + "findingCount": 7, + "repoScore": 40, + "physicalLineCount": 62616, + "logicalLineCount": 18958, + "functionCount": 832, + "normalized": { + "scorePerFile": 0.22727272727272727, + "scorePerKloc": 2.109927207511341, + "scorePerFunction": 0.04807692307692308, + "findingsPerFile": 0.03977272727272727, + "findingsPerKloc": 0.3692372613144847, + "findingsPerFunction": 0.008413461538461538 + } + } + }, + { + "id": "vinext", + "repo": "cloudflare/vinext", + "cohort": "explicit-ai", + "ref": "28980b01f1692b47756a6768fc5733712c290f84", + "summary": { + "fileCount": 1129, + "directoryCount": 486, + "findingCount": 30, + "repoScore": 87, + "physicalLineCount": 150365, + "logicalLineCount": 59523, + "functionCount": 2917, + "normalized": { + "scorePerFile": 0.07705934455270151, + "scorePerKloc": 1.461619878030341, + "scorePerFunction": 0.02982516283853274, + "findingsPerFile": 
0.026572187776793623, + "findingsPerKloc": 0.5040068544932211, + "findingsPerFunction": 0.010284538909838875 + } + } + }, + { + "id": "emdash", + "repo": "emdash-cms/emdash", + "cohort": "explicit-ai", + "ref": "dbaf8c6f857456b6a449e68d26c5000c1143fadb", + "summary": { + "fileCount": 1072, + "directoryCount": 306, + "findingCount": 39, + "repoScore": 144, + "physicalLineCount": 257938, + "logicalLineCount": 120432, + "functionCount": 3513, + "normalized": { + "scorePerFile": 0.13432835820895522, + "scorePerKloc": 1.1956954962136308, + "scorePerFunction": 0.04099060631938514, + "findingsPerFile": 0.036380597014925374, + "findingsPerKloc": 0.3238341968911917, + "findingsPerFunction": 0.011101622544833475 + } + } + }, + { + "id": "agent-ci", + "repo": "redwoodjs/agent-ci", + "cohort": "explicit-ai", + "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", + "summary": { + "fileCount": 94, + "directoryCount": 24, + "findingCount": 3, + "repoScore": 10, + "physicalLineCount": 17222, + "logicalLineCount": 8474, + "functionCount": 220, + "normalized": { + "scorePerFile": 0.10638297872340426, + "scorePerKloc": 1.180080245456691, + "scorePerFunction": 0.045454545454545456, + "findingsPerFile": 0.031914893617021274, + "findingsPerKloc": 0.35402407363700733, + "findingsPerFunction": 0.013636363636363636 + } + } + }, + { + "id": "openclaw", + "repo": "openclaw/openclaw", + "cohort": "explicit-ai", + "ref": "44cf74717b4507245648a0263f3b0df85aa2a813", + "summary": { + "fileCount": 10465, + "directoryCount": 423, + "findingCount": 335, + "repoScore": 1140, + "physicalLineCount": 1939647, + "logicalLineCount": 1031409, + "functionCount": 40348, + "normalized": { + "scorePerFile": 0.10893454371715242, + "scorePerKloc": 1.1052841307376606, + "scorePerFunction": 0.02825418855953207, + "findingsPerFile": 0.03201146679407549, + "findingsPerKloc": 0.3247984068395757, + "findingsPerFunction": 0.008302765936353722 + } + } + }, + { + "id": "hunk", + "repo": "modem-dev/hunk", + "cohort": 
"explicit-ai", + "ref": "b37663f3c3055f860290587865199192e15d9f35", + "summary": { + "fileCount": 166, + "directoryCount": 36, + "findingCount": 3, + "repoScore": 10, + "physicalLineCount": 32498, + "logicalLineCount": 13564, + "functionCount": 752, + "normalized": { + "scorePerFile": 0.060240963855421686, + "scorePerKloc": 0.7372456502506635, + "scorePerFunction": 0.013297872340425532, + "findingsPerFile": 0.018072289156626505, + "findingsPerKloc": 0.22117369507519904, + "findingsPerFunction": 0.003989361702127659 + } + } + }, + { + "id": "openusage", + "repo": "robinebers/openusage", + "cohort": "explicit-ai", + "ref": "857f537a243483acf98ccd9ea32e20b380c63823", + "summary": { + "fileCount": 139, + "directoryCount": 29, + "findingCount": 1, + "repoScore": 4, + "physicalLineCount": 33794, + "logicalLineCount": 22270, + "functionCount": 491, + "normalized": { + "scorePerFile": 0.02877697841726619, + "scorePerKloc": 0.1796138302649304, + "scorePerFunction": 0.008146639511201629, + "findingsPerFile": 0.007194244604316547, + "findingsPerKloc": 0.0449034575662326, + "findingsPerFunction": 0.002036659877800407 + } + } + }, + { + "id": "fulling", + "repo": "FullAgent/fulling", + "cohort": "explicit-ai", + "ref": "d95060f8421b5888564fc8517c1fe99ded463985", + "summary": { + "fileCount": 219, + "directoryCount": 78, + "findingCount": 1, + "repoScore": 2, + "physicalLineCount": 26787, + "logicalLineCount": 12154, + "functionCount": 574, + "normalized": { + "scorePerFile": 0.0091324200913242, + "scorePerKloc": 0.1645548790521639, + "scorePerFunction": 0.003484320557491289, + "findingsPerFile": 0.0045662100456621, + "findingsPerKloc": 0.08227743952608195, + "findingsPerFunction": 0.0017421602787456446 + } + } + }, + { + "id": "astro", + "repo": "withastro/astro", + "cohort": "mature-oss", + "ref": "f7068995aa451dced13853789b0d51433c2373b5", + "summary": { + "fileCount": 1949, + "directoryCount": 852, + "findingCount": 9, + "repoScore": 20.5, + "physicalLineCount": 138854, + 
"logicalLineCount": 80948, + "functionCount": 3018, + "normalized": { + "scorePerFile": 0.01051821446895844, + "scorePerKloc": 0.2532489993576123, + "scorePerFunction": 0.006792577866136514, + "findingsPerFile": 0.004617752693689072, + "findingsPerKloc": 0.11118248752285419, + "findingsPerFunction": 0.002982107355864811 + } + } + }, + { + "id": "vite", + "repo": "vitejs/vite", + "cohort": "mature-oss", + "ref": "a4922537a8d705da7769d30626a0d846511fc124", + "summary": { + "fileCount": 1229, + "directoryCount": 525, + "findingCount": 3, + "repoScore": 6, + "physicalLineCount": 77629, + "logicalLineCount": 37251, + "functionCount": 1904, + "normalized": { + "scorePerFile": 0.004882017900732303, + "scorePerKloc": 0.1610695014898929, + "scorePerFunction": 0.0031512605042016808, + "findingsPerFile": 0.0024410089503661514, + "findingsPerKloc": 0.08053475074494645, + "findingsPerFunction": 0.0015756302521008404 + } + } + }, + { + "id": "hyper", + "repo": "vercel/hyper", + "cohort": "mature-oss", + "ref": "2a7bb18259d975f27b30b502af1be7576f6f5656", + "summary": { + "fileCount": 113, + "directoryCount": 21, + "findingCount": 1, + "repoScore": 8, + "physicalLineCount": 162735, + "logicalLineCount": 65075, + "functionCount": 5354, + "normalized": { + "scorePerFile": 0.07079646017699115, + "scorePerKloc": 0.12293507491356127, + "scorePerFunction": 0.0014942099364960778, + "findingsPerFile": 0.008849557522123894, + "findingsPerKloc": 0.01536688436419516, + "findingsPerFunction": 0.00018677624206200972 + } + } + }, + { + "id": "execa", + "repo": "sindresorhus/execa", + "cohort": "mature-oss", + "ref": "99d1741d2525eca71b986282148bbf2983356428", + "summary": { + "fileCount": 580, + "directoryCount": 46, + "findingCount": 1, + "repoScore": 2, + "physicalLineCount": 35995, + "logicalLineCount": 20374, + "functionCount": 1007, + "normalized": { + "scorePerFile": 0.0034482758620689655, + "scorePerKloc": 0.09816432708353785, + "scorePerFunction": 0.0019860973187686196, + 
"findingsPerFile": 0.0017241379310344827, + "findingsPerKloc": 0.049082163541768926, + "findingsPerFunction": 0.0009930486593843098 + } + } + }, + { + "id": "payload", + "repo": "payloadcms/payload", + "cohort": "mature-oss", + "ref": "f3f36d801010f3c95ae74655ff22a09ea66ab1ac", + "summary": { + "fileCount": 4234, + "directoryCount": 1837, + "findingCount": 3, + "repoScore": 6, + "physicalLineCount": 384327, + "logicalLineCount": 251992, + "functionCount": 3544, + "normalized": { + "scorePerFile": 0.0014170996693434106, + "scorePerKloc": 0.023810279691418777, + "scorePerFunction": 0.001693002257336343, + "findingsPerFile": 0.0007085498346717053, + "findingsPerKloc": 0.011905139845709388, + "findingsPerFunction": 0.0008465011286681715 + } + } + }, + { + "id": "tsup", + "repo": "egoist/tsup", + "cohort": "mature-oss", + "ref": "cd03e1e00ec2bd6676ae1837cbc7e618ab6a2362", + "summary": { + "fileCount": 46, + "directoryCount": 8, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 6456, + "logicalLineCount": 2668, + "functionCount": 140, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + }, + { + "id": "node-notifier", + "repo": "mikaelbr/node-notifier", + "cohort": "mature-oss", + "ref": "b36c237f0d913f9df3a2bd45adc08b33ff717f6a", + "summary": { + "fileCount": 24, + "directoryCount": 5, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 2716, + "logicalLineCount": 2114, + "functionCount": 42, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + }, + { + "id": "zustand", + "repo": "pmndrs/zustand", + "cohort": "mature-oss", + "ref": "2e6d8813095c6a79ca208bae4c2cf5edc12049a1", + "summary": { + "fileCount": 48, + "directoryCount": 15, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 8814, + 
"logicalLineCount": 7096, + "functionCount": 161, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + }, + { + "id": "umami", + "repo": "umami-software/umami", + "cohort": "mature-oss", + "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", + "summary": { + "fileCount": 512, + "directoryCount": 87, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 29677, + "logicalLineCount": 20508, + "functionCount": 911, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } + } + ], + "rank": 1 + } + ] +} diff --git a/reports/autoresearch-candidate-rule.md b/reports/autoresearch-candidate-rule.md new file mode 100644 index 0000000..6f42a98 --- /dev/null +++ b/reports/autoresearch-candidate-rule.md @@ -0,0 +1,66 @@ +# Per-rule signal benchmark: Known AI repos vs older solid OSS repos + +Date: 2026-04-19 +Analyzer version: 0.3.0 +Manifest: `benchmarks/sets/known-ai-vs-solid-oss.json` +Summary: `benchmarks/results/autoresearch-candidate-rule.json` +Report: `reports/autoresearch-candidate-rule.md` + +## Goal + +Compare a cohort of known AI-generated JavaScript/TypeScript repos against well-regarded OSS repos, with the mature-OSS cohort pinned to the latest default-branch commit on or before 2025-01-01, using exact commit SHAs and normalized analyzer metrics. + +Signal score = average AUROC across the six normalized metrics when each rule runs in isolation against this pinned mini cohort. 1.00 means perfect AI-over-OSS separation, while 0.50 means no better than random ordering. 
+ +## Leaderboard + +| Rank | Rule | Signal score | AI hit rate | OSS hit rate | Best metric | Best AUROC | +|---:|---|---:|---:|---:|---|---:| +| 1 | `defensive.promise-default-fallbacks` | **0.97** | 9/9 (100%) | 5/9 (56%) | score / function | 0.99 | + + +## defensive.promise-default-fallbacks + +- Rank: **#1** of 1 +- Signal score: **0.97 / 1.00** +- Family / severity / scope: `defensive` / `strong` / `file` +- Best metric: score / function (0.99) + +### Cohort summary + +| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | +|---|---:|---:|---:|---:|---:|---:| +| explicit-ai | 9/9 (100%) | 3.00 | 10.00 | 0.11 | 1.18 | 0.32 | +| mature-oss | 5/9 (56%) | 1.00 | 2.00 | 0.00 | 0.02 | 0.01 | + +### AUROC by normalized metric + +- score / file: 0.95 +- score / KLOC: 0.98 +- score / function: 0.99 +- findings / file: 0.96 +- findings / KLOC: 0.95 +- findings / function: 0.98 + +### Repo results + +| Repo | Cohort | Ref | Findings | Repo score | Score / file | Score / KLOC | Findings / KLOC | +|---|---|---|---:|---:|---:|---:|---:| +| [jiayun/DevWorkbench](https://github.com/jiayun/DevWorkbench) | explicit-ai | `ea50862` | 1 | 8.00 | 0.25 | 2.68 | 0.33 | +| [garrytan/gstack](https://github.com/garrytan/gstack) | explicit-ai | `6cc094c` | 7 | 40.00 | 0.23 | 2.11 | 0.37 | +| [cloudflare/vinext](https://github.com/cloudflare/vinext) | explicit-ai | `28980b0` | 30 | 87.00 | 0.08 | 1.46 | 0.50 | +| [emdash-cms/emdash](https://github.com/emdash-cms/emdash) | explicit-ai | `dbaf8c6` | 39 | 144.00 | 0.13 | 1.20 | 0.32 | +| [redwoodjs/agent-ci](https://github.com/redwoodjs/agent-ci) | explicit-ai | `4de00d6` | 3 | 10.00 | 0.11 | 1.18 | 0.35 | +| [openclaw/openclaw](https://github.com/openclaw/openclaw) | explicit-ai | `44cf747` | 335 | 1140.00 | 0.11 | 1.11 | 0.32 | +| [modem-dev/hunk](https://github.com/modem-dev/hunk) | explicit-ai | `b37663f` | 3 | 10.00 | 0.06 | 0.74 | 0.22 | +| 
[robinebers/openusage](https://github.com/robinebers/openusage) | explicit-ai | `857f537` | 1 | 4.00 | 0.03 | 0.18 | 0.04 | +| [FullAgent/fulling](https://github.com/FullAgent/fulling) | explicit-ai | `d95060f` | 1 | 2.00 | 0.01 | 0.16 | 0.08 | +| [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 9 | 20.50 | 0.01 | 0.25 | 0.11 | +| [vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 3 | 6.00 | 0.00 | 0.16 | 0.08 | +| [vercel/hyper](https://github.com/vercel/hyper) | mature-oss | `2a7bb18` | 1 | 8.00 | 0.07 | 0.12 | 0.02 | +| [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | `99d1741` | 1 | 2.00 | 0.00 | 0.10 | 0.05 | +| [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 3 | 6.00 | 0.00 | 0.02 | 0.01 | +| [egoist/tsup](https://github.com/egoist/tsup) | mature-oss | `cd03e1e` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [mikaelbr/node-notifier](https://github.com/mikaelbr/node-notifier) | mature-oss | `b36c237` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [pmndrs/zustand](https://github.com/pmndrs/zustand) | mature-oss | `2e6d881` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | diff --git a/scripts/benchmark-experimental-rule.ts b/scripts/benchmark-experimental-rule.ts new file mode 100644 index 0000000..7be4374 --- /dev/null +++ b/scripts/benchmark-experimental-rule.ts @@ -0,0 +1,198 @@ +/** + * Benchmarks the current experimental candidate rule against the full pinned + * AI-vs-OSS cohort and emits structured `METRIC` lines for autoresearch. 
+ */ +import { mkdir, writeFile } from "node:fs/promises"; +import path from "node:path"; +import packageJson from "../package.json"; +import { getOption } from "./lib/get-option"; +import { ensurePinnedCheckouts, readHeadRef } from "../src/benchmarks/checkouts"; +import { + createRuleSignalBenchmarkSummary, + type RuleSignalBenchmarkRun, +} from "../src/benchmarks/rule-signal"; +import { renderRuleSignalBenchmarkReport } from "../src/benchmarks/rule-signal-report"; +import { loadBenchmarkSet, resolveProjectPath } from "../src/benchmarks/manifest"; +import type { BenchmarkSet } from "../src/benchmarks/types"; +import { DEFAULT_CONFIG } from "../src/config"; +import { analyzeRepository } from "../src/core/engine"; +import { Registry } from "../src/core/registry"; +import type { AnalysisResult, RulePlugin } from "../src/core/types"; +import type { FunctionSummary } from "../src/facts/types"; +import { createDefaultRegistry } from "../src/default-registry"; +import { promiseDefaultFallbacksRule } from "../src/rules/promise-default-fallbacks"; + +const DEFAULT_MANIFEST_PATH = path.resolve( + process.cwd(), + "benchmarks/sets/known-ai-vs-solid-oss.json", +); +const DEFAULT_SUMMARY_PATH = "benchmarks/results/autoresearch-candidate-rule.json"; +const DEFAULT_REPORT_PATH = "reports/autoresearch-candidate-rule.md"; +const BENCHMARK_FUNCTION_COUNT_FACT = "file.functionSummaries"; +const BASE_FACT_IDS = new Set([ + "file.record", + "file.text", + "file.lineCount", + "file.logicalLineCount", + "directory.record", + "repo.files", + "repo.directories", +]); + +function createStandaloneRuleBenchmarkRegistry(baseRegistry: Registry, rule: RulePlugin): Registry { + const providerByFact = new Map[number]>(); + for (const provider of baseRegistry.getFactProviders()) { + for (const factId of provider.provides) { + if (!providerByFact.has(factId)) { + providerByFact.set(factId, provider); + } + } + } + + const requiredProviderIds = new Set(); + const visitedFacts = new Set(); + + const 
requireFact = (factId: string): void => { + if (visitedFacts.has(factId) || BASE_FACT_IDS.has(factId)) { + return; + } + + visitedFacts.add(factId); + const provider = providerByFact.get(factId); + if (!provider) { + throw new Error(`No fact provider produces required fact ${factId} for rule ${rule.id}`); + } + + requiredProviderIds.add(provider.id); + for (const dependency of provider.requires) { + requireFact(dependency); + } + }; + + for (const factId of [...rule.requires, BENCHMARK_FUNCTION_COUNT_FACT]) { + requireFact(factId); + } + + const registry = new Registry(); + for (const language of baseRegistry.getLanguages()) { + registry.registerLanguage(language); + } + + for (const provider of baseRegistry.getFactProviders()) { + if (requiredProviderIds.has(provider.id)) { + registry.registerFactProvider(provider); + } + } + + registry.registerRule(rule); + return registry; +} + +function divideOrNull(numerator: number, denominator: number): number | null { + return denominator > 0 ? numerator / denominator : null; +} + +async function analyzeRepositoryWithFunctionCount( + rootDir: string, + registry: Registry, +): Promise { + let functionCount = 0; + + const result = await analyzeRepository(rootDir, DEFAULT_CONFIG, registry, { + hooks: { + onFileAnalyzed(file, store) { + functionCount += + store.getFileFact(file.path, BENCHMARK_FUNCTION_COUNT_FACT)?.length ?? 
+ 0; + }, + }, + }); + + return { + ...result, + summary: { + ...result.summary, + functionCount, + normalized: { + ...result.summary.normalized, + scorePerFunction: divideOrNull(result.summary.repoScore, functionCount), + findingsPerFunction: divideOrNull(result.summary.findingCount, functionCount), + }, + }, + }; +} + +async function analyzeRuleAcrossSet( + rule: RulePlugin, + checkoutsDir: string, + benchmarkSet: BenchmarkSet, +): Promise { + const baseRegistry = createDefaultRegistry(); + const registry = createStandaloneRuleBenchmarkRegistry(baseRegistry, rule); + const analyses = []; + + for (const repo of benchmarkSet.repos) { + const checkoutPath = path.join(checkoutsDir, repo.id); + const actualRef = readHeadRef(checkoutPath); + if (actualRef !== repo.ref) { + throw new Error(`Pinned ref mismatch for ${repo.id}: expected ${repo.ref}, got ${actualRef}`); + } + + console.log(`scanning ${repo.id} @ ${actualRef.slice(0, 7)}`); + const result = await analyzeRepositoryWithFunctionCount(checkoutPath, registry); + analyses.push({ spec: repo, result }); + } + + return { + rule: { + id: rule.id, + family: rule.family, + severity: rule.severity, + scope: rule.scope, + requires: [...rule.requires], + }, + analyses, + }; +} + +function printMetric(name: string, value: number | null): void { + console.log(`METRIC ${name}=${value ?? 
0}`); +} + +async function main(): Promise { + const manifestPath = getOption(process.argv.slice(2), "--manifest", DEFAULT_MANIFEST_PATH); + const summaryPathRelative = getOption(process.argv.slice(2), "--summary", DEFAULT_SUMMARY_PATH); + const reportPathRelative = getOption(process.argv.slice(2), "--report", DEFAULT_REPORT_PATH); + const benchmarkSet = await loadBenchmarkSet(manifestPath); + const checkoutsDir = resolveProjectPath(benchmarkSet.artifacts.checkoutsDir); + const summaryPath = resolveProjectPath(summaryPathRelative); + const reportPath = resolveProjectPath(reportPathRelative); + + await ensurePinnedCheckouts(checkoutsDir, benchmarkSet.repos); + + const run = await analyzeRuleAcrossSet(promiseDefaultFallbacksRule, checkoutsDir, benchmarkSet); + const summary = createRuleSignalBenchmarkSummary(benchmarkSet, [run], packageJson.version, { + manifestPath: path.relative(process.cwd(), manifestPath), + summaryPath: summaryPathRelative, + reportPath: reportPathRelative, + }); + const report = renderRuleSignalBenchmarkReport(benchmarkSet, summary); + const rule = summary.rules[0]; + if (!rule) { + throw new Error("Expected experimental benchmark summary to contain one rule."); + } + + await mkdir(path.dirname(summaryPath), { recursive: true }); + await writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`); + await mkdir(path.dirname(reportPath), { recursive: true }); + await writeFile(reportPath, `${report}\n`); + + console.log(`Wrote experimental summary to ${summaryPath}`); + console.log(`Wrote experimental report to ${reportPath}`); + printMetric("signal_score", rule.signalScore); + printMetric("best_metric_auc", rule.bestMetricAuc); + printMetric("ai_hit_rate", rule.cohorts["explicit-ai"].hitRate); + printMetric("oss_hit_rate", rule.cohorts["mature-oss"].hitRate); +} + +await main(); diff --git a/src/rules/promise-default-fallbacks/index.ts b/src/rules/promise-default-fallbacks/index.ts new file mode 100644 index 0000000..d4df16a --- /dev/null 
+++ b/src/rules/promise-default-fallbacks/index.ts @@ -0,0 +1,151 @@ +/** + * Flags promise `.catch()` handlers that convert a rejected async path into a + * cheap fallback value or an implicit `undefined`. + * + * This is intentionally narrower than the existing try/catch rules: it focuses + * on promise-chain catch callbacks that quietly coerce failures into `null`, + * `undefined`, `false`, `0`, `""`, `[]`, `{}`, or an empty handler body. That + * pattern keeps control flow moving while hiding the original rejection. + */ +import * as ts from "typescript"; +import type { RulePlugin } from "../../core/types"; +import { + getLineNumber, + isDefaultLiteral, + isLoggingCall, + unwrapExpression, + walk, +} from "../../facts/ts-helpers"; +import { delta } from "../../rule-delta"; + +type PromiseDefaultFallbackMatch = { + line: number; + kind: "default-return" | "empty-handler" | "log+default"; +}; + +function isCatchCall(node: ts.CallExpression): boolean { + return ts.isPropertyAccessExpression(node.expression) && node.expression.name.text === "catch"; +} + +function getCatchHandler(node: ts.CallExpression): ts.ArrowFunction | ts.FunctionExpression | null { + const [handler] = node.arguments; + if (!handler) { + return null; + } + + return ts.isArrowFunction(handler) || ts.isFunctionExpression(handler) ? 
handler : null; +} + +function statementIsLogging(statement: ts.Statement): boolean { + return ts.isExpressionStatement(statement) && isLoggingCall(statement.expression); +} + +function summarizeCatchHandler( + handler: ts.ArrowFunction | ts.FunctionExpression, + sourceFile: ts.SourceFile, +): PromiseDefaultFallbackMatch | null { + if (ts.isBlock(handler.body)) { + const statements = handler.body.statements; + if (statements.length === 0) { + return { + line: getLineNumber(sourceFile, handler.getStart(sourceFile)), + kind: "empty-handler", + }; + } + + const returnStatements = statements.filter(ts.isReturnStatement); + + if (returnStatements.length !== 1) { + return null; + } + + const [returnStatement] = returnStatements; + if (!returnStatement || !isDefaultLiteral(returnStatement.expression)) { + return null; + } + + const hasOnlyLoggingAndReturn = statements.every( + (statement) => statement === returnStatement || statementIsLogging(statement), + ); + if (!hasOnlyLoggingAndReturn) { + return null; + } + + return { + line: getLineNumber(sourceFile, handler.getStart(sourceFile)), + kind: statements.some(statementIsLogging) ? "log+default" : "default-return", + }; + } + + return isDefaultLiteral(unwrapExpression(handler.body)) + ? 
{ + line: getLineNumber(sourceFile, handler.getStart(sourceFile)), + kind: "default-return", + } + : null; +} + +function findPromiseDefaultFallbacks(sourceFile: ts.SourceFile): PromiseDefaultFallbackMatch[] { + const matches: PromiseDefaultFallbackMatch[] = []; + + walk(sourceFile, (node) => { + if (!ts.isCallExpression(node) || !isCatchCall(node)) { + return; + } + + const handler = getCatchHandler(node); + if (!handler) { + return; + } + + const match = summarizeCatchHandler(handler, sourceFile); + if (match) { + matches.push(match); + } + }); + + return matches; +} + +export const promiseDefaultFallbacksRule: RulePlugin = { + id: "defensive.promise-default-fallbacks", + family: "defensive", + severity: "strong", + scope: "file", + requires: ["file.ast"], + delta: delta.byLocations(), + supports(context) { + return context.scope === "file" && Boolean(context.file); + }, + evaluate(context) { + const sourceFile = context.runtime.store.getFileFact( + context.file!.path, + "file.ast", + ); + if (!sourceFile) { + return []; + } + + const matches = findPromiseDefaultFallbacks(sourceFile); + if (matches.length === 0) { + return []; + } + + return [ + { + ruleId: "defensive.promise-default-fallbacks", + family: "defensive", + severity: "strong", + scope: "file", + path: context.file!.path, + message: `Found ${matches.length} promise catch handler${matches.length === 1 ? "" : "s"} that suppress rejections with cheap fallbacks`, + evidence: matches.map((match) => `line ${match.line}: ${match.kind}`), + score: Math.min( + 8, + matches.reduce((total, match) => total + (match.kind === "log+default" ? 
2.5 : 2), 0), + ), + locations: matches.map((match) => ({ path: context.file!.path, line: match.line })), + }, + ]; + }, +}; diff --git a/tests/promise-default-fallbacks.test.ts b/tests/promise-default-fallbacks.test.ts new file mode 100644 index 0000000..651f0a9 --- /dev/null +++ b/tests/promise-default-fallbacks.test.ts @@ -0,0 +1,101 @@ +import { afterEach, describe, expect, test } from "bun:test"; +import { mkdtemp, mkdir, rm, writeFile } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { DEFAULT_CONFIG } from "../src/config"; +import { analyzeRepository } from "../src/core/engine"; +import { Registry } from "../src/core/registry"; +import { createDefaultRegistry } from "../src/default-registry"; +import { promiseDefaultFallbacksRule } from "../src/rules/promise-default-fallbacks"; + +const tempDirs: string[] = []; + +afterEach(async () => { + await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { recursive: true, force: true }))); +}); + +async function writeRepoFiles(rootDir: string, files: Record<string, string>): Promise<void> { + for (const [relativePath, content] of Object.entries(files)) { + const absolutePath = path.join(rootDir, relativePath); + await mkdir(path.dirname(absolutePath), { recursive: true }); + await writeFile(absolutePath, content); + } +} + +async function createTempRepo(files: Record<string, string>): Promise<string> { + const rootDir = await mkdtemp(path.join(os.tmpdir(), "slop-scan-promise-defaults-")); + tempDirs.push(rootDir); + await writeRepoFiles(rootDir, files); + return rootDir; +} + +function createCandidateRegistry(): Registry { + const baseRegistry = createDefaultRegistry(); + const registry = new Registry(); + + for (const language of baseRegistry.getLanguages()) { + registry.registerLanguage(language); + } + + for (const provider of baseRegistry.getFactProviders()) { + registry.registerFactProvider(provider); + } + + registry.registerRule(promiseDefaultFallbacksRule); + return registry; +} + 
+describe("promise-default-fallbacks rule", () => { + test("flags promise catch handlers that return default literals", async () => { + const rootDir = await createTempRepo({ + "src/slop.ts": [ + "export async function loadConfig() {", + " return readConfig().catch(() => null);", + "}", + "", + "export async function copyFromClipboard() {", + " return navigator.clipboard.readText().catch(() => {});", + "}", + "", + "export async function loadFeatureFlag() {", + " return fetchFlag().catch((error) => {", + ' console.error("flag load failed", error);', + " return false;", + " });", + "}", + "", + ].join("\n"), + "src/legit.ts": [ + "export async function loadRequiredConfig() {", + " return readConfig().catch((error) => {", + " throw error;", + " });", + "}", + "", + "export async function loadShape() {", + " return readConfig().catch(() => ({ ok: false, reason: 'missing' }));", + "}", + "", + ].join("\n"), + }); + + const result = await analyzeRepository(rootDir, DEFAULT_CONFIG, createCandidateRegistry()); + const finding = result.findings.find( + (nextFinding) => nextFinding.ruleId === "defensive.promise-default-fallbacks", + ); + + expect(finding).toBeDefined(); + expect(finding?.path).toBe("src/slop.ts"); + expect(finding?.evidence).toEqual([ + "line 2: default-return", + "line 6: empty-handler", + "line 10: log+default", + ]); + expect(finding?.locations).toEqual([ + { path: "src/slop.ts", line: 2 }, + { path: "src/slop.ts", line: 6 }, + { path: "src/slop.ts", line: 10 }, + ]); + expect(result.findings).toHaveLength(1); + }); +}); From ff717f21d705a761016b693acdeff66639db2b42 Mon Sep 17 00:00:00 2001 From: Ben Vinegar Date: Sun, 19 Apr 2026 10:00:44 -0400 Subject: [PATCH 3/4] Refine promise-catch signal rule --- autoresearch.md | 1 + .../results/autoresearch-candidate-rule.json | 76 +++++++++---------- reports/autoresearch-candidate-rule.md | 14 ++-- src/rules/promise-default-fallbacks/index.ts | 8 ++ tests/promise-default-fallbacks.test.ts | 16 ++++ 5 files 
changed, 70 insertions(+), 45 deletions(-) diff --git a/autoresearch.md b/autoresearch.md index 3db9769..b482905 100644 --- a/autoresearch.md +++ b/autoresearch.md @@ -56,4 +56,5 @@ That script runs `scripts/benchmark-experimental-rule.ts` against the full pinne - Initial corpus mining across the full pinned benchmark suggested that **promise `.catch()` handlers returning sentinel defaults** (`null`, `undefined`, `false`, `0`, `""`, `[]`, `{}`) are a promising signal. The first regex-based proxy over the full set showed roughly **0.89–0.91 AUROC** across the normalized metrics, but that proxy turned out to blur together true default returns and empty `() => {}` handlers. - First honest AST baseline: `defensive.promise-default-fallbacks` scored **0.663** on the full set. It cleanly catches explicit default returns and log+default handlers, but it misses several high-signal empty promise-catch handlers in AI repos such as DevWorkbench, openusage, and agent-ci. - The isolated benchmark harness also needed a function-count fix: isolated registries were computing `file.functionSummaries`, but the engine was dropping that fact before summary time. `scripts/benchmark-experimental-rule.ts` now has to recover function counts through `analyzeRepository` hooks so `scorePerFunction` and `findingsPerFunction` stay honest. +- Refinement win: skipping giant bundled/generated files over **5k logical lines** improved the honest full-set score from **0.967** to **0.977** by removing Hyper's vendored `yarn-standalone.js` noise without hurting the AI cohort. - Textual comment mining surfaced fallback-comment phrases (for example `fall through to` / `fall back to`) and formulaic doc-comment prefixes, but those look easier to overfit and are currently lower priority than the promise-catch idea. 
diff --git a/benchmarks/results/autoresearch-candidate-rule.json b/benchmarks/results/autoresearch-candidate-rule.json index a9eaa4d..b7cdae7 100644 --- a/benchmarks/results/autoresearch-candidate-rule.json +++ b/benchmarks/results/autoresearch-candidate-rule.json @@ -2,7 +2,7 @@ "schemaVersion": 1, "benchmarkSetId": "known-ai-vs-solid-oss", "benchmarkSetName": "Known AI repos vs older solid OSS repos", - "generatedAt": "2026-04-19T13:55:19.897Z", + "generatedAt": "2026-04-19T14:00:03.107Z", "analyzerVersion": "0.3.0", "artifacts": { "manifestPath": "benchmarks/sets/known-ai-vs-solid-oss.json", @@ -19,14 +19,14 @@ "requires": [ "file.ast" ], - "signalScore": 0.9670781893004116, - "bestMetric": "scorePerFunction", + "signalScore": 0.9773662551440329, + "bestMetric": "findingsPerFile", "bestMetricAuc": 0.9876543209876543, "metricAucs": { - "scorePerFile": 0.9506172839506173, + "scorePerFile": 0.9876543209876543, "scorePerKloc": 0.9753086419753086, "scorePerFunction": 0.9876543209876543, - "findingsPerFile": 0.9629629629629629, + "findingsPerFile": 0.9876543209876543, "findingsPerKloc": 0.9506172839506173, "findingsPerFunction": 0.9753086419753086 }, @@ -48,17 +48,17 @@ }, "mature-oss": { "repoCount": 9, - "hitCount": 5, - "hitRate": 0.5555555555555556, - "repoScoreMedian": 2, - "findingCountMedian": 1, + "hitCount": 4, + "hitRate": 0.4444444444444444, + "repoScoreMedian": 0, + "findingCountMedian": 0, "medians": { - "scorePerFile": 0.0014170996693434106, - "scorePerKloc": 0.023810279691418777, - "scorePerFunction": 0.0014942099364960778, - "findingsPerFile": 0.0007085498346717053, - "findingsPerKloc": 0.011905139845709388, - "findingsPerFunction": 0.00018677624206200972 + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 } } }, @@ -316,29 +316,6 @@ } } }, - { - "id": "hyper", - "repo": "vercel/hyper", - "cohort": "mature-oss", - "ref": "2a7bb18259d975f27b30b502af1be7576f6f5656", 
- "summary": { - "fileCount": 113, - "directoryCount": 21, - "findingCount": 1, - "repoScore": 8, - "physicalLineCount": 162735, - "logicalLineCount": 65075, - "functionCount": 5354, - "normalized": { - "scorePerFile": 0.07079646017699115, - "scorePerKloc": 0.12293507491356127, - "scorePerFunction": 0.0014942099364960778, - "findingsPerFile": 0.008849557522123894, - "findingsPerKloc": 0.01536688436419516, - "findingsPerFunction": 0.00018677624206200972 - } - } - }, { "id": "execa", "repo": "sindresorhus/execa", @@ -476,6 +453,29 @@ "findingsPerFunction": 0 } } + }, + { + "id": "hyper", + "repo": "vercel/hyper", + "cohort": "mature-oss", + "ref": "2a7bb18259d975f27b30b502af1be7576f6f5656", + "summary": { + "fileCount": 113, + "directoryCount": 21, + "findingCount": 0, + "repoScore": 0, + "physicalLineCount": 162735, + "logicalLineCount": 65075, + "functionCount": 5354, + "normalized": { + "scorePerFile": 0, + "scorePerKloc": 0, + "scorePerFunction": 0, + "findingsPerFile": 0, + "findingsPerKloc": 0, + "findingsPerFunction": 0 + } + } } ], "rank": 1 diff --git a/reports/autoresearch-candidate-rule.md b/reports/autoresearch-candidate-rule.md index 6f42a98..8351ce1 100644 --- a/reports/autoresearch-candidate-rule.md +++ b/reports/autoresearch-candidate-rule.md @@ -16,29 +16,29 @@ Signal score = average AUROC across the six normalized metrics when each rule ru | Rank | Rule | Signal score | AI hit rate | OSS hit rate | Best metric | Best AUROC | |---:|---|---:|---:|---:|---|---:| -| 1 | `defensive.promise-default-fallbacks` | **0.97** | 9/9 (100%) | 5/9 (56%) | score / function | 0.99 | +| 1 | `defensive.promise-default-fallbacks` | **0.98** | 9/9 (100%) | 4/9 (44%) | findings / file | 0.99 | ## defensive.promise-default-fallbacks - Rank: **#1** of 1 -- Signal score: **0.97 / 1.00** +- Signal score: **0.98 / 1.00** - Family / severity / scope: `defensive` / `strong` / `file` -- Best metric: score / function (0.99) +- Best metric: findings / file (0.99) ### Cohort 
summary | Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | |---|---:|---:|---:|---:|---:|---:| | explicit-ai | 9/9 (100%) | 3.00 | 10.00 | 0.11 | 1.18 | 0.32 | -| mature-oss | 5/9 (56%) | 1.00 | 2.00 | 0.00 | 0.02 | 0.01 | +| mature-oss | 4/9 (44%) | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ### AUROC by normalized metric -- score / file: 0.95 +- score / file: 0.99 - score / KLOC: 0.98 - score / function: 0.99 -- findings / file: 0.96 +- findings / file: 0.99 - findings / KLOC: 0.95 - findings / function: 0.98 @@ -57,10 +57,10 @@ Signal score = average AUROC across the six normalized metrics when each rule ru | [FullAgent/fulling](https://github.com/FullAgent/fulling) | explicit-ai | `d95060f` | 1 | 2.00 | 0.01 | 0.16 | 0.08 | | [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 9 | 20.50 | 0.01 | 0.25 | 0.11 | | [vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 3 | 6.00 | 0.00 | 0.16 | 0.08 | -| [vercel/hyper](https://github.com/vercel/hyper) | mature-oss | `2a7bb18` | 1 | 8.00 | 0.07 | 0.12 | 0.02 | | [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | `99d1741` | 1 | 2.00 | 0.00 | 0.10 | 0.05 | | [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 3 | 6.00 | 0.00 | 0.02 | 0.01 | | [egoist/tsup](https://github.com/egoist/tsup) | mature-oss | `cd03e1e` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | | [mikaelbr/node-notifier](https://github.com/mikaelbr/node-notifier) | mature-oss | `b36c237` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | | [pmndrs/zustand](https://github.com/pmndrs/zustand) | mature-oss | `2e6d881` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | | [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | +| [vercel/hyper](https://github.com/vercel/hyper) | mature-oss | `2a7bb18` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | diff --git 
a/src/rules/promise-default-fallbacks/index.ts b/src/rules/promise-default-fallbacks/index.ts index d4df16a..5a916b2 100644 --- a/src/rules/promise-default-fallbacks/index.ts +++ b/src/rules/promise-default-fallbacks/index.ts @@ -18,6 +18,8 @@ import { } from "../../facts/ts-helpers"; import { delta } from "../../rule-delta"; +const MAX_LOGICAL_LINES = 5000; + type PromiseDefaultFallbackMatch = { line: number; kind: "default-return" | "empty-handler" | "log+default"; @@ -118,6 +120,12 @@ export const promiseDefaultFallbacksRule: RulePlugin = { return context.scope === "file" && Boolean(context.file); }, evaluate(context) { + // Huge bundled/generated files are noisy outliers for this heuristic and can + // otherwise let one vendored blob dominate a repo-level signal. + if (context.file!.logicalLineCount > MAX_LOGICAL_LINES) { + return []; + } + const sourceFile = context.runtime.store.getFileFact( context.file!.path, "file.ast", diff --git a/tests/promise-default-fallbacks.test.ts b/tests/promise-default-fallbacks.test.ts index 651f0a9..0ae06cf 100644 --- a/tests/promise-default-fallbacks.test.ts +++ b/tests/promise-default-fallbacks.test.ts @@ -98,4 +98,20 @@ describe("promise-default-fallbacks rule", () => { ]); expect(result.findings).toHaveLength(1); }); + + test("ignores giant bundled files that would otherwise create vendor noise", async () => { + const hugeFile = [ + ...Array.from({ length: 5001 }, (_, index) => `export const filler${index} = ${index};`), + "Promise.resolve('x').catch(() => {});", + "", + ].join("\n"); + + const rootDir = await createTempRepo({ + "src/bundle.ts": hugeFile, + }); + + const result = await analyzeRepository(rootDir, DEFAULT_CONFIG, createCandidateRegistry()); + + expect(result.findings).toHaveLength(0); + }); }); From 8a13417292c77d6b18bea0174e6df5acc9df7f9e Mon Sep 17 00:00:00 2001 From: Ben Vinegar Date: Sun, 19 Apr 2026 10:21:31 -0400 Subject: [PATCH 4/4] Add promise default fallback rule --- .gitignore | 7 + README.md | 1 + 
autoresearch.checks.sh | 15 - autoresearch.ideas.md | 3 - autoresearch.md | 60 --- autoresearch.sh | 4 - .../results/autoresearch-candidate-rule.json | 484 ------------------ reports/autoresearch-candidate-rule.md | 66 --- scripts/benchmark-experimental-rule.ts | 198 ------- src/default-registry.ts | 2 + src/rules/promise-default-fallbacks/README.md | 68 +++ tests/heuristics.test.ts | 5 + 12 files changed, 83 insertions(+), 830 deletions(-) delete mode 100755 autoresearch.checks.sh delete mode 100644 autoresearch.ideas.md delete mode 100644 autoresearch.md delete mode 100755 autoresearch.sh delete mode 100644 benchmarks/results/autoresearch-candidate-rule.json delete mode 100644 reports/autoresearch-candidate-rule.md delete mode 100644 scripts/benchmark-experimental-rule.ts create mode 100644 src/rules/promise-default-fallbacks/README.md diff --git a/.gitignore b/.gitignore index 476f64a..5b01d92 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,10 @@ node_modules/ dist/ benchmarks/.cache/ +autoresearch.md +autoresearch.sh +autoresearch.checks.sh +autoresearch.ideas.md +benchmarks/results/autoresearch-candidate-rule.json +reports/autoresearch-candidate-rule.md +scripts/benchmark-experimental-rule.ts diff --git a/README.md b/README.md index afff110..bcb7b03 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,7 @@ Current checks focus on patterns that often show up in unreviewed generated code - [log-and-continue catch blocks](src/rules/error-swallowing/README.md) - [error-obscuring catch blocks](src/rules/error-obscuring/README.md) (default-return or generic replacement error) - [empty catch blocks](src/rules/empty-catch/README.md) +- [promise `.catch()` default fallbacks](src/rules/promise-default-fallbacks/README.md) - [async wrapper / `return await` noise](src/rules/async-noise/README.md) - [pass-through wrappers](src/rules/pass-through-wrappers/README.md) - [barrel density](src/rules/barrel-density/README.md) diff --git a/autoresearch.checks.sh 
b/autoresearch.checks.sh deleted file mode 100755 index d85f263..0000000 --- a/autoresearch.checks.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -tmpdir="$(mktemp -d)" -trap 'rm -rf "$tmpdir"' EXIT - -bun run format:check >"$tmpdir/format.log" 2>&1 || { - tail -50 "$tmpdir/format.log" - exit 1 -} - -bun test tests/promise-default-fallbacks.test.ts tests/rule-signal-benchmark.test.ts >"$tmpdir/tests.log" 2>&1 || { - tail -80 "$tmpdir/tests.log" - exit 1 -} diff --git a/autoresearch.ideas.md b/autoresearch.ideas.md deleted file mode 100644 index 3c0489c..0000000 --- a/autoresearch.ideas.md +++ /dev/null @@ -1,3 +0,0 @@ -- Investigate a lower-overfit comment rule around formulaic doc-comments that restate function names with generic imperative verbs (`build`, `create`, `resolve`, `detect`, `check whether`). -- Investigate whether promise `.catch()` handlers that only log and implicitly return `undefined` form a strong separate rule or are too close to existing swallowing/obscuring rules. -- Investigate fallback-comment patterns (`fall through to`, `fall back to`) only if the AST-based promise-catch idea stalls; treat this as higher overfit risk. diff --git a/autoresearch.md b/autoresearch.md deleted file mode 100644 index b482905..0000000 --- a/autoresearch.md +++ /dev/null @@ -1,60 +0,0 @@ -# Autoresearch: new strong AI-signal rule on the full pinned benchmark - -## Objective - -Find a **new** `slop-scan` rule that is not just a restatement of an existing built-in rule and that scores **> 0.80 signal_score** on the full pinned `known-ai-vs-solid-oss` benchmark set. - -The rule should generalize as a real slop/code-quality smell, not as benchmark-specific trivia. We may use the full pinned benchmark set to evaluate candidate rules, but we must not hardcode repo ids, repo-specific strings, manifest membership, or benchmark-only exceptions. 
- -## Metrics - -- **Primary**: `signal_score` (unitless, higher is better) — average AUROC across the rule-signal normalized metrics on the full pinned benchmark set. -- **Secondary**: - - `best_metric_auc` - - `ai_hit_rate` - - `oss_hit_rate` - -## How to Run - -`./autoresearch.sh` - -That script runs `scripts/benchmark-experimental-rule.ts` against the full pinned benchmark cohort and writes: - -- `benchmarks/results/autoresearch-candidate-rule.json` -- `reports/autoresearch-candidate-rule.md` - -## Files in Scope - -- `src/rules/promise-default-fallbacks/index.ts` — current experimental candidate rule. -- `scripts/benchmark-experimental-rule.ts` — isolated benchmark runner for the candidate rule on the full pinned set. -- `tests/promise-default-fallbacks.test.ts` — focused behavioral coverage for the candidate rule. -- `src/facts/ts-helpers.ts` — only if the rule needs shared AST helpers. -- `src/benchmarks/rule-signal.ts` — only if benchmark summary math or isolated-rule wiring truly needs adjustment. -- `autoresearch.md` -- `autoresearch.sh` -- `autoresearch.checks.sh` -- `autoresearch.ideas.md` - -## Off Limits - -- `benchmarks/sets/known-ai-vs-solid-oss.json` repo membership, refs, provenance, or pairings. -- `benchmarks/.cache/**` pinned checkout contents. -- Hardcoding repo names, paths, benchmark fixture strings, or cohort-specific allow/deny lists into the rule. -- Editing existing built-in rules just to make the candidate look more unique. - -## Constraints - -- Do not cheat on the benchmark. -- Do not knowingly overfit to a single repo or a single weird generated subtree. -- Prefer explainable AST/code-smell rules over raw string matching. -- Keep the candidate distinct from existing rules such as `error-swallowing`, `error-obscuring`, `empty-catch`, `pass-through-wrappers`, and `placeholder-comments`. -- If the candidate changes analyzer behavior materially, keep focused tests passing. 
-- Generated benchmark artifacts are allowed for inspection, but benchmark manifests and pinned refs must stay fixed. - -## What's Been Tried - -- Initial corpus mining across the full pinned benchmark suggested that **promise `.catch()` handlers returning sentinel defaults** (`null`, `undefined`, `false`, `0`, `""`, `[]`, `{}`) are a promising signal. The first regex-based proxy over the full set showed roughly **0.89–0.91 AUROC** across the normalized metrics, but that proxy turned out to blur together true default returns and empty `() => {}` handlers. -- First honest AST baseline: `defensive.promise-default-fallbacks` scored **0.663** on the full set. It cleanly catches explicit default returns and log+default handlers, but it misses several high-signal empty promise-catch handlers in AI repos such as DevWorkbench, openusage, and agent-ci. -- The isolated benchmark harness also needed a function-count fix: isolated registries were computing `file.functionSummaries`, but the engine was dropping that fact before summary time. `scripts/benchmark-experimental-rule.ts` now has to recover function counts through `analyzeRepository` hooks so `scorePerFunction` and `findingsPerFunction` stay honest. -- Refinement win: skipping giant bundled/generated files over **5k logical lines** improved the honest full-set score from **0.967** to **0.977** by removing Hyper's vendored `yarn-standalone.js` noise without hurting the AI cohort. -- Textual comment mining surfaced fallback-comment phrases (for example `fall through to` / `fall back to`) and formulaic doc-comment prefixes, but those look easier to overfit and are currently lower priority than the promise-catch idea. 
diff --git a/autoresearch.sh b/autoresearch.sh deleted file mode 100755 index cf807f1..0000000 --- a/autoresearch.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -bun run scripts/benchmark-experimental-rule.ts diff --git a/benchmarks/results/autoresearch-candidate-rule.json b/benchmarks/results/autoresearch-candidate-rule.json deleted file mode 100644 index b7cdae7..0000000 --- a/benchmarks/results/autoresearch-candidate-rule.json +++ /dev/null @@ -1,484 +0,0 @@ -{ - "schemaVersion": 1, - "benchmarkSetId": "known-ai-vs-solid-oss", - "benchmarkSetName": "Known AI repos vs older solid OSS repos", - "generatedAt": "2026-04-19T14:00:03.107Z", - "analyzerVersion": "0.3.0", - "artifacts": { - "manifestPath": "benchmarks/sets/known-ai-vs-solid-oss.json", - "summaryPath": "benchmarks/results/autoresearch-candidate-rule.json", - "reportPath": "reports/autoresearch-candidate-rule.md" - }, - "rules": [ - { - "ruleId": "defensive.promise-default-fallbacks", - "ruleSlug": "promise-default-fallbacks", - "family": "defensive", - "severity": "strong", - "scope": "file", - "requires": [ - "file.ast" - ], - "signalScore": 0.9773662551440329, - "bestMetric": "findingsPerFile", - "bestMetricAuc": 0.9876543209876543, - "metricAucs": { - "scorePerFile": 0.9876543209876543, - "scorePerKloc": 0.9753086419753086, - "scorePerFunction": 0.9876543209876543, - "findingsPerFile": 0.9876543209876543, - "findingsPerKloc": 0.9506172839506173, - "findingsPerFunction": 0.9753086419753086 - }, - "cohorts": { - "explicit-ai": { - "repoCount": 9, - "hitCount": 9, - "hitRate": 1, - "repoScoreMedian": 10, - "findingCountMedian": 3, - "medians": { - "scorePerFile": 0.10638297872340426, - "scorePerKloc": 1.180080245456691, - "scorePerFunction": 0.02982516283853274, - "findingsPerFile": 0.03125, - "findingsPerKloc": 0.3247984068395757, - "findingsPerFunction": 0.008302765936353722 - } - }, - "mature-oss": { - "repoCount": 9, - "hitCount": 4, - "hitRate": 0.4444444444444444, - 
"repoScoreMedian": 0, - "findingCountMedian": 0, - "medians": { - "scorePerFile": 0, - "scorePerKloc": 0, - "scorePerFunction": 0, - "findingsPerFile": 0, - "findingsPerKloc": 0, - "findingsPerFunction": 0 - } - } - }, - "repos": [ - { - "id": "devworkbench", - "repo": "jiayun/DevWorkbench", - "cohort": "explicit-ai", - "ref": "ea50862107c377b0cdd6e508073ad0330b367576", - "summary": { - "fileCount": 32, - "directoryCount": 7, - "findingCount": 1, - "repoScore": 8, - "physicalLineCount": 9408, - "logicalLineCount": 2986, - "functionCount": 147, - "normalized": { - "scorePerFile": 0.25, - "scorePerKloc": 2.6791694574681846, - "scorePerFunction": 0.05442176870748299, - "findingsPerFile": 0.03125, - "findingsPerKloc": 0.3348961821835231, - "findingsPerFunction": 0.006802721088435374 - } - } - }, - { - "id": "gstack", - "repo": "garrytan/gstack", - "cohort": "explicit-ai", - "ref": "6cc094cd4190e3fac65b69fb29e0c3cae7e0f214", - "summary": { - "fileCount": 176, - "directoryCount": 19, - "findingCount": 7, - "repoScore": 40, - "physicalLineCount": 62616, - "logicalLineCount": 18958, - "functionCount": 832, - "normalized": { - "scorePerFile": 0.22727272727272727, - "scorePerKloc": 2.109927207511341, - "scorePerFunction": 0.04807692307692308, - "findingsPerFile": 0.03977272727272727, - "findingsPerKloc": 0.3692372613144847, - "findingsPerFunction": 0.008413461538461538 - } - } - }, - { - "id": "vinext", - "repo": "cloudflare/vinext", - "cohort": "explicit-ai", - "ref": "28980b01f1692b47756a6768fc5733712c290f84", - "summary": { - "fileCount": 1129, - "directoryCount": 486, - "findingCount": 30, - "repoScore": 87, - "physicalLineCount": 150365, - "logicalLineCount": 59523, - "functionCount": 2917, - "normalized": { - "scorePerFile": 0.07705934455270151, - "scorePerKloc": 1.461619878030341, - "scorePerFunction": 0.02982516283853274, - "findingsPerFile": 0.026572187776793623, - "findingsPerKloc": 0.5040068544932211, - "findingsPerFunction": 0.010284538909838875 - } - } - }, - { 
- "id": "emdash", - "repo": "emdash-cms/emdash", - "cohort": "explicit-ai", - "ref": "dbaf8c6f857456b6a449e68d26c5000c1143fadb", - "summary": { - "fileCount": 1072, - "directoryCount": 306, - "findingCount": 39, - "repoScore": 144, - "physicalLineCount": 257938, - "logicalLineCount": 120432, - "functionCount": 3513, - "normalized": { - "scorePerFile": 0.13432835820895522, - "scorePerKloc": 1.1956954962136308, - "scorePerFunction": 0.04099060631938514, - "findingsPerFile": 0.036380597014925374, - "findingsPerKloc": 0.3238341968911917, - "findingsPerFunction": 0.011101622544833475 - } - } - }, - { - "id": "agent-ci", - "repo": "redwoodjs/agent-ci", - "cohort": "explicit-ai", - "ref": "4de00d69487e275d5bc37e30f7818a9303b22352", - "summary": { - "fileCount": 94, - "directoryCount": 24, - "findingCount": 3, - "repoScore": 10, - "physicalLineCount": 17222, - "logicalLineCount": 8474, - "functionCount": 220, - "normalized": { - "scorePerFile": 0.10638297872340426, - "scorePerKloc": 1.180080245456691, - "scorePerFunction": 0.045454545454545456, - "findingsPerFile": 0.031914893617021274, - "findingsPerKloc": 0.35402407363700733, - "findingsPerFunction": 0.013636363636363636 - } - } - }, - { - "id": "openclaw", - "repo": "openclaw/openclaw", - "cohort": "explicit-ai", - "ref": "44cf74717b4507245648a0263f3b0df85aa2a813", - "summary": { - "fileCount": 10465, - "directoryCount": 423, - "findingCount": 335, - "repoScore": 1140, - "physicalLineCount": 1939647, - "logicalLineCount": 1031409, - "functionCount": 40348, - "normalized": { - "scorePerFile": 0.10893454371715242, - "scorePerKloc": 1.1052841307376606, - "scorePerFunction": 0.02825418855953207, - "findingsPerFile": 0.03201146679407549, - "findingsPerKloc": 0.3247984068395757, - "findingsPerFunction": 0.008302765936353722 - } - } - }, - { - "id": "hunk", - "repo": "modem-dev/hunk", - "cohort": "explicit-ai", - "ref": "b37663f3c3055f860290587865199192e15d9f35", - "summary": { - "fileCount": 166, - "directoryCount": 36, - 
"findingCount": 3, - "repoScore": 10, - "physicalLineCount": 32498, - "logicalLineCount": 13564, - "functionCount": 752, - "normalized": { - "scorePerFile": 0.060240963855421686, - "scorePerKloc": 0.7372456502506635, - "scorePerFunction": 0.013297872340425532, - "findingsPerFile": 0.018072289156626505, - "findingsPerKloc": 0.22117369507519904, - "findingsPerFunction": 0.003989361702127659 - } - } - }, - { - "id": "openusage", - "repo": "robinebers/openusage", - "cohort": "explicit-ai", - "ref": "857f537a243483acf98ccd9ea32e20b380c63823", - "summary": { - "fileCount": 139, - "directoryCount": 29, - "findingCount": 1, - "repoScore": 4, - "physicalLineCount": 33794, - "logicalLineCount": 22270, - "functionCount": 491, - "normalized": { - "scorePerFile": 0.02877697841726619, - "scorePerKloc": 0.1796138302649304, - "scorePerFunction": 0.008146639511201629, - "findingsPerFile": 0.007194244604316547, - "findingsPerKloc": 0.0449034575662326, - "findingsPerFunction": 0.002036659877800407 - } - } - }, - { - "id": "fulling", - "repo": "FullAgent/fulling", - "cohort": "explicit-ai", - "ref": "d95060f8421b5888564fc8517c1fe99ded463985", - "summary": { - "fileCount": 219, - "directoryCount": 78, - "findingCount": 1, - "repoScore": 2, - "physicalLineCount": 26787, - "logicalLineCount": 12154, - "functionCount": 574, - "normalized": { - "scorePerFile": 0.0091324200913242, - "scorePerKloc": 0.1645548790521639, - "scorePerFunction": 0.003484320557491289, - "findingsPerFile": 0.0045662100456621, - "findingsPerKloc": 0.08227743952608195, - "findingsPerFunction": 0.0017421602787456446 - } - } - }, - { - "id": "astro", - "repo": "withastro/astro", - "cohort": "mature-oss", - "ref": "f7068995aa451dced13853789b0d51433c2373b5", - "summary": { - "fileCount": 1949, - "directoryCount": 852, - "findingCount": 9, - "repoScore": 20.5, - "physicalLineCount": 138854, - "logicalLineCount": 80948, - "functionCount": 3018, - "normalized": { - "scorePerFile": 0.01051821446895844, - "scorePerKloc": 
0.2532489993576123, - "scorePerFunction": 0.006792577866136514, - "findingsPerFile": 0.004617752693689072, - "findingsPerKloc": 0.11118248752285419, - "findingsPerFunction": 0.002982107355864811 - } - } - }, - { - "id": "vite", - "repo": "vitejs/vite", - "cohort": "mature-oss", - "ref": "a4922537a8d705da7769d30626a0d846511fc124", - "summary": { - "fileCount": 1229, - "directoryCount": 525, - "findingCount": 3, - "repoScore": 6, - "physicalLineCount": 77629, - "logicalLineCount": 37251, - "functionCount": 1904, - "normalized": { - "scorePerFile": 0.004882017900732303, - "scorePerKloc": 0.1610695014898929, - "scorePerFunction": 0.0031512605042016808, - "findingsPerFile": 0.0024410089503661514, - "findingsPerKloc": 0.08053475074494645, - "findingsPerFunction": 0.0015756302521008404 - } - } - }, - { - "id": "execa", - "repo": "sindresorhus/execa", - "cohort": "mature-oss", - "ref": "99d1741d2525eca71b986282148bbf2983356428", - "summary": { - "fileCount": 580, - "directoryCount": 46, - "findingCount": 1, - "repoScore": 2, - "physicalLineCount": 35995, - "logicalLineCount": 20374, - "functionCount": 1007, - "normalized": { - "scorePerFile": 0.0034482758620689655, - "scorePerKloc": 0.09816432708353785, - "scorePerFunction": 0.0019860973187686196, - "findingsPerFile": 0.0017241379310344827, - "findingsPerKloc": 0.049082163541768926, - "findingsPerFunction": 0.0009930486593843098 - } - } - }, - { - "id": "payload", - "repo": "payloadcms/payload", - "cohort": "mature-oss", - "ref": "f3f36d801010f3c95ae74655ff22a09ea66ab1ac", - "summary": { - "fileCount": 4234, - "directoryCount": 1837, - "findingCount": 3, - "repoScore": 6, - "physicalLineCount": 384327, - "logicalLineCount": 251992, - "functionCount": 3544, - "normalized": { - "scorePerFile": 0.0014170996693434106, - "scorePerKloc": 0.023810279691418777, - "scorePerFunction": 0.001693002257336343, - "findingsPerFile": 0.0007085498346717053, - "findingsPerKloc": 0.011905139845709388, - "findingsPerFunction": 
0.0008465011286681715 - } - } - }, - { - "id": "tsup", - "repo": "egoist/tsup", - "cohort": "mature-oss", - "ref": "cd03e1e00ec2bd6676ae1837cbc7e618ab6a2362", - "summary": { - "fileCount": 46, - "directoryCount": 8, - "findingCount": 0, - "repoScore": 0, - "physicalLineCount": 6456, - "logicalLineCount": 2668, - "functionCount": 140, - "normalized": { - "scorePerFile": 0, - "scorePerKloc": 0, - "scorePerFunction": 0, - "findingsPerFile": 0, - "findingsPerKloc": 0, - "findingsPerFunction": 0 - } - } - }, - { - "id": "node-notifier", - "repo": "mikaelbr/node-notifier", - "cohort": "mature-oss", - "ref": "b36c237f0d913f9df3a2bd45adc08b33ff717f6a", - "summary": { - "fileCount": 24, - "directoryCount": 5, - "findingCount": 0, - "repoScore": 0, - "physicalLineCount": 2716, - "logicalLineCount": 2114, - "functionCount": 42, - "normalized": { - "scorePerFile": 0, - "scorePerKloc": 0, - "scorePerFunction": 0, - "findingsPerFile": 0, - "findingsPerKloc": 0, - "findingsPerFunction": 0 - } - } - }, - { - "id": "zustand", - "repo": "pmndrs/zustand", - "cohort": "mature-oss", - "ref": "2e6d8813095c6a79ca208bae4c2cf5edc12049a1", - "summary": { - "fileCount": 48, - "directoryCount": 15, - "findingCount": 0, - "repoScore": 0, - "physicalLineCount": 8814, - "logicalLineCount": 7096, - "functionCount": 161, - "normalized": { - "scorePerFile": 0, - "scorePerKloc": 0, - "scorePerFunction": 0, - "findingsPerFile": 0, - "findingsPerKloc": 0, - "findingsPerFunction": 0 - } - } - }, - { - "id": "umami", - "repo": "umami-software/umami", - "cohort": "mature-oss", - "ref": "227b2554b4a373e63ceb7f48decdc60c8d3e6eaf", - "summary": { - "fileCount": 512, - "directoryCount": 87, - "findingCount": 0, - "repoScore": 0, - "physicalLineCount": 29677, - "logicalLineCount": 20508, - "functionCount": 911, - "normalized": { - "scorePerFile": 0, - "scorePerKloc": 0, - "scorePerFunction": 0, - "findingsPerFile": 0, - "findingsPerKloc": 0, - "findingsPerFunction": 0 - } - } - }, - { - "id": "hyper", - 
"repo": "vercel/hyper", - "cohort": "mature-oss", - "ref": "2a7bb18259d975f27b30b502af1be7576f6f5656", - "summary": { - "fileCount": 113, - "directoryCount": 21, - "findingCount": 0, - "repoScore": 0, - "physicalLineCount": 162735, - "logicalLineCount": 65075, - "functionCount": 5354, - "normalized": { - "scorePerFile": 0, - "scorePerKloc": 0, - "scorePerFunction": 0, - "findingsPerFile": 0, - "findingsPerKloc": 0, - "findingsPerFunction": 0 - } - } - } - ], - "rank": 1 - } - ] -} diff --git a/reports/autoresearch-candidate-rule.md b/reports/autoresearch-candidate-rule.md deleted file mode 100644 index 8351ce1..0000000 --- a/reports/autoresearch-candidate-rule.md +++ /dev/null @@ -1,66 +0,0 @@ -# Per-rule signal benchmark: Known AI repos vs older solid OSS repos - -Date: 2026-04-19 -Analyzer version: 0.3.0 -Manifest: `benchmarks/sets/known-ai-vs-solid-oss.json` -Summary: `benchmarks/results/autoresearch-candidate-rule.json` -Report: `reports/autoresearch-candidate-rule.md` - -## Goal - -Compare a cohort of known AI-generated JavaScript/TypeScript repos against well-regarded OSS repos, with the mature-OSS cohort pinned to the latest default-branch commit on or before 2025-01-01, using exact commit SHAs and normalized analyzer metrics. - -Signal score = average AUROC across the six normalized metrics when each rule runs in isolation against this pinned mini cohort. 1.00 means perfect AI-over-OSS separation, while 0.50 means no better than random ordering. 
- -## Leaderboard - -| Rank | Rule | Signal score | AI hit rate | OSS hit rate | Best metric | Best AUROC | -|---:|---|---:|---:|---:|---|---:| -| 1 | `defensive.promise-default-fallbacks` | **0.98** | 9/9 (100%) | 4/9 (44%) | findings / file | 0.99 | - - -## defensive.promise-default-fallbacks - -- Rank: **#1** of 1 -- Signal score: **0.98 / 1.00** -- Family / severity / scope: `defensive` / `strong` / `file` -- Best metric: findings / file (0.99) - -### Cohort summary - -| Cohort | Hit rate | Median findings | Median repo score | Median score / file | Median score / KLOC | Median findings / KLOC | -|---|---:|---:|---:|---:|---:|---:| -| explicit-ai | 9/9 (100%) | 3.00 | 10.00 | 0.11 | 1.18 | 0.32 | -| mature-oss | 4/9 (44%) | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | - -### AUROC by normalized metric - -- score / file: 0.99 -- score / KLOC: 0.98 -- score / function: 0.99 -- findings / file: 0.99 -- findings / KLOC: 0.95 -- findings / function: 0.98 - -### Repo results - -| Repo | Cohort | Ref | Findings | Repo score | Score / file | Score / KLOC | Findings / KLOC | -|---|---|---|---:|---:|---:|---:|---:| -| [jiayun/DevWorkbench](https://github.com/jiayun/DevWorkbench) | explicit-ai | `ea50862` | 1 | 8.00 | 0.25 | 2.68 | 0.33 | -| [garrytan/gstack](https://github.com/garrytan/gstack) | explicit-ai | `6cc094c` | 7 | 40.00 | 0.23 | 2.11 | 0.37 | -| [cloudflare/vinext](https://github.com/cloudflare/vinext) | explicit-ai | `28980b0` | 30 | 87.00 | 0.08 | 1.46 | 0.50 | -| [emdash-cms/emdash](https://github.com/emdash-cms/emdash) | explicit-ai | `dbaf8c6` | 39 | 144.00 | 0.13 | 1.20 | 0.32 | -| [redwoodjs/agent-ci](https://github.com/redwoodjs/agent-ci) | explicit-ai | `4de00d6` | 3 | 10.00 | 0.11 | 1.18 | 0.35 | -| [openclaw/openclaw](https://github.com/openclaw/openclaw) | explicit-ai | `44cf747` | 335 | 1140.00 | 0.11 | 1.11 | 0.32 | -| [modem-dev/hunk](https://github.com/modem-dev/hunk) | explicit-ai | `b37663f` | 3 | 10.00 | 0.06 | 0.74 | 0.22 | -| 
[robinebers/openusage](https://github.com/robinebers/openusage) | explicit-ai | `857f537` | 1 | 4.00 | 0.03 | 0.18 | 0.04 | -| [FullAgent/fulling](https://github.com/FullAgent/fulling) | explicit-ai | `d95060f` | 1 | 2.00 | 0.01 | 0.16 | 0.08 | -| [withastro/astro](https://github.com/withastro/astro) | mature-oss | `f706899` | 9 | 20.50 | 0.01 | 0.25 | 0.11 | -| [vitejs/vite](https://github.com/vitejs/vite) | mature-oss | `a492253` | 3 | 6.00 | 0.00 | 0.16 | 0.08 | -| [sindresorhus/execa](https://github.com/sindresorhus/execa) | mature-oss | `99d1741` | 1 | 2.00 | 0.00 | 0.10 | 0.05 | -| [payloadcms/payload](https://github.com/payloadcms/payload) | mature-oss | `f3f36d8` | 3 | 6.00 | 0.00 | 0.02 | 0.01 | -| [egoist/tsup](https://github.com/egoist/tsup) | mature-oss | `cd03e1e` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | -| [mikaelbr/node-notifier](https://github.com/mikaelbr/node-notifier) | mature-oss | `b36c237` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | -| [pmndrs/zustand](https://github.com/pmndrs/zustand) | mature-oss | `2e6d881` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | -| [umami-software/umami](https://github.com/umami-software/umami) | mature-oss | `227b255` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | -| [vercel/hyper](https://github.com/vercel/hyper) | mature-oss | `2a7bb18` | 0 | 0.00 | 0.00 | 0.00 | 0.00 | diff --git a/scripts/benchmark-experimental-rule.ts b/scripts/benchmark-experimental-rule.ts deleted file mode 100644 index 7be4374..0000000 --- a/scripts/benchmark-experimental-rule.ts +++ /dev/null @@ -1,198 +0,0 @@ -/** - * Benchmarks the current experimental candidate rule against the full pinned - * AI-vs-OSS cohort and emits structured `METRIC` lines for autoresearch. 
- */ -import { mkdir, writeFile } from "node:fs/promises"; -import path from "node:path"; -import packageJson from "../package.json"; -import { getOption } from "./lib/get-option"; -import { ensurePinnedCheckouts, readHeadRef } from "../src/benchmarks/checkouts"; -import { - createRuleSignalBenchmarkSummary, - type RuleSignalBenchmarkRun, -} from "../src/benchmarks/rule-signal"; -import { renderRuleSignalBenchmarkReport } from "../src/benchmarks/rule-signal-report"; -import { loadBenchmarkSet, resolveProjectPath } from "../src/benchmarks/manifest"; -import type { BenchmarkSet } from "../src/benchmarks/types"; -import { DEFAULT_CONFIG } from "../src/config"; -import { analyzeRepository } from "../src/core/engine"; -import { Registry } from "../src/core/registry"; -import type { AnalysisResult, RulePlugin } from "../src/core/types"; -import type { FunctionSummary } from "../src/facts/types"; -import { createDefaultRegistry } from "../src/default-registry"; -import { promiseDefaultFallbacksRule } from "../src/rules/promise-default-fallbacks"; - -const DEFAULT_MANIFEST_PATH = path.resolve( - process.cwd(), - "benchmarks/sets/known-ai-vs-solid-oss.json", -); -const DEFAULT_SUMMARY_PATH = "benchmarks/results/autoresearch-candidate-rule.json"; -const DEFAULT_REPORT_PATH = "reports/autoresearch-candidate-rule.md"; -const BENCHMARK_FUNCTION_COUNT_FACT = "file.functionSummaries"; -const BASE_FACT_IDS = new Set([ - "file.record", - "file.text", - "file.lineCount", - "file.logicalLineCount", - "directory.record", - "repo.files", - "repo.directories", -]); - -function createStandaloneRuleBenchmarkRegistry(baseRegistry: Registry, rule: RulePlugin): Registry { - const providerByFact = new Map[number]>(); - for (const provider of baseRegistry.getFactProviders()) { - for (const factId of provider.provides) { - if (!providerByFact.has(factId)) { - providerByFact.set(factId, provider); - } - } - } - - const requiredProviderIds = new Set(); - const visitedFacts = new Set(); - - const 
requireFact = (factId: string): void => { - if (visitedFacts.has(factId) || BASE_FACT_IDS.has(factId)) { - return; - } - - visitedFacts.add(factId); - const provider = providerByFact.get(factId); - if (!provider) { - throw new Error(`No fact provider produces required fact ${factId} for rule ${rule.id}`); - } - - requiredProviderIds.add(provider.id); - for (const dependency of provider.requires) { - requireFact(dependency); - } - }; - - for (const factId of [...rule.requires, BENCHMARK_FUNCTION_COUNT_FACT]) { - requireFact(factId); - } - - const registry = new Registry(); - for (const language of baseRegistry.getLanguages()) { - registry.registerLanguage(language); - } - - for (const provider of baseRegistry.getFactProviders()) { - if (requiredProviderIds.has(provider.id)) { - registry.registerFactProvider(provider); - } - } - - registry.registerRule(rule); - return registry; -} - -function divideOrNull(numerator: number, denominator: number): number | null { - return denominator > 0 ? numerator / denominator : null; -} - -async function analyzeRepositoryWithFunctionCount( - rootDir: string, - registry: Registry, -): Promise { - let functionCount = 0; - - const result = await analyzeRepository(rootDir, DEFAULT_CONFIG, registry, { - hooks: { - onFileAnalyzed(file, store) { - functionCount += - store.getFileFact(file.path, BENCHMARK_FUNCTION_COUNT_FACT)?.length ?? 
- 0; - }, - }, - }); - - return { - ...result, - summary: { - ...result.summary, - functionCount, - normalized: { - ...result.summary.normalized, - scorePerFunction: divideOrNull(result.summary.repoScore, functionCount), - findingsPerFunction: divideOrNull(result.summary.findingCount, functionCount), - }, - }, - }; -} - -async function analyzeRuleAcrossSet( - rule: RulePlugin, - checkoutsDir: string, - benchmarkSet: BenchmarkSet, -): Promise { - const baseRegistry = createDefaultRegistry(); - const registry = createStandaloneRuleBenchmarkRegistry(baseRegistry, rule); - const analyses = []; - - for (const repo of benchmarkSet.repos) { - const checkoutPath = path.join(checkoutsDir, repo.id); - const actualRef = readHeadRef(checkoutPath); - if (actualRef !== repo.ref) { - throw new Error(`Pinned ref mismatch for ${repo.id}: expected ${repo.ref}, got ${actualRef}`); - } - - console.log(`scanning ${repo.id} @ ${actualRef.slice(0, 7)}`); - const result = await analyzeRepositoryWithFunctionCount(checkoutPath, registry); - analyses.push({ spec: repo, result }); - } - - return { - rule: { - id: rule.id, - family: rule.family, - severity: rule.severity, - scope: rule.scope, - requires: [...rule.requires], - }, - analyses, - }; -} - -function printMetric(name: string, value: number | null): void { - console.log(`METRIC ${name}=${value ?? 
0}`); -} - -async function main(): Promise { - const manifestPath = getOption(process.argv.slice(2), "--manifest", DEFAULT_MANIFEST_PATH); - const summaryPathRelative = getOption(process.argv.slice(2), "--summary", DEFAULT_SUMMARY_PATH); - const reportPathRelative = getOption(process.argv.slice(2), "--report", DEFAULT_REPORT_PATH); - const benchmarkSet = await loadBenchmarkSet(manifestPath); - const checkoutsDir = resolveProjectPath(benchmarkSet.artifacts.checkoutsDir); - const summaryPath = resolveProjectPath(summaryPathRelative); - const reportPath = resolveProjectPath(reportPathRelative); - - await ensurePinnedCheckouts(checkoutsDir, benchmarkSet.repos); - - const run = await analyzeRuleAcrossSet(promiseDefaultFallbacksRule, checkoutsDir, benchmarkSet); - const summary = createRuleSignalBenchmarkSummary(benchmarkSet, [run], packageJson.version, { - manifestPath: path.relative(process.cwd(), manifestPath), - summaryPath: summaryPathRelative, - reportPath: reportPathRelative, - }); - const report = renderRuleSignalBenchmarkReport(benchmarkSet, summary); - const rule = summary.rules[0]; - if (!rule) { - throw new Error("Expected experimental benchmark summary to contain one rule."); - } - - await mkdir(path.dirname(summaryPath), { recursive: true }); - await writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`); - await mkdir(path.dirname(reportPath), { recursive: true }); - await writeFile(reportPath, `${report}\n`); - - console.log(`Wrote experimental summary to ${summaryPath}`); - console.log(`Wrote experimental report to ${reportPath}`); - printMetric("signal_score", rule.signalScore); - printMetric("best_metric_auc", rule.bestMetricAuc); - printMetric("ai_hit_rate", rule.cohorts["explicit-ai"].hitRate); - printMetric("oss_hit_rate", rule.cohorts["mature-oss"].hitRate); -} - -await main(); diff --git a/src/default-registry.ts b/src/default-registry.ts index 8a942ee..c740e45 100644 --- a/src/default-registry.ts +++ b/src/default-registry.ts @@ -17,6 
+17,7 @@ import { asyncNoiseRule } from "./rules/async-noise"; import { emptyCatchRule } from "./rules/empty-catch"; import { errorObscuringRule } from "./rules/error-obscuring"; import { errorSwallowingRule } from "./rules/error-swallowing"; +import { promiseDefaultFallbacksRule } from "./rules/promise-default-fallbacks"; import { barrelDensityRule } from "./rules/barrel-density"; import { directoryFanoutHotspotRule } from "./rules/directory-fanout-hotspot"; import { duplicateFunctionSignaturesRule } from "./rules/duplicate-function-signatures"; @@ -43,6 +44,7 @@ export function createDefaultRegistry(): Registry { registry.registerRule(errorSwallowingRule); registry.registerRule(errorObscuringRule); registry.registerRule(emptyCatchRule); + registry.registerRule(promiseDefaultFallbacksRule); registry.registerRule(barrelDensityRule); registry.registerRule(passThroughWrappersRule); registry.registerRule(duplicateFunctionSignaturesRule); diff --git a/src/rules/promise-default-fallbacks/README.md b/src/rules/promise-default-fallbacks/README.md new file mode 100644 index 0000000..bd12e05 --- /dev/null +++ b/src/rules/promise-default-fallbacks/README.md @@ -0,0 +1,68 @@ +# defensive.promise-default-fallbacks + +Flags promise `.catch()` handlers that suppress rejected async work with a cheap fallback. + +- **Family:** `defensive` +- **Severity:** `strong` +- **Scope:** `file` +- **Requires:** `file.ast` + +## How it works + +The rule looks for promise-chain catch handlers that turn rejection into: + +- an empty handler body like `.catch(() => {})` +- a direct sentinel default like `null`, `undefined`, `false`, `0`, `""`, `[]`, or `{}` +- a log-and-default block like `console.error(error); return false` + +This is intentionally distinct from the existing `try/catch` defensive rules. It targets the promise-chain version of the same failure-suppression habit, which shows up frequently in generated async glue code. 
+ +To avoid obvious noise, the rule skips very large bundled/generated files over `5000` logical lines. + +## Flagged examples + +```ts +export async function loadConfig() { + return fetchConfig().catch(() => null); +} + +export async function readClipboard() { + return navigator.clipboard.readText().catch(() => {}); +} + +export async function loadFlag() { + return fetchFlag().catch((error) => { + console.error("flag load failed", error); + return false; + }); +} +``` + +## Usually ignored + +```ts +export async function loadConfig() { + return fetchConfig().catch((error) => { + throw error; + }); +} + +export async function loadConfigResult() { + return fetchConfig().catch(() => ({ ok: false, reason: "missing" })); +} +``` + +## Scoring + +Each flagged promise catch adds `2` points. +Log-and-default handlers add `2.5` points. +The file total is capped at `8`. + +## Benchmark signal + +Full pinned benchmark against the exact `known-ai-vs-solid-oss` cohort: + +- Signal score: **0.98 / 1.00** +- Best separating metric: **findings / file (0.99)** +- Hit rate: **9/9 AI repos** vs **4/9 mature OSS repos** +- Full results: [experimental rule report](../../../reports/autoresearch-candidate-rule.md#defensivepromise-default-fallbacks) diff --git a/tests/heuristics.test.ts b/tests/heuristics.test.ts index fc6f7e6..7bf4e0c 100644 --- a/tests/heuristics.test.ts +++ b/tests/heuristics.test.ts @@ -54,6 +54,10 @@ describe("heuristic rule pack", () => { " return await getData(id);", "}", "", + "export async function fetchDataSafely(id: string) {", + " return getData(id).catch(() => null);", + "}", + "", "export function wrap(id: string) {", " return getData(id);", "}", @@ -75,6 +79,7 @@ describe("heuristic rule pack", () => { expect(ruleIds.has("comments.placeholder-comments")).toBe(true); expect(ruleIds.has("defensive.error-obscuring")).toBe(true); + expect(ruleIds.has("defensive.promise-default-fallbacks")).toBe(true); expect(ruleIds.has("defensive.async-noise")).toBe(true); 
expect(ruleIds.has("structure.pass-through-wrappers")).toBe(true); expect(ruleIds.has("structure.barrel-density")).toBe(true);