diff --git a/README.md b/README.md index d338cf6..bc8be7d 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,4 @@ # deepevents.ai deepevents.ai main codebase + +- `statistical-consistency-checker/` adds statistical methods, unit, artifact, and review readiness checks for research packets. diff --git a/statistical-consistency-checker/README.md b/statistical-consistency-checker/README.md new file mode 100644 index 0000000..44fa4d6 --- /dev/null +++ b/statistical-consistency-checker/README.md @@ -0,0 +1,40 @@ +# Statistical Consistency Checker + +This module adds a focused research review slice for statistical method consistency. + +It covers: + +- p-value, alpha, and reported-significance alignment +- effect size and confidence interval consistency +- null-value checks for significant and non-significant findings +- multiple-comparison correction review +- sample-size and small-group warnings +- unit mismatches and linked data/code artifact availability +- reviewer actions, audit events, and deterministic digests + +The implementation is dependency-free and uses synthetic sample data only. 
+ +## Run + +```bash +npm run check +npm test +npm run demo +``` + +## Demo Assets + +- short demo video: `docs/demo.webm` +- `docs/demo.svg` + +## API + +```js +import { + evaluateStatisticalConsistency, + renderStatisticalConsistencyReport +} from "./src/statistical-consistency-checker.js"; + +const result = evaluateStatisticalConsistency(input); +console.log(renderStatisticalConsistencyReport(result)); +``` diff --git a/statistical-consistency-checker/data/sample-statistics-input.json b/statistical-consistency-checker/data/sample-statistics-input.json new file mode 100644 index 0000000..987ea66 --- /dev/null +++ b/statistical-consistency-checker/data/sample-statistics-input.json @@ -0,0 +1,92 @@ +{ + "generatedAt": "2026-05-16T16:35:00Z", + "manuscript": { + "id": "ms-neuroimmune-042", + "title": "Neuroimmune Marker Study", + "domain": "neuroscience" + }, + "artifacts": [ + { + "id": "data-raw-csf", + "type": "dataset", + "available": true, + "hash": "sha256:5c1c8b" + }, + { + "id": "code-model-r", + "type": "analysis-script", + "available": false, + "hash": "sha256:missing" + }, + { + "id": "code-summary-r", + "type": "analysis-script", + "available": true, + "hash": "sha256:af4d22" + } + ], + "analyses": [ + { + "id": "primary-csf-marker", + "claim": "Marker levels were higher in responders.", + "test": "welch-t-test", + "pValue": 0.031, + "alpha": 0.05, + "reportedSignificant": true, + "effectSize": 0.82, + "confidenceInterval": [0.12, 0.65], + "direction": "positive", + "sampleSize": 28, + "groups": [ + { "name": "responders", "n": 8 }, + { "name": "non-responders", "n": 20 } + ], + "multipleComparisons": { + "tests": 6, + "correction": "none" + }, + "units": [ + { + "variable": "CSF marker", + "expected": "ng/mL", + "observed": "pg/mL" + } + ], + "primaryOutcome": true, + "preregistered": false, + "dataArtifactId": "data-raw-csf", + "codeArtifactId": "code-model-r" + }, + { + "id": "secondary-cognition-score", + "claim": "Cognitive score change was not 
statistically significant.", + "test": "linear-model", + "pValue": 0.18, + "alpha": 0.05, + "reportedSignificant": false, + "effectSize": 0.09, + "confidenceInterval": [-0.11, 0.32], + "direction": "positive", + "sampleSize": 64, + "groups": [ + { "name": "treatment", "n": 32 }, + { "name": "control", "n": 32 } + ], + "multipleComparisons": { + "tests": 1, + "correction": "not reported" + }, + "units": [ + { + "variable": "score delta", + "expected": "points", + "observed": "points" + } + ], + "primaryOutcome": false, + "preregistered": true, + "dataArtifactId": "data-raw-csf", + "codeArtifactId": "code-summary-r" + } + ] +} diff --git a/statistical-consistency-checker/docs/demo.svg b/statistical-consistency-checker/docs/demo.svg new file mode 100644 index 0000000..a47b12b --- /dev/null +++ b/statistical-consistency-checker/docs/demo.svg @@ -0,0 +1,17 @@ + + Statistical consistency checker demo + Terminal-style demo output for the statistical consistency checker. + + + + + + + Statistical Consistency Check + Neuroimmune Marker Study: blocked (24/100) + Findings high/medium/low: 2/4/1 + Manifest: 20b466c71e117884 + - high code_artifact_not_found + - high effect_outside_ci + - medium multiple_comparisons_uncorrected + diff --git a/statistical-consistency-checker/docs/demo.webm b/statistical-consistency-checker/docs/demo.webm new file mode 100644 index 0000000..c3d7e72 Binary files /dev/null and b/statistical-consistency-checker/docs/demo.webm differ diff --git a/statistical-consistency-checker/docs/requirement-map.md b/statistical-consistency-checker/docs/requirement-map.md new file mode 100644 index 0000000..6505cb4 --- /dev/null +++ b/statistical-consistency-checker/docs/requirement-map.md @@ -0,0 +1,11 @@ +# Requirement Map + +This slice targets the peer-review diagnostics part of issue #13. 
+ +| Requirement | Coverage | +| --- | --- | +| Statistical error detection | Checks p-values, confidence intervals, effect sizes, significance labels, and null-value consistency. | +| Compliance-style review packet | Emits reviewer actions, audit events, deterministic manifest digests, and finding digests. | +| Citation/review workflow fit | Links each finding to an analysis and includes artifact ids where artifact checks apply. | +| Batch-ready local workflow | `npm run demo` evaluates a synthetic review packet with no external services. | +| Tests | `npm test` covers clean packets, invalid p-values, interval mismatches, unit mismatches, artifact gaps, and deterministic output. | diff --git a/statistical-consistency-checker/package.json b/statistical-consistency-checker/package.json new file mode 100644 index 0000000..750e9b1 --- /dev/null +++ b/statistical-consistency-checker/package.json @@ -0,0 +1,15 @@ +{ + "name": "statistical-consistency-checker", + "version": "1.0.0", + "description": "Deterministic statistical consistency checks for research review packets.", + "type": "module", + "scripts": { + "check": "node --check src/statistical-consistency-checker.js && node --check scripts/demo.js && node --check test/statistical-consistency-checker.test.js", + "demo": "node scripts/demo.js", + "test": "node --test test/*.test.js" + }, + "engines": { + "node": ">=18" + }, + "license": "MIT" +} diff --git a/statistical-consistency-checker/scripts/demo.js b/statistical-consistency-checker/scripts/demo.js new file mode 100644 index 0000000..ac984b3 --- /dev/null +++ b/statistical-consistency-checker/scripts/demo.js @@ -0,0 +1,14 @@ +import fs from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { + evaluateStatisticalConsistency, + renderStatisticalConsistencyReport +} from "../src/statistical-consistency-checker.js"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const inputPath = path.join(__dirname, 
import { createHash } from "node:crypto";

/** Score penalty subtracted from 100 for each finding, keyed by severity. */
const SEVERITY_WEIGHT = {
  high: 18,
  medium: 9,
  low: 4
};

/** Sort order for reviewer actions (lower rank is listed first). */
const SEVERITY_RANK = {
  high: 0,
  medium: 1,
  low: 2
};

/** Alpha used when an analysis does not supply a usable threshold. */
const DEFAULT_ALPHA = 0.05;
/** Total sample sizes below this value are flagged for review. */
const MIN_TOTAL_SAMPLE_SIZE = 30;
/** Group sizes below this value are flagged as small. */
const MIN_GROUP_SAMPLE_SIZE = 10;

/**
 * Returns `value` when it is an array, otherwise an empty array.
 *
 * @param {unknown} value
 * @returns {Array}
 */
function asArray(value) {
  return Array.isArray(value) ? value : [];
}

/**
 * Returns `value` when it is a non-null, non-array object, otherwise `{}`.
 *
 * @param {unknown} value
 * @returns {object}
 */
function asObject(value) {
  return value && typeof value === "object" && !Array.isArray(value) ? value : {};
}

/**
 * Converts a finite number or a non-blank numeric string to a number.
 *
 * @param {unknown} value
 * @returns {number|null} The finite number, or `null` when the value is not numeric.
 */
function parseNumber(value) {
  if (typeof value === "number" && Number.isFinite(value)) {
    return value;
  }

  if (typeof value === "string" && value.trim() !== "") {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }

  return null;
}

/**
 * Trims, lower-cases, and collapses whitespace runs; nullish input becomes "".
 *
 * @param {unknown} value
 * @returns {string}
 */
function normalizeText(value) {
  return String(value ?? "")
    .trim()
    .toLowerCase()
    .replace(/\s+/g, " ");
}

/**
 * Normalizes a unit label for comparison: text normalization plus treating
 * runs of `.`, `_`, and `-` as a single space.
 *
 * @param {unknown} value
 * @returns {string}
 */
function normalizeUnit(value) {
  return normalizeText(value).replace(/[._-]+/g, " ");
}

/**
 * Reports whether `JSON.stringify` would produce output for the value
 * (it returns `undefined` for undefined, functions, and symbols).
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isSerializable(value) {
  return value !== undefined && typeof value !== "function" && typeof value !== "symbol";
}

/**
 * Serializes a value with object keys sorted so that equal data always
 * yields the same string regardless of key insertion order.
 *
 * Unserializable values follow JSON semantics: they are dropped from objects
 * and rendered as `null` inside arrays, so `{ a: undefined }` and `{}` produce
 * the same digest instead of embedding the literal text `undefined`.
 *
 * @param {unknown} value
 * @returns {string}
 */
function stableStringify(value) {
  if (Array.isArray(value)) {
    // Array.from visits holes in sparse arrays (as undefined), unlike map().
    const items = Array.from(value, (item) => (isSerializable(item) ? stableStringify(item) : "null"));
    return `[${items.join(",")}]`;
  }

  if (value && typeof value === "object") {
    const members = Object.keys(value)
      .filter((key) => isSerializable(value[key]))
      .sort()
      .map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`);
    return `{${members.join(",")}}`;
  }

  return JSON.stringify(value) ?? "null";
}

/**
 * Returns the first `length` hex characters of the SHA-256 of the stable
 * serialization of `value`.
 *
 * @param {unknown} value
 * @param {number} [length=16]
 * @returns {string}
 */
function shortDigest(value, length = 16) {
  return createHash("sha256").update(stableStringify(value)).digest("hex").slice(0, length);
}

/**
 * Coerces anything other than "high" / "medium" / "low" to "low".
 *
 * @param {unknown} severity
 * @returns {"high"|"medium"|"low"}
 */
function normalizeSeverity(severity) {
  return severity === "high" || severity === "medium" || severity === "low" ? severity : "low";
}

/**
 * Collects the ids of artifacts that are not explicitly marked
 * `available: false`. Entries without a truthy id are ignored.
 *
 * @param {unknown} artifacts
 * @returns {Set<string>}
 */
function availableArtifactIds(artifacts) {
  return new Set(
    asArray(artifacts)
      .filter((artifact) => artifact && artifact.available !== false)
      .map((artifact) => artifact.id)
      .filter(Boolean)
  );
}

/**
 * Appends a finding record to `findings` (mutates the array).
 *
 * @param {object[]} findings Accumulator that receives the new finding.
 * @param {object} analysis Analysis the finding belongs to; only `id` is read.
 * @param {string} severity "high", "medium", or "low" (anything else becomes "low").
 * @param {string} code Machine-readable finding code.
 * @param {string} message Human-readable description.
 * @param {string} action Suggested reviewer action.
 * @param {object} [metadata] Extra fields merged into the finding.
 */
function addFinding(findings, analysis, severity, code, message, action, metadata = {}) {
  findings.push({
    analysisId: analysis.id ?? "unlabeled-analysis",
    severity: normalizeSeverity(severity),
    code,
    message,
    action,
    ...metadata
  });
}

/**
 * Resolves the alpha threshold for an analysis, falling back to
 * DEFAULT_ALPHA when it is absent, non-numeric, or outside the open
 * interval (0, 1).
 *
 * @param {object} analysis
 * @returns {number}
 */
function alphaForAnalysis(analysis) {
  const rawAlpha = analysis.alpha;
  if (rawAlpha === undefined || rawAlpha === null || rawAlpha === "") {
    return DEFAULT_ALPHA;
  }

  const alpha = parseNumber(rawAlpha);
  return alpha !== null && alpha > 0 && alpha < 1 ? alpha : DEFAULT_ALPHA;
}

/**
 * Parses `analysis.confidenceInterval` as a `[lower, upper]` pair.
 *
 * @param {object} analysis
 * @returns {{lower: number, upper: number}|null} `null` when the value is not
 *   a two-element numeric array or when lower exceeds upper.
 */
function confidenceInterval(analysis) {
  const values = asArray(analysis.confidenceInterval);
  if (values.length !== 2) {
    return null;
  }

  const lower = parseNumber(values[0]);
  const upper = parseNumber(values[1]);
  if (lower === null || upper === null || lower > upper) {
    return null;
  }

  return { lower, upper };
}

/**
 * Determines how many comparisons an analysis reports: the explicit
 * `multipleComparisons.tests` (or `comparisonCount`) when positive, else the
 * number of `multipleComparisons.families`, else 1.
 *
 * @param {object} analysis
 * @returns {number} Always at least 1.
 */
function comparisonCount(analysis) {
  const multipleComparisons = asObject(analysis.multipleComparisons);
  const explicitTests = parseNumber(multipleComparisons.tests ?? analysis.comparisonCount);
  const families = asArray(multipleComparisons.families);

  if (explicitTests !== null && explicitTests > 0) {
    // A fractional value in (0, 1) would floor to 0; one analysis is at least one test.
    return Math.max(1, Math.floor(explicitTests));
  }

  if (families.length > 0) {
    return families.length;
  }

  return 1;
}

/**
 * Reports whether a correction method is named, i.e. the normalized
 * `multipleComparisons.correction` is not "", "none", or "not reported".
 *
 * @param {object} analysis
 * @returns {boolean}
 */
function hasCorrection(analysis) {
  const correction = normalizeText(asObject(analysis.multipleComparisons).correction);
  return correction !== "" && correction !== "none" && correction !== "not reported";
}

/**
 * Flags an alpha that is supplied but non-numeric or outside (0, 1).
 * An absent alpha is not flagged; DEFAULT_ALPHA applies instead.
 *
 * @param {object} analysis
 * @param {object[]} findings
 */
function checkAlpha(analysis, findings) {
  const rawAlpha = analysis.alpha;
  if (rawAlpha === undefined || rawAlpha === null || rawAlpha === "") {
    return;
  }

  const alpha = parseNumber(rawAlpha);
  if (alpha === null || alpha <= 0 || alpha >= 1) {
    addFinding(
      findings,
      analysis,
      "high",
      "invalid_alpha",
      "The alpha threshold is missing or outside the 0..1 range.",
      "Correct the alpha threshold before interpreting significance."
    );
  }
}

/**
 * Flags a missing or out-of-range p-value, and otherwise a boolean
 * `reportedSignificant` label that disagrees with `pValue <= alpha`.
 *
 * @param {object} analysis
 * @param {object[]} findings
 */
function checkPValueConsistency(analysis, findings) {
  const alpha = alphaForAnalysis(analysis);
  const pValue = parseNumber(analysis.pValue);
  const reportedSignificant = analysis.reportedSignificant;

  if (pValue === null || pValue < 0 || pValue > 1) {
    addFinding(
      findings,
      analysis,
      "high",
      "invalid_p_value",
      "The reported p-value is missing or outside the 0..1 range.",
      "Correct the p-value or mark the analysis as pending."
    );
    return;
  }

  // Non-boolean labels are treated as "not reported" and skipped.
  if (typeof reportedSignificant === "boolean") {
    const expectedSignificant = pValue <= alpha;
    if (reportedSignificant !== expectedSignificant) {
      addFinding(
        findings,
        analysis,
        "medium",
        "significance_label_mismatch",
        `The significance label does not match p=${pValue} at alpha=${alpha}.`,
        "Update the significance label or explain the threshold used."
      );
    }
  }
}

/**
 * Checks the confidence interval against the effect size, the p-value, and
 * the direction label.
 *
 * All comparisons use `analysis.nullValue` (default 0) as the no-effect
 * reference. That includes the direction check, so ratio-scale effects with
 * a null of 1 are judged against 1 rather than against 0.
 *
 * @param {object} analysis
 * @param {object[]} findings
 */
function checkConfidenceInterval(analysis, findings) {
  const interval = confidenceInterval(analysis);
  if (!interval) {
    addFinding(
      findings,
      analysis,
      "medium",
      "confidence_interval_missing",
      "The confidence interval is missing or invalid.",
      "Add a two-sided confidence interval for the reported effect."
    );
    return;
  }

  const pValue = parseNumber(analysis.pValue);
  const effectSize = parseNumber(analysis.effectSize);
  const alpha = alphaForAnalysis(analysis);
  const nullValue = parseNumber(analysis.nullValue) ?? 0;

  if (effectSize !== null && (effectSize < interval.lower || effectSize > interval.upper)) {
    addFinding(
      findings,
      analysis,
      "high",
      "effect_outside_ci",
      "The reported effect size is outside its confidence interval.",
      "Recalculate the effect size or the interval before submission."
    );
  }

  // Only cross-check against p-values that checkPValueConsistency accepts as valid.
  if (pValue !== null && pValue >= 0 && pValue <= 1) {
    const crossesNull = interval.lower <= nullValue && interval.upper >= nullValue;
    if (pValue <= alpha && crossesNull) {
      addFinding(
        findings,
        analysis,
        "high",
        "significant_p_value_ci_crosses_null",
        "The p-value is significant but the interval still crosses the null value.",
        "Recheck the model, confidence level, and reported interval."
      );
    }

    if (pValue > alpha && !crossesNull) {
      addFinding(
        findings,
        analysis,
        "medium",
        "nonsignificant_p_value_ci_excludes_null",
        "The p-value is not significant but the interval excludes the null value.",
        "Align the p-value, interval, and hypothesis test."
      );
    }
  }

  const direction = normalizeText(analysis.direction);
  // Keep the established wording for the default null of 0; name the null otherwise.
  const belowNull = nullValue === 0 ? "is negative" : `lies below the null value ${nullValue}`;
  const aboveNull = nullValue === 0 ? "is positive" : `lies above the null value ${nullValue}`;

  if (direction === "positive" && interval.upper < nullValue) {
    addFinding(
      findings,
      analysis,
      "medium",
      "direction_interval_mismatch",
      `The direction is labeled positive but the interval ${belowNull}.`,
      "Fix the direction label or review the sign convention."
    );
  }

  if (direction === "negative" && interval.lower > nullValue) {
    addFinding(
      findings,
      analysis,
      "medium",
      "direction_interval_mismatch",
      `The direction is labeled negative but the interval ${aboveNull}.`,
      "Fix the direction label or review the sign convention."
    );
  }
}

/**
 * Flags a missing or small total sample size and any group whose numeric
 * `n` is below MIN_GROUP_SAMPLE_SIZE. Groups without a numeric `n` are skipped.
 *
 * @param {object} analysis
 * @param {object[]} findings
 */
function checkSamples(analysis, findings) {
  const sampleSize = parseNumber(analysis.sampleSize);
  const groups = asArray(analysis.groups);

  if (sampleSize === null || sampleSize < MIN_TOTAL_SAMPLE_SIZE) {
    addFinding(
      findings,
      analysis,
      "medium",
      "sample_size_needs_review",
      "The analysis has no sample size or a small total sample size.",
      "Add power evidence or mark this as exploratory."
    );
  }

  for (const group of groups) {
    const groupN = parseNumber(group?.n);
    if (groupN !== null && groupN < MIN_GROUP_SAMPLE_SIZE) {
      addFinding(
        findings,
        analysis,
        "low",
        "small_group_size",
        `Group ${group.name ?? "unnamed"} has fewer than ${MIN_GROUP_SAMPLE_SIZE} observations.`,
        "Confirm the group is not over-interpreted."
      );
    }
  }
}

/**
 * Flags analyses that report more than one comparison without naming a
 * correction method.
 *
 * @param {object} analysis
 * @param {object[]} findings
 */
function checkMultipleComparisons(analysis, findings) {
  const tests = comparisonCount(analysis);
  if (tests > 1 && !hasCorrection(analysis)) {
    addFinding(
      findings,
      analysis,
      "medium",
      "multiple_comparisons_uncorrected",
      `${tests} comparisons are reported without a correction method.`,
      "Add a correction method or justify the family-wise testing plan."
    );
  }
}

/**
 * Flags unit entries whose normalized expected and observed labels differ.
 * Entries missing either label are skipped.
 *
 * @param {object} analysis
 * @param {object[]} findings
 */
function checkUnits(analysis, findings) {
  for (const unit of asArray(analysis.units)) {
    const expected = normalizeUnit(unit?.expected);
    const observed = normalizeUnit(unit?.observed);
    if (expected && observed && expected !== observed) {
      addFinding(
        findings,
        analysis,
        "medium",
        "unit_mismatch",
        `${unit.variable ?? "A variable"} is reported as ${unit.observed}, expected ${unit.expected}.`,
        "Fix the unit label or add a conversion note."
      );
    }
  }
}

/**
 * Flags data/code artifact links that are absent (low) or that point at an
 * artifact which is missing or marked unavailable (high).
 *
 * @param {object} analysis
 * @param {Array|Set<string>} artifacts Either the packet's artifact list or a
 *   precomputed Set of available artifact ids (lets callers build it once).
 * @param {object[]} findings
 */
function checkArtifacts(analysis, artifacts, findings) {
  const ids = artifacts instanceof Set ? artifacts : availableArtifactIds(artifacts);
  const requiredIds = [
    ["data", analysis.dataArtifactId],
    ["code", analysis.codeArtifactId]
  ];

  for (const [kind, id] of requiredIds) {
    if (!id) {
      addFinding(
        findings,
        analysis,
        "low",
        `${kind}_artifact_not_linked`,
        `The ${kind} artifact is not linked to this analysis.`,
        `Attach the ${kind} artifact before final review.`
      );
      continue;
    }

    if (!ids.has(id)) {
      addFinding(
        findings,
        analysis,
        "high",
        `${kind}_artifact_not_found`,
        `The linked ${kind} artifact ${id} is unavailable.`,
        `Attach ${id} or update the analysis artifact link.`,
        { artifactId: id, artifactKind: kind }
      );
    }
  }
}

/**
 * Flags primary outcomes that are not explicitly marked `preregistered: true`.
 *
 * @param {object} analysis
 * @param {object[]} findings
 */
function checkPreregistration(analysis, findings) {
  if (analysis.primaryOutcome === true && analysis.preregistered !== true) {
    addFinding(
      findings,
      analysis,
      "medium",
      "primary_outcome_not_preregistered",
      "A primary outcome is not marked as preregistered.",
      "Add registration evidence or mark this as post hoc."
    );
  }
}

/**
 * Maps a score and finding list to a status. Any high-severity finding
 * blocks regardless of score; otherwise >= 80 is "ready", >= 55 is "review",
 * and anything lower is "blocked".
 *
 * @param {number} score
 * @param {object[]} findings
 * @returns {"ready"|"review"|"blocked"}
 */
function statusFromFindings(score, findings) {
  if (findings.some((finding) => finding.severity === "high")) {
    return "blocked";
  }

  if (score >= 80) {
    return "ready";
  }

  if (score >= 55) {
    return "review";
  }

  return "blocked";
}

/**
 * Counts findings per severity.
 *
 * @param {object[]} findings
 * @returns {{high: number, medium: number, low: number}}
 */
function severityCounts(findings) {
  return findings.reduce(
    (counts, finding) => {
      counts[finding.severity] += 1;
      return counts;
    },
    { high: 0, medium: 0, low: 0 }
  );
}

/**
 * Builds the reviewer action list: high and medium findings only, ordered by
 * severity and then by finding code. The input array is not mutated.
 *
 * @param {object[]} findings
 * @returns {object[]}
 */
function sortedReviewerActions(findings) {
  return findings
    .filter((finding) => finding.severity !== "low")
    .sort(
      (left, right) =>
        SEVERITY_RANK[left.severity] - SEVERITY_RANK[right.severity] || left.code.localeCompare(right.code)
    )
    .map((finding) => ({
      severity: finding.severity,
      code: finding.code,
      analysisId: finding.analysisId,
      artifactId: finding.artifactId,
      action: finding.action
    }));
}

/**
 * Runs every consistency check over a review packet and summarizes the result.
 *
 * @param {object} input Review packet with `generatedAt`, `manuscript`,
 *   `analyses`, and `artifacts`. Missing or malformed sections are treated as empty.
 * @returns {object} Result with `manuscriptId`, `title`, `status`, `score`
 *   (100 minus severity penalties, floored at 0), `counts`, `findings`,
 *   `reviewerActions`, `auditEvents`, `manifestDigest`, and `findingsDigest`.
 * @throws {Error} When `generatedAt` is missing or falsy.
 */
export function evaluateStatisticalConsistency(input) {
  const packet = asObject(input);
  if (!packet.generatedAt) {
    throw new Error("generatedAt is required");
  }

  const manuscript = asObject(packet.manuscript);
  const analyses = asArray(packet.analyses);
  const artifacts = asArray(packet.artifacts);
  // Built once here so the artifact check does not rebuild the Set per analysis.
  const availableIds = availableArtifactIds(artifacts);
  const findings = [];

  if (analyses.length === 0) {
    addFinding(
      findings,
      { id: "packet" },
      "high",
      "analysis_set_empty",
      "No statistical analyses were provided.",
      "Add at least one analysis record before review."
    );
  }

  for (const analysis of analyses.map(asObject)) {
    checkAlpha(analysis, findings);
    checkPValueConsistency(analysis, findings);
    checkConfidenceInterval(analysis, findings);
    checkSamples(analysis, findings);
    checkMultipleComparisons(analysis, findings);
    checkUnits(analysis, findings);
    checkArtifacts(analysis, availableIds, findings);
    checkPreregistration(analysis, findings);
  }

  const penalty = findings.reduce((total, finding) => total + SEVERITY_WEIGHT[finding.severity], 0);
  const score = Math.max(0, 100 - penalty);
  const counts = severityCounts(findings);
  const manifestDigest = shortDigest({
    manuscript,
    analyses,
    artifacts,
    generatedAt: packet.generatedAt
  });
  const findingsDigest = shortDigest(findings);

  return {
    manuscriptId: manuscript.id ?? "unlabeled-manuscript",
    title: manuscript.title ?? "Untitled manuscript",
    status: statusFromFindings(score, findings),
    score,
    counts,
    findings,
    reviewerActions: sortedReviewerActions(findings),
    auditEvents: [
      {
        type: "statistical_consistency_evaluated",
        at: packet.generatedAt,
        analyses: analyses.length,
        findings: findings.length,
        findingsDigest
      }
    ],
    manifestDigest,
    findingsDigest
  };
}

/**
 * Renders an evaluation result as a plain-text report: a header, the
 * severity counts, the manifest digest, and one line per reviewer action
 * (or "- none").
 *
 * @param {object} result Value returned by `evaluateStatisticalConsistency`.
 * @returns {string}
 */
export function renderStatisticalConsistencyReport(result) {
  const lines = [
    "Statistical Consistency Check",
    `${result.title}: ${result.status} (${result.score}/100)`,
    `Findings high/medium/low: ${result.counts.high}/${result.counts.medium}/${result.counts.low}`,
    `Manifest: ${result.manifestDigest}`,
    "",
    "Reviewer actions:"
  ];

  if (result.reviewerActions.length === 0) {
    lines.push("- none");
  } else {
    for (const action of result.reviewerActions) {
      lines.push(`- ${action.severity} ${action.code}: ${action.action}`);
    }
  }

  return lines.join("\n");
}
import assert from "node:assert/strict";
import test from "node:test";
import {
  evaluateStatisticalConsistency,
  renderStatisticalConsistencyReport
} from "../src/statistical-consistency-checker.js";

/**
 * Builds a packet that produces no findings; `overrides` replaces top-level
 * packet fields.
 */
function basePacket(overrides = {}) {
  const cleanAnalysis = {
    id: "primary",
    pValue: 0.2,
    alpha: 0.05,
    reportedSignificant: false,
    effectSize: 0.1,
    confidenceInterval: [-0.2, 0.4],
    direction: "positive",
    sampleSize: 80,
    groups: [
      { name: "a", n: 40 },
      { name: "b", n: 40 }
    ],
    multipleComparisons: {
      tests: 1,
      correction: "not reported"
    },
    units: [{ variable: "score", expected: "points", observed: "points" }],
    primaryOutcome: false,
    preregistered: true,
    dataArtifactId: "data-1",
    codeArtifactId: "code-1"
  };

  return {
    generatedAt: "2026-05-16T16:35:00Z",
    manuscript: {
      id: "ms-001",
      title: "Clean Trial"
    },
    artifacts: [
      { id: "data-1", type: "dataset", available: true },
      { id: "code-1", type: "script", available: true }
    ],
    analyses: [cleanAnalysis],
    ...overrides
  };
}

/**
 * Evaluates the clean packet with its single analysis patched by
 * `analysisOverrides` and any further packet fields from `packetOverrides`.
 */
function evaluateWith(analysisOverrides, packetOverrides = {}) {
  const [cleanAnalysis] = basePacket().analyses;
  return evaluateStatisticalConsistency(
    basePacket({
      ...packetOverrides,
      analyses: [{ ...cleanAnalysis, ...analysisOverrides }]
    })
  );
}

/** Asserts that at least one finding carries the given code. */
function assertHasFinding(result, code) {
  assert(result.findings.some((finding) => finding.code === code));
}

test("allows a clean packet", () => {
  const result = evaluateStatisticalConsistency(basePacket());

  assert.equal(result.status, "ready");
  assert.equal(result.score, 100);
  assert.deepEqual(result.counts, { high: 0, medium: 0, low: 0 });
  assert.equal(result.reviewerActions.length, 0);
});

test("blocks an effect size outside the confidence interval", () => {
  const result = evaluateWith({ effectSize: 1.2, confidenceInterval: [0.2, 0.8] });

  assert.equal(result.status, "blocked");
  assertHasFinding(result, "effect_outside_ci");
});

test("flags significant p-values whose interval crosses the null", () => {
  const result = evaluateWith({
    pValue: 0.01,
    reportedSignificant: true,
    confidenceInterval: [-0.1, 0.5]
  });

  assertHasFinding(result, "significant_p_value_ci_crosses_null");
});

test("flags non-significant p-values whose interval excludes the null", () => {
  const result = evaluateWith({
    pValue: 0.18,
    reportedSignificant: false,
    confidenceInterval: [0.1, 0.5]
  });

  assertHasFinding(result, "nonsignificant_p_value_ci_excludes_null");
});

test("flags invalid p-values", () => {
  const result = evaluateWith({ pValue: 1.4 });

  assert.equal(result.status, "blocked");
  assertHasFinding(result, "invalid_p_value");
});

test("flags invalid alpha thresholds", () => {
  const result = evaluateWith({ alpha: 1.3 });

  assert.equal(result.status, "blocked");
  assertHasFinding(result, "invalid_alpha");
});

test("flags significance labels that do not match alpha", () => {
  const result = evaluateWith({
    pValue: 0.01,
    reportedSignificant: false,
    confidenceInterval: [0.1, 0.5]
  });

  assertHasFinding(result, "significance_label_mismatch");
});

test("flags multiple comparisons without correction", () => {
  const result = evaluateWith({ multipleComparisons: { tests: 4, correction: "none" } });

  assertHasFinding(result, "multiple_comparisons_uncorrected");
});

test("flags unit mismatches and missing artifacts", () => {
  const result = evaluateWith(
    {
      units: [{ variable: "marker", expected: "ng/mL", observed: "pg/mL" }],
      codeArtifactId: "missing-code"
    },
    { artifacts: [{ id: "data-1", type: "dataset", available: true }] }
  );

  assertHasFinding(result, "unit_mismatch");
  const artifactFinding = result.findings.find((finding) => finding.code === "code_artifact_not_found");
  assert.equal(artifactFinding.artifactId, "missing-code");
});

test("requires generatedAt", () => {
  assert.throws(() => evaluateStatisticalConsistency({ analyses: [] }), /generatedAt is required/);
});

test("produces deterministic digests", () => {
  const first = evaluateStatisticalConsistency(basePacket());
  const second = evaluateStatisticalConsistency(basePacket());

  assert.equal(first.manifestDigest, second.manifestDigest);
  assert.equal(first.findingsDigest, second.findingsDigest);
});

test("sorts reviewer actions by severity", () => {
  const result = evaluateWith({
    multipleComparisons: { tests: 4, correction: "none" },
    codeArtifactId: "missing-code"
  });
  const [firstAction] = result.reviewerActions;

  assert.equal(firstAction.severity, "high");
  assert.equal(firstAction.code, "code_artifact_not_found");
});

test("renders a reviewer friendly report", () => {
  const result = evaluateWith({ pValue: "bad", confidenceInterval: [0.1, 0.2] });

  const report = renderStatisticalConsistencyReport(result);

  assert.match(report, /Statistical Consistency Check/);
  assert.match(report, /invalid_p_value/);
  assert.match(report, /Manifest:/);
});