diff --git a/ai-output-evidence-verifier/README.md b/ai-output-evidence-verifier/README.md
new file mode 100644
index 0000000..0b83bd7
--- /dev/null
+++ b/ai-output-evidence-verifier/README.md
@@ -0,0 +1,28 @@
+# AI Output Evidence Verifier
+
+This module is a focused implementation slice for SCIBASE issue #13, "AI-Assisted Research Tools (MVP Level)." It models the reviewer gate that should run before AI-generated summaries, peer-review aids, citation suggestions, or manuscript action packets are shown as trustworthy research guidance.
+
+The verifier answers four practical questions:
+
+- Are the AI tool's claims backed by known project sources?
+- Are any cited sources retracted, under expression of concern, or too stale for the claim type?
+- Does each output mode include the disclosure and citation fields expected by a research assistant?
+- Which concrete reviewer actions should be completed before the output is released?
+
+## Files
+
+- `src/ai-output-evidence-verifier.js` contains deterministic evidence checks and audit digest generation.
+- `data/sample-assistant-output.json` provides a mixed research-assistant output packet.
+- `test/ai-output-evidence-verifier.test.js` covers blocked and releasable output paths.
+- `scripts/demo.js` prints a concise release-readiness report for the sample packet.
+- `docs/demo.svg` and `docs/demo.mp4` show the intended reviewer surface.
+
+## Run
+
+```sh
+npm test
+npm run demo
+npm run demo:video
+```
+
+The verifier, demo, and tests are dependency-free Node.js (>= 18) and use synthetic project data only; `npm run demo:video` additionally requires macOS with the Xcode command-line tools, because it compiles an Objective-C renderer with `clang` and Apple frameworks.
diff --git a/ai-output-evidence-verifier/data/sample-assistant-output.json b/ai-output-evidence-verifier/data/sample-assistant-output.json new file mode 100644 index 0000000..b0eecee --- /dev/null +++ b/ai-output-evidence-verifier/data/sample-assistant-output.json @@ -0,0 +1,112 @@ +{ + "project": { + "id": "proj-neuro-imaging-42", + "title": "Neurovascular Calcium Imaging Protocol", + "domain": "neuroscience", + "reviewDate": "2026-05-16", + "requiredDisclosures": [ + "ai-generated", + "human-review-required", + "source-coverage" + ] + }, + "sources": [ + { + "id": "paper-astrocyte-2025", + "title": "Astrocyte calcium signaling during sleep spindle propagation", + "type": "preprint", + "status": "active", + "publishedAt": "2025-11-20", + "reviewedAt": "2026-05-10", + "tags": ["calcium-imaging", "sleep-spindles", "two-photon"], + "url": "https://example.org/preprints/astrocyte-calcium-2025", + "license": "CC-BY-4.0" + }, + { + "id": "dataset-ca-raw", + "title": "Raw calcium-imaging dataset and segmentation masks", + "type": "dataset", + "status": "active", + "publishedAt": "2026-01-12", + "reviewedAt": "2026-05-14", + "tags": ["calcium-imaging", "raw-data", "segmentation"], + "url": "https://example.org/datasets/ca-raw", + "license": "CC0-1.0" + }, + { + "id": "paper-retired-marker", + "title": "Legacy glial activation marker in fixed tissue", + "type": "article", + "status": "retracted", + "publishedAt": "2021-03-10", + "reviewedAt": "2024-01-10", + "tags": ["fixed-tissue", "marker-panel"], + "url": "https://example.org/articles/retracted-marker", + "license": "unknown" + } + ], + "outputs": [ + { + "id": "summary-main", + "mode": "summary", + "audience": "collaborator", + "generatedAt": "2026-05-16T17:30:00Z", + "disclosures": ["ai-generated", "human-review-required", "source-coverage"], + "claims": [ + { + "id": "c1", + "text": "Sleep spindle events correlate with astrocyte calcium transients in the sample cohort.", + "sourceIds": ["paper-astrocyte-2025", 
"dataset-ca-raw"], + "tags": ["calcium-imaging", "sleep-spindles"], + "confidence": 0.84 + }, + { + "id": "c2", + "text": "The analysis proves a causal glial mechanism for all sleep-stage transitions.", + "sourceIds": [], + "tags": ["causality", "sleep-stages"], + "confidence": 0.61 + } + ] + }, + { + "id": "peer-review-aid", + "mode": "peer-review", + "audience": "author", + "generatedAt": "2026-05-16T17:34:00Z", + "disclosures": ["ai-generated", "human-review-required"], + "claims": [ + { + "id": "c3", + "text": "The marker panel is validated for fixed tissue and can be cited without caveats.", + "sourceIds": ["paper-retired-marker"], + "tags": ["marker-panel"], + "confidence": 0.72 + }, + { + "id": "c4", + "text": "The manuscript should disclose segmentation mask provenance next to the methods section.", + "sourceIds": ["dataset-ca-raw"], + "tags": ["raw-data", "segmentation"], + "confidence": 0.9 + } + ] + }, + { + "id": "citation-suggestions", + "mode": "citation-recommendation", + "audience": "editor", + "generatedAt": "2026-05-16T17:36:00Z", + "disclosures": ["ai-generated", "human-review-required", "source-coverage"], + "claims": [ + { + "id": "c5", + "text": "Prioritize the calcium-imaging preprint and raw dataset in the collaborator handoff.", + "sourceIds": ["paper-astrocyte-2025", "dataset-ca-raw"], + "tags": ["citation", "handoff"], + "confidence": 0.88 + } + ] + } + ] +} diff --git a/ai-output-evidence-verifier/docs/demo.mp4 b/ai-output-evidence-verifier/docs/demo.mp4 new file mode 100644 index 0000000..d396534 Binary files /dev/null and b/ai-output-evidence-verifier/docs/demo.mp4 differ diff --git a/ai-output-evidence-verifier/docs/demo.svg b/ai-output-evidence-verifier/docs/demo.svg new file mode 100644 index 0000000..70472da --- /dev/null +++ b/ai-output-evidence-verifier/docs/demo.svg @@ -0,0 +1,37 @@ + + AI Output Evidence Verifier demo + Reviewer dashboard showing AI output source coverage, blockers, reviewer tasks, and audit digest. 
+ + + AI Output Evidence Verifier + Research-assistant output trust gate for SCIBASE issue #13 + + + Source Coverage + known IDs + missing IDs + + + + Evidence Risk + unsupported claims + retracted sources + + + + Disclosures + AI generated + human review + + + + Audit Digest + policy hash + review packet + + + Neurovascular Calcium Imaging Protocol + 3 AI outputs, 5 claims, 3 sources, deterministic reviewer tasks + Recommendation: hold for evidence fix before collaborator release + Blocks unsupported, high-risk, and retracted-source claims + diff --git a/ai-output-evidence-verifier/docs/requirement-map.md b/ai-output-evidence-verifier/docs/requirement-map.md new file mode 100644 index 0000000..2591f17 --- /dev/null +++ b/ai-output-evidence-verifier/docs/requirement-map.md @@ -0,0 +1,15 @@ +# Requirement Map + +Issue #13 asks for AI-assisted research tools that help with summaries, peer-review diagnostics, citation management, and research quality without asking reviewers to trust ungrounded model output. This slice implements a deterministic evidence gate for those tool outputs. + +| Requirement area | Implementation evidence | +| --- | --- | +| AI paper summarizer | Validates summary claims, source IDs, source freshness, and required AI disclosures before release. | +| AI peer review aid | Blocks peer-review suggestions that cite retracted sources or omit source coverage disclosure. | +| Citation management | Verifies citation-recommendation claims cite known source records of acceptable types. | +| Technical issue checking | Produces reviewer tasks for unsupported claims, unsafe source status, stale review evidence, and missing disclosure. | +| MVP-level local demo | `npm run demo` prints a deterministic release-readiness report from synthetic data. | +| Reviewer confidence | `auditDigest` provides a stable hash of the reviewed outputs, policy thresholds, findings, and source coverage. 
| +| Demo video | `npm run demo:video` renders `docs/demo.mp4` without service credentials or external APIs. | + +This module intentionally avoids live LLM calls, scraping, external credentials, and private data. It is the trust layer around AI outputs, not another generic model wrapper. diff --git a/ai-output-evidence-verifier/package.json b/ai-output-evidence-verifier/package.json new file mode 100644 index 0000000..773359f --- /dev/null +++ b/ai-output-evidence-verifier/package.json @@ -0,0 +1,15 @@ +{ + "name": "ai-output-evidence-verifier", + "version": "0.1.0", + "private": true, + "description": "Evidence verifier for AI-assisted research tool outputs for SCIBASE issue #13.", + "type": "module", + "scripts": { + "test": "node test/ai-output-evidence-verifier.test.js", + "demo": "node scripts/demo.js", + "demo:video": "clang -fobjc-arc -framework Foundation -framework AppKit -framework AVFoundation -framework CoreMedia -framework CoreVideo scripts/render-demo-video.m -o /tmp/scibase-ai-output-evidence-demo && /tmp/scibase-ai-output-evidence-demo docs/demo.mp4" + }, + "engines": { + "node": ">=18" + } +} diff --git a/ai-output-evidence-verifier/scripts/demo.js b/ai-output-evidence-verifier/scripts/demo.js new file mode 100644 index 0000000..e166764 --- /dev/null +++ b/ai-output-evidence-verifier/scripts/demo.js @@ -0,0 +1,25 @@ +import { readFile } from "node:fs/promises"; +import { fileURLToPath } from "node:url"; +import { dirname, join } from "node:path"; +import { verifyAiOutputEvidence } from "../src/ai-output-evidence-verifier.js"; + +const here = dirname(fileURLToPath(import.meta.url)); +const samplePath = join(here, "..", "data", "sample-assistant-output.json"); +const sample = JSON.parse(await readFile(samplePath, "utf8")); +const report = verifyAiOutputEvidence(sample); + +console.log(`Project: ${report.project.title}`); +console.log(`Ready: ${report.ready ? 
"yes" : "no"}`); +console.log(`Recommendation: ${report.recommendation}`); +console.log(`Outputs: ${report.counts.outputs}`); +console.log(`Claims: ${report.counts.claims}`); +console.log(`Sources: ${report.counts.sources}`); +console.log(`Source coverage: ${report.sourceCoverage.knownCount}/${report.sourceCoverage.referencedCount} known`); +console.log(`Blockers: ${report.counts.blockers}`); +console.log(`Warnings: ${report.counts.warnings}`); +console.log(`Reviewer tasks: ${report.counts.reviewerTasks}`); +console.log(`Audit digest: ${report.auditDigest}`); + +for (const task of report.reviewerTasks.slice(0, 5)) { + console.log(`- [${task.severity}] ${task.target}: ${task.action}`); +} diff --git a/ai-output-evidence-verifier/scripts/render-demo-video.m b/ai-output-evidence-verifier/scripts/render-demo-video.m new file mode 100644 index 0000000..272b174 --- /dev/null +++ b/ai-output-evidence-verifier/scripts/render-demo-video.m @@ -0,0 +1,155 @@ +#import +#import +#import +#import + +static NSColor *RGB(CGFloat r, CGFloat g, CGFloat b) { + return [NSColor colorWithCalibratedRed:r / 255.0 green:g / 255.0 blue:b / 255.0 alpha:1.0]; +} + +static void FillRounded(NSRect rect, CGFloat radius, NSColor *color) { + NSBezierPath *path = [NSBezierPath bezierPathWithRoundedRect:rect xRadius:radius yRadius:radius]; + [color setFill]; + [path fill]; +} + +static void StrokeRounded(NSRect rect, CGFloat radius, NSColor *color) { + NSBezierPath *path = [NSBezierPath bezierPathWithRoundedRect:rect xRadius:radius yRadius:radius]; + [color setStroke]; + [path setLineWidth:2.0]; + [path stroke]; +} + +static void DrawText(NSString *text, NSRect rect, CGFloat size, NSColor *color, NSFontWeight weight) { + NSMutableParagraphStyle *style = [[NSMutableParagraphStyle alloc] init]; + style.lineBreakMode = NSLineBreakByWordWrapping; + NSDictionary *attrs = @{ + NSFontAttributeName: [NSFont systemFontOfSize:size weight:weight], + NSForegroundColorAttributeName: color, + 
NSParagraphStyleAttributeName: style + }; + [text drawInRect:rect withAttributes:attrs]; +} + +static void DrawCard(NSString *title, NSArray *lines, NSRect rect, NSColor *fill, NSColor *stroke, NSColor *titleColor, NSColor *bodyColor) { + FillRounded(rect, 14.0, fill); + StrokeRounded(rect, 14.0, stroke); + DrawText(title, NSMakeRect(rect.origin.x + 22, rect.origin.y + rect.size.height - 56, rect.size.width - 44, 30), 22, titleColor, NSFontWeightBold); + for (NSUInteger i = 0; i < lines.count; i++) { + DrawText(lines[i], NSMakeRect(rect.origin.x + 22, rect.origin.y + rect.size.height - 96 - (CGFloat)i * 31, rect.size.width - 44, 28), 18, bodyColor, NSFontWeightRegular); + } +} + +static void DrawFrame(CGContextRef cg, int width, int height, int frame) { + [NSGraphicsContext saveGraphicsState]; + NSGraphicsContext *context = [NSGraphicsContext graphicsContextWithCGContext:cg flipped:NO]; + [NSGraphicsContext setCurrentContext:context]; + + [RGB(245, 247, 249) setFill]; + NSRectFill(NSMakeRect(0, 0, width, height)); + + FillRounded(NSMakeRect(64, 54, 1152, 612), 18, [NSColor whiteColor]); + StrokeRounded(NSMakeRect(64, 54, 1152, 612), 18, RGB(207, 216, 223)); + + CGFloat progress = MIN(1.0, MAX(0.0, ((CGFloat)frame - 8.0) / 72.0)); + DrawText(@"AI Output Evidence Verifier", NSMakeRect(104, 595, 760, 48), 36, RGB(23, 33, 43), NSFontWeightBold); + DrawText(@"Research-assistant output trust gate for SCIBASE issue #13", NSMakeRect(104, 563, 860, 28), 18, RGB(82, 99, 113), NSFontWeightRegular); + + DrawCard(@"Source Coverage", @[@"known sources", @"missing IDs", @"source types"], NSMakeRect(104, 362, 252, 152), RGB(233, 246, 240), RGB(159, 208, 186), RGB(23, 72, 49), RGB(39, 99, 74)); + DrawCard(@"Evidence Risk", @[@"unsupported claims", @"retracted sources", @"high-risk tags"], NSMakeRect(386, 362, 252, 152), RGB(255, 240, 230), RGB(233, 180, 137), RGB(121, 59, 16), RGB(143, 78, 32)); + DrawCard(@"Disclosure Gate", @[@"AI generated", @"human review", @"source coverage"], 
NSMakeRect(668, 362, 252, 152), RGB(237, 242, 251), RGB(171, 192, 231), RGB(32, 60, 105), RGB(49, 85, 143)); + DrawCard(@"Audit Digest", @[@"policy hash", @"review packet", @"stable handoff"], NSMakeRect(950, 362, 226, 152), RGB(248, 237, 247), RGB(213, 171, 208), RGB(90, 40, 84), RGB(115, 56, 107)); + + FillRounded(NSMakeRect(104, 136, 1072, 174), 14, RGB(23, 33, 43)); + DrawText(@"Neurovascular Calcium Imaging Protocol", NSMakeRect(132, 250, 740, 34), 24, [NSColor whiteColor], NSFontWeightBold); + DrawText(@"3 AI outputs, 5 claims, 3 sources, deterministic reviewer tasks", NSMakeRect(132, 209, 930, 28), 18, RGB(200, 211, 220), NSFontWeightRegular); + DrawText(@"Recommendation: hold for evidence fix before collaborator release", NSMakeRect(132, 172, 930, 28), 18, RGB(200, 211, 220), NSFontWeightRegular); + DrawText(@"Blocks unsupported, high-risk, and retracted-source claims", NSMakeRect(132, 135, 930, 28), 18, RGB(200, 211, 220), NSFontWeightRegular); + + FillRounded(NSMakeRect(132, 93, 980.0 * progress, 10), 5, RGB(219, 88, 66)); + DrawText(@"evidence gate running", NSMakeRect(132 + 980.0 * progress + 14, 84, 220, 28), 14, RGB(82, 99, 113), NSFontWeightMedium); + + [NSGraphicsContext restoreGraphicsState]; +} + +int main(int argc, const char *argv[]) { + @autoreleasepool { + if (argc < 2) { + fprintf(stderr, "usage: render-demo-video output.mp4\n"); + return 2; + } + + NSString *outputPath = [NSString stringWithUTF8String:argv[1]]; + NSURL *outputURL = [NSURL fileURLWithPath:outputPath]; + [[NSFileManager defaultManager] removeItemAtURL:outputURL error:nil]; + + const int width = 1280; + const int height = 720; + NSError *error = nil; + AVAssetWriter *writer = [[AVAssetWriter alloc] initWithURL:outputURL fileType:AVFileTypeMPEG4 error:&error]; + if (!writer) { + NSLog(@"writer error: %@", error); + return 1; + } + + NSDictionary *videoSettings = @{ + AVVideoCodecKey: AVVideoCodecTypeH264, + AVVideoWidthKey: @(width), + AVVideoHeightKey: @(height) + }; + 
AVAssetWriterInput *input = [AVAssetWriterInput assetWriterInputWithMediaType:AVMediaTypeVideo outputSettings:videoSettings]; + input.expectsMediaDataInRealTime = NO; + + NSDictionary *pixelBufferAttributes = @{ + (NSString *)kCVPixelBufferPixelFormatTypeKey: @(kCVPixelFormatType_32ARGB), + (NSString *)kCVPixelBufferWidthKey: @(width), + (NSString *)kCVPixelBufferHeightKey: @(height) + }; + AVAssetWriterInputPixelBufferAdaptor *adaptor = [AVAssetWriterInputPixelBufferAdaptor assetWriterInputPixelBufferAdaptorWithAssetWriterInput:input sourcePixelBufferAttributes:pixelBufferAttributes]; + + if (![writer canAddInput:input]) { + NSLog(@"cannot add writer input"); + return 1; + } + [writer addInput:input]; + [writer startWriting]; + [writer startSessionAtSourceTime:kCMTimeZero]; + + CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); + for (int frame = 0; frame < 96; frame++) { + while (!input.readyForMoreMediaData) { + [NSThread sleepForTimeInterval:0.01]; + } + + CVPixelBufferRef buffer = NULL; + CVPixelBufferPoolCreatePixelBuffer(NULL, adaptor.pixelBufferPool, &buffer); + CVPixelBufferLockBaseAddress(buffer, 0); + void *baseAddress = CVPixelBufferGetBaseAddress(buffer); + size_t bytesPerRow = CVPixelBufferGetBytesPerRow(buffer); + CGContextRef cg = CGBitmapContextCreate(baseAddress, width, height, 8, bytesPerRow, colorSpace, kCGImageAlphaNoneSkipFirst); + DrawFrame(cg, width, height, frame); + CGContextRelease(cg); + CVPixelBufferUnlockBaseAddress(buffer, 0); + + CMTime presentationTime = CMTimeMake(frame, 24); + [adaptor appendPixelBuffer:buffer withPresentationTime:presentationTime]; + CVPixelBufferRelease(buffer); + } + + CGColorSpaceRelease(colorSpace); + [input markAsFinished]; + + dispatch_semaphore_t sema = dispatch_semaphore_create(0); + [writer finishWritingWithCompletionHandler:^{ + dispatch_semaphore_signal(sema); + }]; + dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER); + + if (writer.status != AVAssetWriterStatusCompleted) { + NSLog(@"writer 
failed: %@", writer.error); + return 1; + } + } + + return 0; +} diff --git a/ai-output-evidence-verifier/src/ai-output-evidence-verifier.js b/ai-output-evidence-verifier/src/ai-output-evidence-verifier.js new file mode 100644 index 0000000..e69e475 --- /dev/null +++ b/ai-output-evidence-verifier/src/ai-output-evidence-verifier.js @@ -0,0 +1,311 @@ +import { createHash } from "node:crypto"; + +const DEFAULT_POLICY = { + staleReviewDays: 180, + minClaimConfidence: 0.65, + requiredDisclosuresByMode: { + summary: ["ai-generated", "human-review-required", "source-coverage"], + "peer-review": ["ai-generated", "human-review-required", "source-coverage"], + "citation-recommendation": ["ai-generated", "human-review-required", "source-coverage"] + }, + highRiskTags: ["causality", "clinical", "dosage", "safety", "policy"], + requiredSourceTypesByTag: { + "raw-data": ["dataset"], + segmentation: ["dataset"], + citation: ["article", "preprint", "dataset"] + } +}; + +function stableStringify(value) { + if (Array.isArray(value)) { + return `[${value.map(stableStringify).join(",")}]`; + } + + if (value && typeof value === "object") { + return `{${Object.keys(value) + .sort() + .map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`) + .join(",")}}`; + } + + return JSON.stringify(value); +} + +function sha256(value) { + return createHash("sha256").update(stableStringify(value)).digest("hex"); +} + +function daysBetween(start, end) { + const startMs = Date.parse(start); + const endMs = Date.parse(end); + + if (Number.isNaN(startMs) || Number.isNaN(endMs)) { + return Number.POSITIVE_INFINITY; + } + + return Math.floor((endMs - startMs) / 86_400_000); +} + +function unique(values) { + return [...new Set(values.filter(Boolean))].sort(); +} + +function sourceIndex(sources) { + return new Map((sources || []).map((source) => [source.id, source])); +} + +function finding(code, severity, output, claim, message, evidence = {}) { + return { + code, + severity, + outputId: 
output.id, + mode: output.mode, + claimId: claim?.id || null, + message, + evidence + }; +} + +function buildSourceCoverage(outputs, sourcesById) { + const referencedSourceIds = unique( + (outputs || []).flatMap((output) => (output.claims || []).flatMap((claim) => claim.sourceIds || [])) + ); + const knownSourceIds = referencedSourceIds.filter((id) => sourcesById.has(id)); + const missingSourceIds = referencedSourceIds.filter((id) => !sourcesById.has(id)); + + return { + referencedSourceIds, + knownSourceIds, + missingSourceIds, + referencedCount: referencedSourceIds.length, + knownCount: knownSourceIds.length, + missingCount: missingSourceIds.length + }; +} + +function checkDisclosures(output, policy) { + const expected = policy.requiredDisclosuresByMode[output.mode] || []; + const actual = new Set(output.disclosures || []); + const missing = expected.filter((disclosure) => !actual.has(disclosure)); + + if (missing.length === 0) { + return []; + } + + return [ + finding( + "missing_disclosure", + "blocker", + output, + null, + `Output ${output.id} is missing required disclosure fields: ${missing.join(", ")}`, + { missing } + ) + ]; +} + +function checkClaimSources(output, claim, sourcesById, reviewDate, policy) { + const findings = []; + const sourceIds = claim.sourceIds || []; + const sources = sourceIds.map((id) => sourcesById.get(id)).filter(Boolean); + const missingSourceIds = sourceIds.filter((id) => !sourcesById.has(id)); + + if (sourceIds.length === 0) { + findings.push( + finding( + "unsupported_claim", + "blocker", + output, + claim, + "Claim has no cited source IDs.", + { text: claim.text } + ) + ); + } + + if (missingSourceIds.length > 0) { + findings.push( + finding( + "unknown_source", + "blocker", + output, + claim, + `Claim references unknown source IDs: ${missingSourceIds.join(", ")}`, + { missingSourceIds } + ) + ); + } + + for (const source of sources) { + if (source.status && source.status !== "active") { + findings.push( + finding( + 
"unsafe_source_status", + "blocker", + output, + claim, + `Claim cites source ${source.id} with status ${source.status}.`, + { sourceId: source.id, status: source.status } + ) + ); + } + + const ageDays = daysBetween(source.reviewedAt || source.publishedAt, reviewDate); + if (ageDays > policy.staleReviewDays) { + findings.push( + finding( + "stale_source_review", + "warning", + output, + claim, + `Source ${source.id} was last reviewed ${ageDays} days before the project review date.`, + { sourceId: source.id, ageDays } + ) + ); + } + } + + if (Number(claim.confidence || 0) < policy.minClaimConfidence) { + findings.push( + finding( + "low_confidence_claim", + "warning", + output, + claim, + `Claim confidence ${claim.confidence} is below the policy threshold ${policy.minClaimConfidence}.`, + { confidence: claim.confidence, threshold: policy.minClaimConfidence } + ) + ); + } + + const tags = new Set(claim.tags || []); + const highRiskTags = policy.highRiskTags.filter((tag) => tags.has(tag)); + if (highRiskTags.length > 0 && sources.length < 2) { + findings.push( + finding( + "high_risk_claim_needs_multiple_sources", + "blocker", + output, + claim, + `High-risk claim tags require at least two known sources: ${highRiskTags.join(", ")}`, + { highRiskTags, knownSourceCount: sources.length } + ) + ); + } + + for (const [tag, allowedTypes] of Object.entries(policy.requiredSourceTypesByTag || {})) { + if (!tags.has(tag)) { + continue; + } + + const hasRequiredType = sources.some((source) => allowedTypes.includes(source.type)); + if (!hasRequiredType) { + findings.push( + finding( + "missing_required_source_type", + "warning", + output, + claim, + `Claim tagged ${tag} should cite at least one source of type: ${allowedTypes.join(", ")}`, + { tag, allowedTypes } + ) + ); + } + } + + return findings; +} + +function buildReviewerTasks(findings) { + return findings.map((item) => { + const prefix = item.claimId ? 
`${item.outputId}/${item.claimId}` : item.outputId; + const actionByCode = { + unsupported_claim: "Add source evidence or downgrade the claim.", + unknown_source: "Replace missing source IDs with indexed project sources.", + unsafe_source_status: "Remove the claim or add an explicit retraction caveat.", + stale_source_review: "Refresh source review date or cite a newer source.", + low_confidence_claim: "Require human reviewer confirmation before release.", + high_risk_claim_needs_multiple_sources: "Add independent corroborating evidence.", + missing_required_source_type: "Cite a source type that supports this claim class.", + missing_disclosure: "Add the required AI disclosure fields." + }; + + return { + id: `${item.code}:${prefix}`, + severity: item.severity, + action: actionByCode[item.code] || "Review before release.", + target: prefix + }; + }); +} + +export function verifyAiOutputEvidence(packet, policyOverrides = {}) { + const policy = { + ...DEFAULT_POLICY, + ...policyOverrides, + requiredDisclosuresByMode: { + ...DEFAULT_POLICY.requiredDisclosuresByMode, + ...(policyOverrides.requiredDisclosuresByMode || {}) + }, + requiredSourceTypesByTag: { + ...DEFAULT_POLICY.requiredSourceTypesByTag, + ...(policyOverrides.requiredSourceTypesByTag || {}) + } + }; + const project = packet.project || {}; + const sources = packet.sources || []; + const outputs = packet.outputs || []; + const reviewDate = project.reviewDate || new Date().toISOString().slice(0, 10); + const sourcesById = sourceIndex(sources); + const findings = []; + + for (const output of outputs) { + findings.push(...checkDisclosures(output, policy)); + + for (const claim of output.claims || []) { + findings.push(...checkClaimSources(output, claim, sourcesById, reviewDate, policy)); + } + } + + const blockerCount = findings.filter((item) => item.severity === "blocker").length; + const warningCount = findings.filter((item) => item.severity === "warning").length; + const sourceCoverage = 
buildSourceCoverage(outputs, sourcesById); + const reviewerTasks = buildReviewerTasks(findings); + const claimCount = outputs.reduce((sum, output) => sum + (output.claims || []).length, 0); + const ready = blockerCount === 0; + const auditDigest = sha256({ + projectId: project.id, + outputIds: outputs.map((output) => output.id).sort(), + findingCodes: findings.map((item) => `${item.code}:${item.outputId}:${item.claimId || ""}`).sort(), + sourceCoverage, + policy: { + staleReviewDays: policy.staleReviewDays, + minClaimConfidence: policy.minClaimConfidence + } + }); + + return { + project: { + id: project.id, + title: project.title, + domain: project.domain, + reviewDate + }, + ready, + recommendation: ready ? "release_with_human_review" : "hold_for_evidence_fix", + counts: { + outputs: outputs.length, + claims: claimCount, + sources: sources.length, + blockers: blockerCount, + warnings: warningCount, + reviewerTasks: reviewerTasks.length + }, + sourceCoverage, + findings, + reviewerTasks, + auditDigest: `sha256:${auditDigest}` + }; +} + +export { DEFAULT_POLICY }; diff --git a/ai-output-evidence-verifier/test/ai-output-evidence-verifier.test.js b/ai-output-evidence-verifier/test/ai-output-evidence-verifier.test.js new file mode 100644 index 0000000..29200d5 --- /dev/null +++ b/ai-output-evidence-verifier/test/ai-output-evidence-verifier.test.js @@ -0,0 +1,69 @@ +import assert from "node:assert/strict"; +import { readFile } from "node:fs/promises"; +import { fileURLToPath } from "node:url"; +import { dirname, join } from "node:path"; +import { verifyAiOutputEvidence } from "../src/ai-output-evidence-verifier.js"; + +const here = dirname(fileURLToPath(import.meta.url)); +const sample = JSON.parse( + await readFile(join(here, "..", "data", "sample-assistant-output.json"), "utf8") +); + +const report = verifyAiOutputEvidence(sample); + +assert.equal(report.ready, false); +assert.equal(report.recommendation, "hold_for_evidence_fix"); +assert.equal(report.counts.outputs, 
3); +assert.equal(report.counts.claims, 5); +assert.equal(report.counts.blockers, 4); +assert.equal(report.counts.warnings, 2); +assert.ok(report.auditDigest.startsWith("sha256:")); +assert.deepEqual( + report.findings.map((finding) => finding.code).sort(), + [ + "high_risk_claim_needs_multiple_sources", + "low_confidence_claim", + "missing_disclosure", + "stale_source_review", + "unsafe_source_status", + "unsupported_claim" + ].sort() +); +assert.ok(report.reviewerTasks.some((task) => task.action.includes("Add source evidence"))); +assert.equal(report.sourceCoverage.missingCount, 0); + +const cleanPacket = structuredClone(sample); +cleanPacket.sources = cleanPacket.sources.filter((source) => source.status === "active"); +cleanPacket.outputs = [ + { + id: "summary-clean", + mode: "summary", + audience: "collaborator", + generatedAt: "2026-05-16T18:00:00Z", + disclosures: ["ai-generated", "human-review-required", "source-coverage"], + claims: [ + { + id: "clean-claim", + text: "The summary is limited to observed calcium-imaging correlations and dataset provenance.", + sourceIds: ["paper-astrocyte-2025", "dataset-ca-raw"], + tags: ["calcium-imaging", "raw-data"], + confidence: 0.91 + } + ] + } +]; + +const cleanReport = verifyAiOutputEvidence(cleanPacket); +assert.equal(cleanReport.ready, true); +assert.equal(cleanReport.counts.blockers, 0); +assert.equal(cleanReport.counts.warnings, 0); +assert.equal(cleanReport.reviewerTasks.length, 0); + +const missingSourcePacket = structuredClone(cleanPacket); +missingSourcePacket.outputs[0].claims[0].sourceIds = ["missing-source"]; +const missingSourceReport = verifyAiOutputEvidence(missingSourcePacket); +assert.equal(missingSourceReport.ready, false); +assert.equal(missingSourceReport.sourceCoverage.missingCount, 1); +assert.ok(missingSourceReport.findings.some((finding) => finding.code === "unknown_source")); + +console.log("ai-output-evidence-verifier tests passed");