From 8f74bbf93e9c9f46c26c7d4f2a970f540dd3dfbf Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 6 May 2026 15:49:26 +0000 Subject: [PATCH 1/3] Build EU AI Act Article 12 reference implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A ~30-line credit-decisioning service that: - refuses to boot unless the training-data certificate verifies via @certifieddata/verify, and - appends an Article 12 event to a hash-chained Decision Ledger on every /decide call, exposing the chain via /evidence/:id. src/app.ts is 51 total / 33 meaningful lines (under the 50-line ceiling). src/ledger.ts ships MemoryLedger (for the demo) and HttpLedger (for production), both behind a common Ledger interface, plus a public verifyChain() helper that re-walks bundles. src/article12.ts maps Article 12(2)(a-d) onto concrete event fields; ARTICLE_12_MAPPING.md walks each requirement field-by-field with explicit "what this is not" non-claims. Includes: - 3 smoke tests covering boot-or-refuse, chain integrity, multi- decision sequencing - 3 numbered example scripts (log, verify-training-data, export- evidence) all run end-to-end - docker-compose with a Postgres scaffold for a future real ledger - CI matrix on Node 20/22 × {linux, macos}; the workflow checks out @certifieddata/verify as a sibling until it publishes to npm The package.json declares @certifieddata/verify as file:../verify for now; switch to a published version range and to `npm ci` after the first verify@0.1.0 publish. 
--- .gitattributes | 4 + .github/workflows/ci.yml | 49 ++++++++++ .gitignore | 8 ++ ARTICLE_12_MAPPING.md | 75 ++++++++++++++ Dockerfile | 23 +++++ LICENSE | 2 +- README.md | 125 +++++++++++++++++++++++- docker-compose.yml | 46 +++++++++ fixtures/decisions.jsonl | 3 + fixtures/generate.mjs | 97 ++++++++++++++++++ fixtures/keys.json | 12 +++ fixtures/training-cert.json | 16 +++ fixtures/training-data.csv | 6 ++ package-lock.json | 100 +++++++++++++++++++ package.json | 37 +++++++ src/app.ts | 51 ++++++++++ src/article12.ts | 50 ++++++++++ src/examples/01-log-decision.ts | 20 ++++ src/examples/02-verify-training-data.ts | 16 +++ src/examples/03-export-evidence.ts | 38 +++++++ src/ledger.ts | 108 ++++++++++++++++++++ src/server.ts | 17 ++++ src/smoke.test.ts | 85 ++++++++++++++++ tsconfig.json | 22 +++++ 24 files changed, 1008 insertions(+), 2 deletions(-) create mode 100644 .gitattributes create mode 100644 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 ARTICLE_12_MAPPING.md create mode 100644 Dockerfile create mode 100644 docker-compose.yml create mode 100644 fixtures/decisions.jsonl create mode 100644 fixtures/generate.mjs create mode 100644 fixtures/keys.json create mode 100644 fixtures/training-cert.json create mode 100644 fixtures/training-data.csv create mode 100644 package-lock.json create mode 100644 package.json create mode 100644 src/app.ts create mode 100644 src/article12.ts create mode 100644 src/examples/01-log-decision.ts create mode 100644 src/examples/02-verify-training-data.ts create mode 100644 src/examples/03-export-evidence.ts create mode 100644 src/ledger.ts create mode 100644 src/server.ts create mode 100644 src/smoke.test.ts create mode 100644 tsconfig.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..6d4743b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +* text=auto eol=lf +fixtures/*.csv binary +fixtures/*.json binary +fixtures/*.jsonl binary diff --git 
a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..c81f4e5 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,49 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + test: + name: Node ${{ matrix.node }} on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + node: [20, 22] + os: [ubuntu-latest, macos-latest] + steps: + # Until @certifieddata/verify is published to npm, this repo depends on + # it via `file:../verify`, so CI checks out both repositories as siblings. + # After the first publish, drop the verify checkout and switch this repo's + # dependency to a published version range. + - uses: actions/checkout@v4 + with: + path: reference-impl + - uses: actions/checkout@v4 + with: + repository: certifieddata/verify + path: verify + - uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node }} + - name: Build verify + working-directory: verify + run: | + npm install + npm run build + - name: Install reference-impl + working-directory: reference-impl + run: npm install + - name: Typecheck + working-directory: reference-impl + run: npm run typecheck + - name: Test + working-directory: reference-impl + run: npm test diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c2203f0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +node_modules/ +dist/ +*.log +.DS_Store +.env +.env.local +coverage/ +*.tgz diff --git a/ARTICLE_12_MAPPING.md b/ARTICLE_12_MAPPING.md new file mode 100644 index 0000000..f3fb6c2 --- /dev/null +++ b/ARTICLE_12_MAPPING.md @@ -0,0 +1,75 @@ +# EU AI Act Article 12 → ledger event mapping + +This document maps each requirement in **Article 12 ("Record-keeping")** of the EU AI Act onto a concrete field in the Decision Ledger event shape that this reference implementation emits. 
+ +> **This is a reference implementation, not legal advice.** Final Article 12 compliance is determined by the deployer's risk management system per Article 9 of the EU AI Act. The mapping below is one defensible interpretation, not the only one. + +## Article 12(1) — Logging obligation + +> *High-risk AI systems shall technically allow for the automatic recording of events ('logs') over the lifetime of the system.* + +| Article 12 requirement | Ledger event field | Notes | +|---|---|---| +| Recording of events ('logs') over the lifetime of the system | `entries[]` append-only | Every `POST /decide` produces one entry. The ledger is hash-chained: tampering with any entry is detectable in O(n). | +| Tamper evidence | `prev_hash`, `this_hash` | `this_hash = sha256(JSON.stringify({event, prev_hash, sequence}))`, exactly as computed in `src/ledger.ts`. Any reorder, mutation, or insertion breaks the chain. The `verifyChain()` helper in `src/ledger.ts` re-walks and confirms. | + +## Article 12(2)(a) — Period of use + +> *Logs shall record at least: the period of each use of the system.* + +| Article 12 requirement | Ledger event field | Notes | +|---|---|---| +| Period of each use of the system | `event.timestamp` | ISO-8601 UTC, recorded at decision time. If your inference takes non-trivial time, record `started_at`/`ended_at` as well. | + +## Article 12(2)(b) — Reference database checked against + +> *Logs shall record at least: the reference database against which input data has been checked by the system.* + +| Article 12 requirement | Ledger event field | Notes | +|---|---|---| +| Reference database against which input data has been checked | `event.training_cert_id` | The CertifiedData.io certification ID that bound the training dataset to the deployed model. The dataset itself is not stored in the ledger — its hash is bound into the cert, and the cert is verified at startup.
| +| Dataset integrity | (transitively) `cert.dataset_hash` | Stored on the `cert.v1` document, not on the ledger entry; re-verifiable any time with `certifieddata-verify`. | + +## Article 12(2)(c) — Input data leading to a match + +> *Logs shall record at least: the input data for which the search has led to a match.* + +| Article 12 requirement | Ledger event field | Notes | +|---|---|---| +| Input data leading to a match | `event.input_hash` | `sha256(JSON(input))`. The raw input is **not** written to the ledger to limit GDPR/PII exposure; the hash binds the decision to a specific input without retaining the input itself. If the deployer's risk management requires retaining inputs, store them in a separate, access-controlled store and add `input_storage_ref` to the event. | + +## Article 12(2)(d) — Identification of natural persons involved in verification + +> *Logs shall record at least: the identification of the natural persons involved in the verification of the results.* + +| Article 12 requirement | Ledger event field | Notes | +|---|---|---| +| Identification of natural persons involved in verification | `event.reviewer_id` | Optional. Null for fully-automated decisions. Set by the human-in-the-loop reviewer when the system routes a decision to them. The deployer's identity-management system maps `reviewer_id` back to a real person under controlled conditions. | + +## Beyond Article 12 + +These fields are not required by Article 12 but are emitted to support broader EU AI Act obligations: + +| Field | Why | +|---|---| +| `event.model_version` | Article 14 (human oversight) and Article 15 (accuracy) require knowing which model produced a given output. | +| `event.output` | Article 13 (transparency) and Article 14 require explainability of outputs to affected persons. | +| `event.schema_version` | Schema evolution. Pinned to `article12.v1` for this release. 
| + +## What's deliberately not in the ledger + +- **Raw inputs.** Hashed only — see Article 12(2)(c) above. +- **Model weights.** Bound to the training cert, not the ledger. +- **Training data rows.** Bound to the training cert by hash, not the ledger. +- **PII about the affected person.** The deployer's data-protection regime (GDPR Article 32, AI Act Recital 60) governs this. We default to "log nothing personal" and let the deployer add fields under their controllership. + +## Verifying the chain + +Anyone with access to an evidence bundle can verify the chain locally without trusting the ledger backend: + +```ts +import { verifyChain } from "@certifieddata/reference-impl"; +const ok = verifyChain(bundle.entries); // boolean +``` + +The reference implementation re-runs `verifyChain` on every `evidence/:id` response and exposes the result as `chain_verified` so consumers don't have to. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..5fb0c3f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +# Production image. Assumes @certifieddata/verify is published to npm. +# For local development before that publish, use `npm install && npm start` +# directly — the Dockerfile pins to a real npm version, not file:../verify. 
+ +FROM node:22-alpine AS build +WORKDIR /app +COPY package.json package-lock.json* ./ +RUN npm install --omit=optional +COPY tsconfig.json ./ +COPY src ./src +COPY fixtures ./fixtures +RUN npm run build + +FROM node:22-alpine +WORKDIR /app +ENV NODE_ENV=production +ENV PORT=3000 +COPY --from=build /app/node_modules ./node_modules +COPY --from=build /app/dist ./dist +COPY --from=build /app/fixtures ./fixtures +COPY --from=build /app/package.json ./package.json +EXPOSE 3000 +CMD ["node", "dist/server.js"] diff --git a/LICENSE b/LICENSE index e6ff1bc..2d3ccb2 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2026 Certified Data +Copyright (c) 2026 CertifiedData.io Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 695057c..371b6aa 100644 --- a/README.md +++ b/README.md @@ -1 +1,124 @@ -# reference-impl \ No newline at end of file +# reference-impl + +> Reference implementation: a high-risk AI system logging EU AI Act Article 12 evidence with CertifiedData.io's Decision Ledger. + +A ~30-line credit-scoring service that: + +1. **Refuses to boot** unless its training-data certificate verifies via [`@certifieddata/verify`](https://github.com/certifieddata/verify). +2. **Appends an Article 12 event** to a hash-chained Decision Ledger for every decision. +3. **Exports a regulator-ready evidence bundle** for any decision via `/evidence/:id`. 
+ +## Quickstart + +```bash +git clone https://github.com/certifieddata/reference-impl.git +cd reference-impl +docker compose up +``` + +Then in another terminal: + +```bash +curl -X POST localhost:3000/decide \ + -H 'content-type: application/json' \ + -d '{"income": 80000, "debt": 20000}' +# → {"decision_id":"...","approved":true,"score":0.6} + +curl localhost:3000/evidence/<decision_id> +# → {"decision_id":"...","entries":[...],"chain_verified":true} +``` + +## What this proves + +- Training data was **certified** as synthetic before it touched the model — the service literally refuses to start otherwise. +- **Every decision** is hash-chained, tamper-evident, and linked back to the training certificate. +- A **regulator can replay** any decision from the evidence bundle and re-verify the chain locally. + +## The 50-line app + +[`src/app.ts`](src/app.ts) is the entire example. Imports and comments aside, it is 33 lines of meaningful code: + +```ts +export async function createApp(opts: AppOptions): Promise<{ app: Hono; certId: string }> { + const cert = await fetchCert(opts.trainingCert, { offline: true }); + const keys = await loadKeys({ keysFile: opts.keys, offline: true }); + const verdict = await verifyCertificate(cert, keys); + if (verdict.verdict !== "VALID") throw new Error(`training data not verified: ${verdict.reason}`); + + const ledger = makeLedger({ url: opts.ledgerUrl }); + const modelVersion = opts.modelVersion ??
"credit-v3.2.1"; + const app = new Hono(); + + app.post("/decide", async (c) => { + const input = await c.req.json<{ income?: number; debt?: number }>(); + const score = scoreCredit(input); + const decision = { decision_id: randomUUID(), approved: score > 0.6, score }; + await ledger.append(article12Event({ + decision_id: decision.decision_id, + training_cert_id: cert.certification_id, + model_version: modelVersion, + input, output: { approved: decision.approved, score }, + timestamp: new Date().toISOString(), + })); + return c.json(decision); + }); + + app.get("/evidence/:id", async (c) => c.json(await ledger.evidenceBundle(c.req.param("id")))); + + return { app, certId: cert.certification_id }; +} +``` + +Drop `scoreCredit` for your real inference, point `LEDGER_URL` at a production Decision Ledger, and you have an Article 12-compliant logging surface. + +## Article 12 mapping + +Every field in the ledger event maps explicitly to a paragraph in EU AI Act Article 12. See [ARTICLE_12_MAPPING.md](ARTICLE_12_MAPPING.md) for the full table. + +## Run it locally + +```bash +npm install +npm run fixtures # regenerate signed training cert + keys +npm run build +npm test # 3 smoke tests +npm start # serve on :3000 + +# Or with Docker: +docker compose up +``` + +The fixtures include a real Ed25519 keypair and a real signed `cert.v1`. They are committed so reviewers can verify the verifier without trusting any pre-built artifact. + +## Examples + +```bash +npm run example:01 # log one Article 12 event +npm run example:02 # verify the training-data certificate +npm run example:03 # export an evidence bundle and re-verify the chain +``` + +## Use this as a template + +```bash +gh repo create my-org/my-credit-scoring-service \ + --template certifieddata/reference-impl \ + --public +``` + +Then replace `scoreCredit` with your real model inference and replace the default `MemoryLedger` with your production Decision Ledger by setting `LEDGER_URL`.
+ +## What this is not + +- **Not a certified compliance product.** Do not deploy this and tell a regulator your obligations are met. +- **Not legal advice.** Final Article 12 compliance is determined by the deployer's risk management system per Article 9 of the EU AI Act. +- **Not a substitute for Article 9 risk management documentation.** This implementation handles the recording obligation, not the risk-assessment obligation. + +## Related projects + +- [`@certifieddata/verify`](https://github.com/certifieddata/verify) — the audit-friendly CLI/SDK this app imports +- [`@certifieddata/pii-scan`](https://github.com/certifieddata/pii-scan) — scan datasets for PII before certifying them + +## License + +MIT — see [LICENSE](LICENSE). diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..c1320dc --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,46 @@ +services: + reference-impl: + build: + context: . + dockerfile: Dockerfile + image: certifieddata/reference-impl:latest + ports: + - "3000:3000" + environment: + TRAINING_CERT: /app/fixtures/training-cert.json + TRAINING_KEYS: /app/fixtures/keys.json + MODEL_VERSION: credit-v3.2.1 + # LEDGER_URL: http://ledger:8080 # uncomment to use the HTTP ledger below + depends_on: + postgres: + condition: service_healthy + + postgres: + image: postgres:16-alpine + environment: + POSTGRES_USER: ledger + POSTGRES_PASSWORD: ledger + POSTGRES_DB: ledger + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ledger"] + interval: 5s + timeout: 3s + retries: 10 + volumes: + - ledger-data:/var/lib/postgresql/data + + # Stub for a future production Decision Ledger service. 
+ # Uncomment when wiring up against a real ledger backend: + # + # ledger: + # image: certifieddata/ledger:latest + # environment: + # POSTGRES_URL: postgres://ledger:ledger@postgres:5432/ledger + # depends_on: + # postgres: + # condition: service_healthy + # ports: + # - "8080:8080" + +volumes: + ledger-data: diff --git a/fixtures/decisions.jsonl b/fixtures/decisions.jsonl new file mode 100644 index 0000000..a13e484 --- /dev/null +++ b/fixtures/decisions.jsonl @@ -0,0 +1,3 @@ +{"decision_id":"d_demo_001","input":{"income":80000,"debt":20000},"output":{"approved":true,"score":0.6}} +{"decision_id":"d_demo_002","input":{"income":30000,"debt":25000},"output":{"approved":false,"score":0.05}} +{"decision_id":"d_demo_003","input":{"income":100000,"debt":5000},"output":{"approved":true,"score":0.95}} diff --git a/fixtures/generate.mjs b/fixtures/generate.mjs new file mode 100644 index 0000000..eacc108 --- /dev/null +++ b/fixtures/generate.mjs @@ -0,0 +1,97 @@ +// Regenerate the training-data certificate, keys document, and decisions sample. +// Self-contained — does not import from src/ so it can run before the build. + +import { generateKeyPairSync, sign, createHash } from "node:crypto"; +import { writeFileSync } from "node:fs"; +import { fileURLToPath } from "node:url"; +import { dirname, join } from "node:path"; + +const here = dirname(fileURLToPath(import.meta.url)); + +function canonicalize(v) { + if (v === null) return "null"; + if (v === true) return "true"; + if (v === false) return "false"; + if (typeof v === "string") return jsString(v); + if (typeof v === "number") { + if (!Number.isFinite(v)) throw new RangeError("non-finite number"); + return Object.is(v, -0) ? 
"0" : JSON.stringify(v); + } + if (Array.isArray(v)) return "[" + v.map(canonicalize).join(",") + "]"; + if (typeof v === "object") { + const keys = Object.keys(v).filter((k) => v[k] !== undefined).sort(); + return "{" + keys.map((k) => jsString(k) + ":" + canonicalize(v[k])).join(",") + "}"; + } + throw new TypeError("unsupported value"); +} +function jsString(s) { + let o = '"'; + for (let i = 0; i < s.length; i++) { + const c = s.charCodeAt(i); + if (c === 0x22) o += '\\"'; + else if (c === 0x5c) o += "\\\\"; + else if (c === 0x08) o += "\\b"; + else if (c === 0x09) o += "\\t"; + else if (c === 0x0a) o += "\\n"; + else if (c === 0x0c) o += "\\f"; + else if (c === 0x0d) o += "\\r"; + else if (c < 0x20) o += "\\u" + c.toString(16).padStart(4, "0"); + else o += s[i]; + } + return o + '"'; +} +function rawEd25519PublicKey(publicKey) { + const der = publicKey.export({ type: "spki", format: "der" }); + return Buffer.from(der.subarray(der.length - 32)).toString("base64"); +} + +const { privateKey, publicKey } = generateKeyPairSync("ed25519"); +const keyId = "ck_2026_credit_training"; + +const trainingCsv = "income,debt,defaulted\n50000,10000,0\n80000,20000,0\n30000,25000,1\n100000,5000,0\n40000,30000,1\n"; +writeFileSync(join(here, "training-data.csv"), trainingCsv); +const datasetHex = createHash("sha256").update(trainingCsv).digest("hex"); + +const certUnsigned = { + certification_id: "ce_2026_credit_model_v3", + timestamp: "2026-01-15T12:00:00Z", + issuer: "CertifiedData.io", + dataset_hash: `sha256:${datasetHex}`, + algorithm: "DP-CTGAN", + rows: 5, + columns: 3, + schema_version: "cert.v1", + key_id: keyId, + metadata: { epsilon: 1.0, description: "synthetic credit training data, DP-CTGAN ε=1.0" }, +}; +const sig = sign(null, Buffer.from(canonicalize(certUnsigned), "utf8"), privateKey); +const cert = { ...certUnsigned, signature: sig.toString("base64") }; +writeFileSync(join(here, "training-cert.json"), JSON.stringify(cert, null, 2) + "\n"); + +const keysDoc = { + 
issuer: "CertifiedData.io", + keys: [ + { + key_id: keyId, + public_key: rawEd25519PublicKey(publicKey), + algorithm: "ed25519", + created_at: "2026-01-01T00:00:00Z", + label: "credit-training-fixture", + }, + ], +}; +writeFileSync(join(here, "keys.json"), JSON.stringify(keysDoc, null, 2) + "\n"); + +// Sample decisions log — what example 03 exports as evidence. +const decisions = [ + { decision_id: "d_demo_001", input: { income: 80000, debt: 20000 }, output: { approved: true, score: 0.6 } }, + { decision_id: "d_demo_002", input: { income: 30000, debt: 25000 }, output: { approved: false, score: 0.05 } }, + { decision_id: "d_demo_003", input: { income: 100000, debt: 5000 }, output: { approved: true, score: 0.95 } }, +]; +writeFileSync(join(here, "decisions.jsonl"), decisions.map((d) => JSON.stringify(d)).join("\n") + "\n"); + +console.log("fixtures regenerated:"); +console.log(" training-data.csv —", datasetHex.slice(0, 12) + "…"); +console.log(" training-cert.json —", cert.certification_id); +console.log(" keys.json —", keyId); +console.log(" decisions.jsonl —", decisions.length, "sample decisions"); diff --git a/fixtures/keys.json b/fixtures/keys.json new file mode 100644 index 0000000..7fdafc2 --- /dev/null +++ b/fixtures/keys.json @@ -0,0 +1,12 @@ +{ + "issuer": "CertifiedData.io", + "keys": [ + { + "key_id": "ck_2026_credit_training", + "public_key": "qpbUlDzhp6mmz0gnjNIi4aYVXlpzNH/jeuIA3H0DBy4=", + "algorithm": "ed25519", + "created_at": "2026-01-01T00:00:00Z", + "label": "credit-training-fixture" + } + ] +} diff --git a/fixtures/training-cert.json b/fixtures/training-cert.json new file mode 100644 index 0000000..29f1eca --- /dev/null +++ b/fixtures/training-cert.json @@ -0,0 +1,16 @@ +{ + "certification_id": "ce_2026_credit_model_v3", + "timestamp": "2026-01-15T12:00:00Z", + "issuer": "CertifiedData.io", + "dataset_hash": "sha256:4fcf1cd4278f0dac052979595c25cb8a76668d89e02e2e697c0c629f36a75dd3", + "algorithm": "DP-CTGAN", + "rows": 5, + "columns": 3, + 
"schema_version": "cert.v1", + "key_id": "ck_2026_credit_training", + "metadata": { + "epsilon": 1, + "description": "synthetic credit training data, DP-CTGAN ε=1.0" + }, + "signature": "oRDoNlOVGiQfHIljECVFxQewuIlmJpoQO2QVltAF6nzx9ARBS92xW+/yZwN/0fKJhlNleWXsm3XG3BD1FeIyAg==" +} diff --git a/fixtures/training-data.csv b/fixtures/training-data.csv new file mode 100644 index 0000000..39e271b --- /dev/null +++ b/fixtures/training-data.csv @@ -0,0 +1,6 @@ +income,debt,defaulted +50000,10000,0 +80000,20000,0 +30000,25000,1 +100000,5000,0 +40000,30000,1 diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..5ce015b --- /dev/null +++ b/package-lock.json @@ -0,0 +1,100 @@ +{ + "name": "@certifieddata/reference-impl", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "@certifieddata/reference-impl", + "version": "0.1.0", + "license": "MIT", + "dependencies": { + "@certifieddata/verify": "file:../verify", + "@hono/node-server": "^1.13.0", + "hono": "^4.6.0" + }, + "devDependencies": { + "@types/node": "^22.10.0", + "typescript": "^5.6.0" + }, + "engines": { + "node": ">=20" + } + }, + "../verify": { + "name": "@certifieddata/verify", + "version": "0.1.0", + "license": "MIT", + "bin": { + "cd-verify": "dist/cli.js", + "certifieddata-verify": "dist/cli.js" + }, + "devDependencies": { + "@eslint/js": "^9.15.0", + "@types/node": "^22.10.0", + "eslint": "^9.15.0", + "typescript": "^5.6.0", + "typescript-eslint": "^8.15.0" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/@certifieddata/verify": { + "resolved": "../verify", + "link": true + }, + "node_modules/@hono/node-server": { + "version": "1.19.14", + "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.14.tgz", + "integrity": "sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw==", + "license": "MIT", + "engines": { + "node": ">=18.14.1" + }, + "peerDependencies": { + 
"hono": "^4" + } + }, + "node_modules/@types/node": { + "version": "22.19.17", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.17.tgz", + "integrity": "sha512-wGdMcf+vPYM6jikpS/qhg6WiqSV/OhG+jeeHT/KlVqxYfD40iYJf9/AE1uQxVWFvU7MipKRkRv8NSHiCGgPr8Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } + }, + "node_modules/hono": { + "version": "4.12.18", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.18.tgz", + "integrity": "sha512-RWzP96k/yv0PQfyXnWjs6zot20TqfpfsNXhOnev8d1InAxubW93L11/oNUc3tQqn2G0bSdAOBpX+2uDFHV7kdQ==", + "license": "MIT", + "engines": { + "node": ">=16.9.0" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", + "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", + "dev": true, + "license": "MIT" + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..32c9f1b --- /dev/null +++ b/package.json @@ -0,0 +1,37 @@ +{ + "name": "@certifieddata/reference-impl", + "version": "0.1.0", + "private": true, + "description": "Reference implementation: a high-risk AI system logging EU AI Act Article 12 evidence with CertifiedData.io's Decision Ledger.", + "type": "module", + "main": "dist/app.js", + "engines": { + "node": ">=20" + }, + "license": "MIT", + "author": "CertifiedData.io", + "repository": { + "type": "git", + "url": "git+https://github.com/certifieddata/reference-impl.git" + }, + "homepage": 
"https://github.com/certifieddata/reference-impl#readme", + "scripts": { + "build": "tsc -p tsconfig.json", + "typecheck": "tsc --noEmit", + "fixtures": "node fixtures/generate.mjs", + "test": "tsc -p tsconfig.json && node --test dist/smoke.test.js", + "start": "node dist/server.js", + "example:01": "tsc -p tsconfig.json && node dist/examples/01-log-decision.js", + "example:02": "tsc -p tsconfig.json && node dist/examples/02-verify-training-data.js", + "example:03": "tsc -p tsconfig.json && node dist/examples/03-export-evidence.js" + }, + "dependencies": { + "@certifieddata/verify": "file:../verify", + "hono": "^4.6.0", + "@hono/node-server": "^1.13.0" + }, + "devDependencies": { + "@types/node": "^22.10.0", + "typescript": "^5.6.0" + } +} diff --git a/src/app.ts b/src/app.ts new file mode 100644 index 0000000..1f76206 --- /dev/null +++ b/src/app.ts @@ -0,0 +1,51 @@ +// EU AI Act Article 12 reference application. +// +// This service refuses to boot unless its training-data certificate verifies, +// and appends an Article 12 event to the Decision Ledger for every decision. +// See ARTICLE_12_MAPPING.md for the field-by-field map of Article 12 → ledger event. 
+ +import { Hono } from "hono"; +import { randomUUID } from "node:crypto"; +import { verifyCertificate, loadKeys, fetchCert } from "@certifieddata/verify"; +import { article12Event } from "./article12.js"; +import { makeLedger } from "./ledger.js"; + +export interface AppOptions { + trainingCert: string; + keys: string; + ledgerUrl?: string; + modelVersion?: string; +} + +export async function createApp(opts: AppOptions): Promise<{ app: Hono; certId: string }> { + const cert = await fetchCert(opts.trainingCert, { offline: true }); + const keys = await loadKeys({ keysFile: opts.keys, offline: true }); + const verdict = await verifyCertificate(cert, keys); + if (verdict.verdict !== "VALID") throw new Error(`training data not verified: ${verdict.reason}`); + + const ledger = makeLedger({ url: opts.ledgerUrl }); + const modelVersion = opts.modelVersion ?? "credit-v3.2.1"; + const app = new Hono(); + + app.post("/decide", async (c) => { + const input = await c.req.json<{ income?: number; debt?: number }>(); + const score = scoreCredit(input); + const decision = { decision_id: randomUUID(), approved: score > 0.6, score }; + await ledger.append(article12Event({ + decision_id: decision.decision_id, + training_cert_id: cert.certification_id, + model_version: modelVersion, + input, output: { approved: decision.approved, score }, + timestamp: new Date().toISOString(), + })); + return c.json(decision); + }); + + app.get("/evidence/:id", async (c) => c.json(await ledger.evidenceBundle(c.req.param("id")))); + + return { app, certId: cert.certification_id }; +} + +function scoreCredit(input: { income?: number; debt?: number }): number { + return Math.max(0, Math.min(1, ((input.income ?? 0) - (input.debt ?? 0)) / 100000)); +} diff --git a/src/article12.ts b/src/article12.ts new file mode 100644 index 0000000..f4c41f4 --- /dev/null +++ b/src/article12.ts @@ -0,0 +1,50 @@ +// EU AI Act Article 12 event shape. 
+// +// Article 12 ("Record-keeping") requires high-risk AI systems to log events +// "over the lifetime of the system" (Art. 12(1)) covering at minimum the items +// in Article 12(2)(a–d). This module maps each required item onto a concrete +// JSON field that we append to the Decision Ledger. +// +// This is a reference implementation. The deployer's risk management system +// (Article 9) determines what additional fields are required for a given +// high-risk use case. See ARTICLE_12_MAPPING.md for the full field-by-field map. + +import { createHash } from "node:crypto"; + +export interface Article12Input { + decision_id: string; + training_cert_id: string; // Art. 12(2)(b) — reference database + model_version: string; + input: unknown; // hashed; raw input is never written to the ledger + output: unknown; + timestamp: string; // Art. 12(2)(a) — period of use + reviewer_id?: string; // Art. 12(2)(d) — natural person involved in verification +} + +export interface Article12Event { + schema_version: "article12.v1"; + decision_id: string; + timestamp: string; + training_cert_id: string; + model_version: string; + input_hash: string; // sha256(JSON(input)) — Art. 12(2)(c) + output: unknown; + reviewer_id: string | null; +} + +export function article12Event(input: Article12Input): Article12Event { + const inputHash = createHash("sha256") + .update(JSON.stringify(input.input)) + .digest("hex"); + + return { + schema_version: "article12.v1", + decision_id: input.decision_id, + timestamp: input.timestamp, + training_cert_id: input.training_cert_id, + model_version: input.model_version, + input_hash: `sha256:${inputHash}`, + output: input.output, + reviewer_id: input.reviewer_id ?? null, + }; +} diff --git a/src/examples/01-log-decision.ts b/src/examples/01-log-decision.ts new file mode 100644 index 0000000..747a1ab --- /dev/null +++ b/src/examples/01-log-decision.ts @@ -0,0 +1,20 @@ +// Minimum: log one Article 12 event into a fresh in-memory ledger.
+// Run: npm run example:01 + +import { article12Event } from "../article12.js"; +import { MemoryLedger } from "../ledger.js"; +import { randomUUID } from "node:crypto"; + +const ledger = new MemoryLedger(); + +const entry = await ledger.append(article12Event({ + decision_id: randomUUID(), + training_cert_id: "ce_2026_credit_model_v3", + model_version: "credit-v3.2.1", + input: { income: 80000, debt: 20000 }, + output: { approved: true, score: 0.6 }, + timestamp: new Date().toISOString(), + reviewer_id: "ops-12", +})); + +process.stdout.write(JSON.stringify(entry, null, 2) + "\n"); diff --git a/src/examples/02-verify-training-data.ts b/src/examples/02-verify-training-data.ts new file mode 100644 index 0000000..c547f60 --- /dev/null +++ b/src/examples/02-verify-training-data.ts @@ -0,0 +1,16 @@ +// Verify the training-data certificate at startup, exactly as the app does. +// Run: npm run example:02 + +import { fetchCert, loadKeys, verifyCertificate } from "@certifieddata/verify"; +import { fileURLToPath } from "node:url"; +import { dirname, join } from "node:path"; + +const here = dirname(fileURLToPath(import.meta.url)); +const fixturesDir = join(here, "..", "..", "fixtures"); + +const cert = await fetchCert(join(fixturesDir, "training-cert.json"), { offline: true }); +const keys = await loadKeys({ keysFile: join(fixturesDir, "keys.json"), offline: true }); +const result = await verifyCertificate(cert, keys, join(fixturesDir, "training-data.csv")); + +process.stdout.write(JSON.stringify(result, null, 2) + "\n"); +process.exit(result.verdict === "VALID" ? 0 : 1); diff --git a/src/examples/03-export-evidence.ts b/src/examples/03-export-evidence.ts new file mode 100644 index 0000000..2082256 --- /dev/null +++ b/src/examples/03-export-evidence.ts @@ -0,0 +1,38 @@ +// Replay the sample decisions log into a fresh ledger and dump a regulator-ready +// evidence bundle for the last decision. 
Run: npm run example:03 + +import { readFileSync } from "node:fs"; +import { fileURLToPath } from "node:url"; +import { dirname, join } from "node:path"; +import { randomUUID } from "node:crypto"; +import { MemoryLedger, verifyChain } from "../ledger.js"; +import { article12Event } from "../article12.js"; + +const here = dirname(fileURLToPath(import.meta.url)); +const fixturesDir = join(here, "..", "..", "fixtures"); + +const decisions = readFileSync(join(fixturesDir, "decisions.jsonl"), "utf8") + .trim() + .split("\n") + .map((line) => JSON.parse(line) as { decision_id: string; input: unknown; output: unknown }); + +const ledger = new MemoryLedger(); +for (const d of decisions) { + await ledger.append(article12Event({ + decision_id: d.decision_id, + training_cert_id: "ce_2026_credit_model_v3", + model_version: "credit-v3.2.1", + input: d.input, + output: d.output, + timestamp: new Date().toISOString(), + reviewer_id: randomUUID(), + })); +} + +const last = decisions[decisions.length - 1]; +const bundle = await ledger.evidenceBundle(last.decision_id); + +process.stdout.write(JSON.stringify({ + bundle, + re_verified_locally: verifyChain(bundle.entries), +}, null, 2) + "\n"); diff --git a/src/ledger.ts b/src/ledger.ts new file mode 100644 index 0000000..cf6003c --- /dev/null +++ b/src/ledger.ts @@ -0,0 +1,108 @@ +// Minimal Decision Ledger client. +// +// A Decision Ledger is an append-only, hash-chained log: each event is anchored +// to its predecessor via `prev_hash`, and `this_hash` covers the canonical bytes +// of the event plus that prev_hash. Any reordering, mutation, or insertion +// breaks the chain and is detectable in O(n) by re-walking it. +// +// Ships with a MemoryLedger (for the demo) and an HttpLedger (for production). +// Both expose the same surface so the example app doesn't care which is wired up. 
+
+import { createHash } from "node:crypto";
+import type { Article12Event } from "./article12.js";
+
+export interface LedgerEntry {
+  event_id: string;
+  sequence: number;
+  prev_hash: string;
+  this_hash: string;
+  event: Article12Event;
+}
+
+export interface EvidenceBundle {
+  decision_id: string;
+  entries: LedgerEntry[];
+  chain_verified: boolean;
+}
+
+export interface Ledger {
+  append(event: Article12Event): Promise<LedgerEntry>;
+  evidenceBundle(decisionId: string): Promise<EvidenceBundle>;
+}
+
+const GENESIS_PREV_HASH = "sha256:" + "0".repeat(64);
+
+function canonicalEntryBytes(event: Article12Event, prevHash: string, sequence: number): Buffer {
+  // We hash the event payload alongside its position and predecessor — sequence and
+  // prev_hash are part of the chain commitment, not optional metadata.
+  return Buffer.from(JSON.stringify({ event, prev_hash: prevHash, sequence }), "utf8");
+}
+
+function hashEntry(event: Article12Event, prevHash: string, sequence: number): string {
+  const hex = createHash("sha256").update(canonicalEntryBytes(event, prevHash, sequence)).digest("hex");
+  return `sha256:${hex}`;
+}
+
+export function verifyChain(entries: LedgerEntry[]): boolean {
+  let expectedPrev = GENESIS_PREV_HASH;
+  let expectedSeq = 0;
+  for (const e of entries) {
+    if (e.prev_hash !== expectedPrev) return false;
+    if (e.sequence !== expectedSeq) return false;
+    if (e.this_hash !== hashEntry(e.event, e.prev_hash, e.sequence)) return false;
+    expectedPrev = e.this_hash;
+    expectedSeq++;
+  }
+  return true;
+}
+
+export class MemoryLedger implements Ledger {
+  private entries: LedgerEntry[] = [];
+
+  async append(event: Article12Event): Promise<LedgerEntry> {
+    const prev = this.entries.length === 0 ? GENESIS_PREV_HASH : this.entries[this.entries.length - 1].this_hash;
+    const sequence = this.entries.length;
+    const this_hash = hashEntry(event, prev, sequence);
+    const entry: LedgerEntry = {
+      event_id: `le_${sequence.toString().padStart(8, "0")}`,
+      sequence,
+      prev_hash: prev,
+      this_hash,
+      event,
+    };
+    this.entries.push(entry);
+    return entry;
+  }
+
+  async evidenceBundle(decisionId: string): Promise<EvidenceBundle> {
+    const idx = this.entries.findIndex((e) => e.event.decision_id === decisionId);
+    if (idx < 0) return { decision_id: decisionId, entries: [], chain_verified: false };
+    const slice = this.entries.slice(0, idx + 1);
+    return { decision_id: decisionId, entries: slice, chain_verified: verifyChain(slice) };
+  }
+}
+
+export class HttpLedger implements Ledger {
+  constructor(private readonly url: string) {}
+
+  async append(event: Article12Event): Promise<LedgerEntry> {
+    const res = await fetch(`${this.url.replace(/\/$/, "")}/append`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({ event }),
+    });
+    if (!res.ok) throw new Error(`ledger append failed: HTTP ${res.status}`);
+    return res.json() as Promise<LedgerEntry>;
+  }
+
+  async evidenceBundle(decisionId: string): Promise<EvidenceBundle> {
+    const res = await fetch(`${this.url.replace(/\/$/, "")}/evidence/${encodeURIComponent(decisionId)}`);
+    if (!res.ok) throw new Error(`ledger evidence fetch failed: HTTP ${res.status}`);
+    const bundle = (await res.json()) as EvidenceBundle;
+    return { ...bundle, chain_verified: verifyChain(bundle.entries) };
+  }
+}
+
+export function makeLedger(opts: { url?: string } = {}): Ledger {
+  return opts.url ? new HttpLedger(opts.url) : new MemoryLedger();
+}
diff --git a/src/server.ts b/src/server.ts
new file mode 100644
index 0000000..5b6d640
--- /dev/null
+++ b/src/server.ts
@@ -0,0 +1,17 @@
+// HTTP entry point. Boots the app and listens.
+// `npm start` → reads TRAINING_CERT, TRAINING_KEYS, LEDGER_URL from env.
+ +import { serve } from "@hono/node-server"; +import { createApp } from "./app.js"; + +const port = Number(process.env.PORT ?? 3000); +const { app, certId } = await createApp({ + trainingCert: process.env.TRAINING_CERT ?? "fixtures/training-cert.json", + keys: process.env.TRAINING_KEYS ?? "fixtures/keys.json", + ledgerUrl: process.env.LEDGER_URL, + modelVersion: process.env.MODEL_VERSION, +}); + +serve({ fetch: app.fetch, port }, (info) => { + process.stdout.write(`reference-impl listening on :${info.port} — training_cert ${certId}\n`); +}); diff --git a/src/smoke.test.ts b/src/smoke.test.ts new file mode 100644 index 0000000..3c76aed --- /dev/null +++ b/src/smoke.test.ts @@ -0,0 +1,85 @@ +import { test } from "node:test"; +import assert from "node:assert/strict"; +import { fileURLToPath } from "node:url"; +import { dirname, join } from "node:path"; +import { createApp } from "./app.js"; +import { verifyChain } from "./ledger.js"; + +const here = dirname(fileURLToPath(import.meta.url)); +const fixturesDir = join(here, "..", "fixtures"); + +test("app boots when training cert verifies, refuses to boot otherwise", async () => { + // Happy path + const { app } = await createApp({ + trainingCert: join(fixturesDir, "training-cert.json"), + keys: join(fixturesDir, "keys.json"), + }); + assert.ok(app, "expected app instance"); + + // Sad path — using the wrong keys document means the cert key_id is unknown. 
+ const otherKeys = JSON.stringify({ issuer: "CertifiedData.io", keys: [] }); + const { writeFileSync, mkdtempSync } = await import("node:fs"); + const { tmpdir } = await import("node:os"); + const tmp = mkdtempSync(join(tmpdir(), "cdri-")); + const badKeysPath = join(tmp, "empty-keys.json"); + writeFileSync(badKeysPath, otherKeys); + + await assert.rejects( + () => createApp({ trainingCert: join(fixturesDir, "training-cert.json"), keys: badKeysPath }), + /training data not verified/, + ); +}); + +test("/decide logs an Article 12 event and /evidence/:id returns a verifiable chain", async () => { + const { app } = await createApp({ + trainingCert: join(fixturesDir, "training-cert.json"), + keys: join(fixturesDir, "keys.json"), + }); + + const decideRes = await app.fetch(new Request("http://x/decide", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ income: 80000, debt: 20000 }), + })); + assert.equal(decideRes.status, 200); + const decision = await decideRes.json() as { decision_id: string; approved: boolean; score: number }; + assert.equal(typeof decision.decision_id, "string"); + assert.equal(typeof decision.approved, "boolean"); + + const evidenceRes = await app.fetch(new Request(`http://x/evidence/${decision.decision_id}`)); + assert.equal(evidenceRes.status, 200); + const bundle = await evidenceRes.json() as { + decision_id: string; + entries: Array<{ event: { decision_id: string; training_cert_id: string }; this_hash: string }>; + chain_verified: boolean; + }; + assert.equal(bundle.decision_id, decision.decision_id); + assert.equal(bundle.chain_verified, true); + assert.ok(bundle.entries.length >= 1, "expected at least one ledger entry"); + assert.equal(bundle.entries[bundle.entries.length - 1].event.decision_id, decision.decision_id); + assert.equal(bundle.entries[0].event.training_cert_id, "ce_2026_credit_model_v3"); + // Re-verify the chain ourselves with the public helper, independent of the app. 
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any + assert.equal(verifyChain(bundle.entries as any), true); +}); + +test("multiple decisions chain forward (sequence/prev_hash agree)", async () => { + const { app } = await createApp({ + trainingCert: join(fixturesDir, "training-cert.json"), + keys: join(fixturesDir, "keys.json"), + }); + + const ids: string[] = []; + for (const body of [{ income: 50000, debt: 10000 }, { income: 20000, debt: 30000 }, { income: 90000, debt: 5000 }]) { + const r = await app.fetch(new Request("http://x/decide", { + method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify(body), + })); + ids.push(((await r.json()) as { decision_id: string }).decision_id); + } + + const last = await (await app.fetch(new Request(`http://x/evidence/${ids[ids.length - 1]}`))).json() as { + entries: unknown[]; chain_verified: boolean; + }; + assert.equal(last.chain_verified, true); + assert.ok(last.entries.length >= 3); +}); diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..ddb6faa --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,22 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ES2022", + "moduleResolution": "Bundler", + "lib": ["ES2022"], + "outDir": "dist", + "rootDir": "src", + "strict": true, + "noImplicitAny": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "skipLibCheck": true, + "declaration": false, + "sourceMap": true, + "resolveJsonModule": true + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist", "fixtures"] +} From 44a66f0d77a384ed0e4223e1b7cf3f9a34db9bb2 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 6 May 2026 15:50:41 +0000 Subject: [PATCH 2/3] Fix CI: check out matching verify branch instead of empty main The workflow checked out certifieddata/verify at the default ref (main), which only contains LICENSE + README until the verify PR merges. 
`npm run build` then fails because there is no source. Use github.head_ref so PR builds pick up the matching feature branch on the verify repo, falling back to main for post-merge push builds. --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c81f4e5..1a730ce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,6 +29,9 @@ jobs: - uses: actions/checkout@v4 with: repository: certifieddata/verify + # Match the branch name on the verify repo for PR builds; fall back + # to main on push-to-main builds (post-merge state). + ref: ${{ github.head_ref || 'main' }} path: verify - uses: actions/setup-node@v4 with: From 50f4947033c36211c1d0f9778f08c9c17373761e Mon Sep 17 00:00:00 2001 From: wawri Date: Wed, 6 May 2026 10:59:36 -0600 Subject: [PATCH 3/3] docs(readme): add CI + license + 'use this template' badges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'Use this template' badge links to the GitHub generate-from-template flow, which is the recommended path per the handoff doc README structure (item 7: 'gh repo create --template' instruction). It pairs with the existing copy-pasteable hero quickstart so a visitor can either run the demo locally OR fork the repo as their own scaffold in two clicks. Note: there's no npm badge here — reference-impl is intended to be cloned or used as a template, not installed from npm. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 371b6aa..c84d7e4 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # reference-impl +[![CI](https://github.com/certifieddata/reference-impl/actions/workflows/ci.yml/badge.svg)](https://github.com/certifieddata/reference-impl/actions/workflows/ci.yml) +[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) +[![Use this template](https://img.shields.io/badge/use%20this-template-181717?logo=github)](https://github.com/certifieddata/reference-impl/generate) + > Reference implementation: a high-risk AI system logging EU AI Act Article 12 evidence with CertifiedData.io's Decision Ledger. A ~30-line credit-scoring service that: