From 78fedceab2289797fc0b7af2069fa43ec8bad49a Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Wed, 13 May 2026 02:31:01 +0200 Subject: [PATCH 1/7] docs(adr): record octad/verification/justfile decisions, strip empty trees MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds three ADRs for verisimiser and one for verisimdb-data, plus the mechanical file deletions they authorise. - ADR-0001 (octad-ontology): concerns octad is canonical; modalities become Tier 2 overlays. Closes #19; sets up #20; closes #21 wontfix. - ADR-0002 (verification-tree): strip the empty 8-subdirectory tree; Idris2 stubs in src/interface/abi/ are unaffected. Closes #15. - ADR-0003 (justfile-aspirational-recipes): delete recipes that name non-existent clap subcommands. Closes #11 (#10 is the mechanical follow-up). - ADR-0001 (verisimdb-data, repo-purpose): repo carries two explicit purposes (scan store + ABI dogfood). Lands in the data repo commit. 
Deletes: - examples/SafeDOMExample.res, examples/web-project-deno.json (unrelated template flotsam — closes #12) - root SECURITY.md, root CODE_OF_CONDUCT.md (duplicate; .github/ versions are canonical — closes #13, #14) - verification/ subtree (closes #15 via ADR-0002) Closes #11, #12, #13, #14, #15, #19, #21 Co-Authored-By: Claude Opus 4.7 --- CODE_OF_CONDUCT.md | 27 ----- SECURITY.md | 21 ---- docs/decisions/ADR-0001-octad-ontology.adoc | 102 ++++++++++++++++ .../decisions/ADR-0002-verification-tree.adoc | 66 +++++++++++ ...DR-0003-justfile-aspirational-recipes.adoc | 52 +++++++++ examples/SafeDOMExample.res | 109 ------------------ examples/web-project-deno.json | 20 ---- verification/0.1-AI-MANIFEST.a2ml | 27 ----- verification/README.adoc | 1 - verification/benchmarks/0.2-AI-MANIFEST.a2ml | 11 -- verification/benchmarks/README.adoc | 1 - verification/coverage/0.2-AI-MANIFEST.a2ml | 12 -- verification/coverage/README.adoc | 1 - verification/fuzzing/0.2-AI-MANIFEST.a2ml | 11 -- verification/fuzzing/README.adoc | 1 - verification/proofs/0.2-AI-MANIFEST.a2ml | 11 -- verification/proofs/README.adoc | 1 - verification/safety_case/0.2-AI-MANIFEST.a2ml | 12 -- verification/safety_case/README.adoc | 1 - verification/simulations/0.2-AI-MANIFEST.a2ml | 11 -- verification/simulations/README.adoc | 1 - verification/tests/0.2-AI-MANIFEST.a2ml | 1 - verification/tests/README.adoc | 1 - .../traceability/0.2-AI-MANIFEST.a2ml | 12 -- verification/traceability/README.adoc | 1 - 25 files changed, 220 insertions(+), 294 deletions(-) delete mode 100644 CODE_OF_CONDUCT.md delete mode 100644 SECURITY.md create mode 100644 docs/decisions/ADR-0001-octad-ontology.adoc create mode 100644 docs/decisions/ADR-0002-verification-tree.adoc create mode 100644 docs/decisions/ADR-0003-justfile-aspirational-recipes.adoc delete mode 100644 examples/SafeDOMExample.res delete mode 100644 examples/web-project-deno.json delete mode 100644 verification/0.1-AI-MANIFEST.a2ml delete mode 100644 
verification/README.adoc delete mode 100644 verification/benchmarks/0.2-AI-MANIFEST.a2ml delete mode 100644 verification/benchmarks/README.adoc delete mode 100644 verification/coverage/0.2-AI-MANIFEST.a2ml delete mode 100644 verification/coverage/README.adoc delete mode 100644 verification/fuzzing/0.2-AI-MANIFEST.a2ml delete mode 100644 verification/fuzzing/README.adoc delete mode 100644 verification/proofs/0.2-AI-MANIFEST.a2ml delete mode 100644 verification/proofs/README.adoc delete mode 100644 verification/safety_case/0.2-AI-MANIFEST.a2ml delete mode 100644 verification/safety_case/README.adoc delete mode 100644 verification/simulations/0.2-AI-MANIFEST.a2ml delete mode 100644 verification/simulations/README.adoc delete mode 100644 verification/tests/0.2-AI-MANIFEST.a2ml delete mode 100644 verification/tests/README.adoc delete mode 100644 verification/traceability/0.2-AI-MANIFEST.a2ml delete mode 100644 verification/traceability/README.adoc diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index c32021a..0000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,27 +0,0 @@ - -# Contributor Covenant Code of Conduct - -## Our Pledge - -We pledge to make participation a harassment-free experience for everyone. - -## Our Standards - -**Positive behavior:** -* Using welcoming language -* Being respectful of differing viewpoints -* Accepting constructive criticism -* Focusing on what is best for the community - -**Unacceptable behavior:** -* Harassment, trolling, or personal attacks -* Publishing private information without permission - -## Enforcement - -Report issues to the maintainers. All complaints will be reviewed. - -## Attribution - -Adapted from [Contributor Covenant](https://www.contributor-covenant.org/) v2.1. 
- diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index ca3ebc8..0000000 --- a/SECURITY.md +++ /dev/null @@ -1,21 +0,0 @@ -# Security Policy - -## Supported Versions - -| Version | Supported | -|---------|-----------| -| 0.1.x | ✅ | - -## Reporting a Vulnerability - -Please report security vulnerabilities to: j.d.a.jewell@open.ac.uk - -Do NOT open a public issue for security vulnerabilities. - -## Response Time - -We aim to respond within 48 hours and provide a fix within 7 days for critical issues. - -## Scope - -This policy covers the verisimiser CLI tool and its generated artifacts. diff --git a/docs/decisions/ADR-0001-octad-ontology.adoc b/docs/decisions/ADR-0001-octad-ontology.adoc new file mode 100644 index 0000000..4d29be0 --- /dev/null +++ b/docs/decisions/ADR-0001-octad-ontology.adoc @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) += ADR-0001: Canonical octad ontology — concerns, not modalities +:revdate: 2026-05-13 +:status: Accepted + +== Status + +Accepted — 2026-05-13. + +Resolves: https://github.com/hyperpolymath/verisimiser/issues/19[V-L1-A1]. + +Closes as wontfix: https://github.com/hyperpolymath/verisimiser/issues/21[V-L1-A3] (the modalities-first refactor). + +Unblocks: https://github.com/hyperpolymath/verisimiser/issues/20[V-L1-A2] (the README rewrite). + +== Context + +Two competing ontologies have lived in this repository under the same name +("octad"): + +Modalities octad (README §"VeriSimDB's Octad: Eight Modalities"):: +Graph · Vector · Tensor · Semantic · Document · Temporal · Provenance · Spatial. +These are *representations* of an entity — how the same data is stored in +different shapes. The README's eight cross-modal drift categories +(structural, semantic, temporal, statistical, referential, provenance, +spatial, embedding) presuppose this ontology. 
+ +Concerns octad (`src/abi/mod.rs::OctadDimension`, `src/manifest/mod.rs::OctadConfig`, `src/main.rs::print_octad`):: +Data · Metadata · Provenance · Lineage · Constraints · AccessControl · +Temporal · Simulation. These are *concerns/aspects* of data — what you +want to know or enforce about it. + +The code commits to the concerns octad; the README leads with the modalities +octad. A user cannot answer "what does an octad-augmented entity look like?" +without picking one. + +== Decision + +The *concerns* octad is canonical. + +The eight dimensions of the verisimiser octad are: + +. **Data** — the original entity as stored in the target database. +. **Metadata** — schema and type information. +. **Provenance** — SHA-256 hash-chain tracking of who did what and when. +. **Lineage** — directed-edge graph of data derivation (schematically a DAG; see ADR-0004 when written). +. **Constraints** — cross-dimensional invariant enforcement, including + drift detection between Data + Metadata + active overlays. +. **AccessControl** — policy-based row/column-level access permissions. +. **Temporal** — version history with point-in-time queries and rollback. +. **Simulation** — what-if branching and sandbox query execution. + +Modalities (Graph, Vector, Tensor, Semantic, Document, Spatial) are +*Tier 2 overlays* — independent representational projections that a user +can enable per-entity for similarity search, full-text search, geospatial +indexing, etc. They are not "the octad" and not co-equal with the eight +concerns. Provenance and Temporal in the modalities list collapse onto +the same-named concerns. 
+ +The eight "cross-modal drift categories" become *symptoms observed by +the Constraints concern* when Data, Metadata, and the active overlays +disagree: + +[cols="1,2"] +|=== +| Drift category (legacy framing) | Where it lives in the concerns ontology + +| Structural | Constraints (Data vs Metadata schema agreement) +| Semantic | Constraints across overlays +| Temporal | Constraints between Temporal versions across overlays +| Statistical | Constraints over Tier 2 vector/tensor overlay drift +| Referential | Constraints between Tier 2 graph overlay and Data +| Provenance | Constraints over Provenance chain integrity +| Spatial | Constraints over Tier 2 spatial overlay +| Embedding | Constraints between Tier 2 vector overlay and source documents +|=== + +== Consequences + +. README and ROADMAP must be rewritten to drop the modalities octad table + and reframe the drift categories under Constraints. Tracked as V-L1-A2. +. The modalities-first refactor (V-L1-A3) is *not* done. Closed as wontfix. +. Tier 2 design (V-L1-F, V-L1-G, V-L1-H) continues to use modality terms + for overlays — they are just no longer presented as "the octad." +. `OctadDimension` enum and `OctadConfig` fields are stable; no source-level + rename triggered by this ADR. + +== Alternatives considered + +Modalities octad as canonical:: +Rejected. Would have required rewriting `abi`, `manifest`, `codegen`, tests, +and example manifests — a multi-week change for pre-alpha framing. The +codebase had already converged on the concerns ontology; the only cost of +keeping it is doc updates. + +"Both, with one as primary":: +Considered but rejected. Two ontologies sharing a name is the bug; +ranking them doesn't fix it. + +Renaming "octad" to something else (e.g. "octave"):: +Out of scope here. The brand survives; the contents are defined. 
diff --git a/docs/decisions/ADR-0002-verification-tree.adoc b/docs/decisions/ADR-0002-verification-tree.adoc new file mode 100644 index 0000000..f9bce7d --- /dev/null +++ b/docs/decisions/ADR-0002-verification-tree.adoc @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) += ADR-0002: Strip the empty verification/ subtree +:revdate: 2026-05-13 +:status: Accepted + +== Status + +Accepted — 2026-05-13. + +Resolves: https://github.com/hyperpolymath/verisimiser/issues/15[V-L3-D1]. + +== Context + +The `verification/` tree contained eight subdirectories +(`benchmarks/`, `coverage/`, `fuzzing/`, `proofs/`, `safety_case/`, +`simulations/`, `tests/`, `traceability/`), each with a ~20-byte +`README.adoc` and an a2ml manifest. Zero proofs, zero benchmarks, +zero fuzzing, zero safety case. + +(The actual Idris2 ABI declarations and Zig FFI implementation live in +`src/interface/abi/{Types,Layout,Foreign}.idr` and `src/interface/ffi/` +respectively — those are real Phase 0 stubs and are unaffected by this +ADR. The README's reference to "Idris2 proofs (in `src/interface/abi/`)" +already points at the correct location for the stubs; only the proofs +themselves are still future work.) + +A reader following the README's verification/-shaped tree was misled +into thinking benchmarks, fuzzing, safety cases, and traceability +artifacts existed for this product. + +== Decision + +Strip the empty `verification/` subtree from the repository. + +Reintroduce `verification/` only when there is something to put in it. +The first content is expected to be a property-test harness landing +alongside V-L1-C1 (the Tier 1 SQLite piggyback) — at that point a +single `verification/property-tests/` directory will be added with +real content. Don't predeclare the rest of the eight subdirs until +they have artifacts. + +== Consequences + +. The empty `verification/` subtree is deleted in the same change set + that lands this ADR. 
+. The README's "Status" / "Architecture" copy that referenced + `verification/` is reworded to point at `src/interface/abi/` only + (where the Idris2 stubs actually live). +. Future verification artifacts (proofs, benchmarks, fuzz targets) are + added directory-by-directory as they appear, not predeclared. The + first such addition is expected alongside V-L1-C1 (Tier 1 SQLite + piggyback) as a property-test harness. + +== Alternatives considered + +Populate the subtree:: +Rejected for now. Writing eight stub READMEs costs effort and doesn't +move the product forward. The contracts the README cited (drift +correctness, chain integrity, version ordering, sidecar isolation) +become provable only once the Tier 1 implementation exists, which is +V-L1-C1. + +Leave the subtree, soften the README:: +Rejected. Empty scaffolding misleads contributors who follow the tree +looking for examples to extend. diff --git a/docs/decisions/ADR-0003-justfile-aspirational-recipes.adoc b/docs/decisions/ADR-0003-justfile-aspirational-recipes.adoc new file mode 100644 index 0000000..281eb87 --- /dev/null +++ b/docs/decisions/ADR-0003-justfile-aspirational-recipes.adoc @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) += ADR-0003: Remove aspirational Justfile recipes +:revdate: 2026-05-13 +:status: Accepted + +== Status + +Accepted — 2026-05-13. + +Resolves: https://github.com/hyperpolymath/verisimiser/issues/11[V-L3-C2]. +Unblocks: https://github.com/hyperpolymath/verisimiser/issues/10[V-L3-C1] (the +mechanical fix to the broken recipe block). + +== Context + +`Justfile` carried a recipe block whose source contained literal `\n` +characters where newlines were intended. The intent was three recipes +(`augment`, `check-octad`, `migrate`) calling clap subcommands of the +same names. 
The clap CLI in `src/main.rs` exposes +`init / generate / start / drift / provenance / history / status / octad` +— there are no `augment`, `check-octad`, or `migrate` subcommands. + +So the Justfile advertised commands that don't exist, and did so in a +syntactically broken way that made the breakage invisible to `just --list`. + +== Decision + +Delete the three aspirational recipes from `Justfile`. + +The Justfile should only reference subcommands that exist. When new +subcommands are added (per their own design issues), recipe wrappers +can be added at the same time. + +== Consequences + +. The broken `\n`-collapsed block is removed cleanly (V-L3-C1). +. `just --list` no longer shows phantom recipes. +. Future "convenience wrapper" recipes must be added in the same change + set as the underlying subcommand. + +== Alternatives considered + +Add the missing subcommands:: +Out of scope for V-L3-C2/C1, which are ground-clearing issues. The +semantics of `augment` / `check-octad` / `migrate` aren't pinned down — +e.g. `augment DB_URL` overlaps with `generate` (overlay DDL) plus `start` +(daemon). Designing them is a separate issue, not a one-line fix. + +Keep the recipes pointing at not-yet-implemented subcommands:: +Rejected. The recipes would still fail with "Unknown subcommand"; the +broken UX would just be in a different place. diff --git a/examples/SafeDOMExample.res b/examples/SafeDOMExample.res deleted file mode 100644 index 2c1b5b3..0000000 --- a/examples/SafeDOMExample.res +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-License-Identifier: PMPL-1.0-or-later -// Example: Using SafeDOM for formally verified DOM mounting - -open SafeDOM - -// Example 1: Basic mounting with error handling -let mountApp = () => { - mountSafe( - "#app", - "

Hello, World!

Mounted safely with proofs.

", - ~onSuccess=el => { - Console.log("✓ App mounted successfully!") - Console.log("Element:", el) - }, - ~onError=err => { - Console.error("✗ Mount failed:", err) - } - ) -} - -// Example 2: Wait for DOM ready before mounting -let mountWhenDOMReady = () => { - mountWhenReady( - "#app", - "

App Title

", - ~onSuccess=_ => Console.log("✓ Mounted after DOM ready"), - ~onError=err => Console.error("✗ Failed:", err) - ) -} - -// Example 3: Batch mounting (atomic - all or nothing) -let mountMultiple = () => { - let specs = [ - {selector: "#header", html: "

Site Title

"}, - {selector: "#nav", html: ""}, - {selector: "#main", html: "

Content here

"}, - {selector: "#footer", html: "
© 2026
"} - ] - - switch mountBatch(specs) { - | Ok(elements) => { - Console.log(`✓ Successfully mounted ${Array.length(elements)} elements`) - elements->Array.forEach(el => Console.log(" -", el)) - } - | Error(err) => { - Console.error("✗ Batch mount failed:", err) - Console.error(" (None were mounted - atomic operation)") - } - } -} - -// Example 4: Explicit validation before mounting -let mountWithValidation = () => { - // Validate selector first - switch ProvenSelector.validate("#my-app") { - | Error(e) => Console.error(`Invalid selector: ${e}`) - | Ok(validSelector) => { - // Validate HTML - switch ProvenHTML.validate("
Content
") { - | Error(e) => Console.error(`Invalid HTML: ${e}`) - | Ok(validHtml) => { - // Now mount with proven safety - switch mount(validSelector, validHtml) { - | Mounted(el) => Console.log("✓ Mounted with validated inputs:", el) - | MountPointNotFound(s) => Console.error(`✗ Element not found: ${s}`) - | InvalidSelector(_) => Console.error("Impossible - already validated") - | InvalidHTML(_) => Console.error("Impossible - already validated") - } - } - } - } -} - -// Example 5: Integration with TEA -module MyApp = { - type model = {message: string} - type msg = NoOp - - let init = () => {message: "Hello from TEA"} - let update = (model, _msg) => model - let view = model => `

${model.message}

` -} - -let mountTEAApp = () => { - let model = MyApp.init() - let html = MyApp.view(model) - - mountWhenReady( - "#tea-app", - html, - ~onSuccess=el => { - Console.log("✓ TEA app mounted") - // Set up event handlers, subscriptions here - }, - ~onError=err => Console.error(`✗ TEA mount failed: ${err}`) - ) -} - -// Entry point -let main = () => { - Console.log("SafeDOM Examples") - Console.log("================\n") - - // Choose which example to run - mountWhenDOMReady() // Run on DOM ready -} - -// Auto-execute when module loads -main() diff --git a/examples/web-project-deno.json b/examples/web-project-deno.json deleted file mode 100644 index 5ddd3bd..0000000 --- a/examples/web-project-deno.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "// NOTE": "Example deno.json for ReScript web projects", - "tasks": { - "build": "deno run -A npm:rescript", - "clean": "deno run -A npm:rescript clean", - "watch": "deno run -A npm:rescript -w", - "serve": "deno run -A jsr:@std/http/file-server .", - "test": "deno test --allow-all" - }, - "imports": { - "rescript": "^12.0.0", - "@rescript/core": "npm:@rescript/core@^1.6.0", - "safe-dom/": "https://raw.githubusercontent.com/hyperpolymath/rescript-dom-mounter/main/src/", - "proven/": "../proven/bindings/rescript/src/" - }, - "compilerOptions": { - "allowJs": true, - "checkJs": false - } -} diff --git a/verification/0.1-AI-MANIFEST.a2ml b/verification/0.1-AI-MANIFEST.a2ml deleted file mode 100644 index 39b370f..0000000 --- a/verification/0.1-AI-MANIFEST.a2ml +++ /dev/null @@ -1,27 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "verification-pillar" -level: 1 -parent: "../0-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - Primary verification pillar. Contains evidence for correctness, - performance, formal proofs, randomized testing, and aerospace-grade - high-assurance metrics (MC/DC coverage, traceability, safety cases). 
- -canonical_locations: - tests: "tests/" - benchmarks: "benchmarks/" - proofs: "proofs/" - fuzzing: "fuzzing/" - simulations: "simulations/" - coverage: "coverage/" - traceability: "traceability/" - safety_case: "safety_case/" - -invariants: - - "Evidence MUST be reproducible and documented" - - "High-assurance deployments MUST satisfy traceability and safety_case requirements" diff --git a/verification/README.adoc b/verification/README.adoc deleted file mode 100644 index f07e7f3..0000000 --- a/verification/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Verification Pillar diff --git a/verification/benchmarks/0.2-AI-MANIFEST.a2ml b/verification/benchmarks/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index 6416309..0000000 --- a/verification/benchmarks/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "benches-pillar" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - Benches pillar. diff --git a/verification/benchmarks/README.adoc b/verification/benchmarks/README.adoc deleted file mode 100644 index 5db7648..0000000 --- a/verification/benchmarks/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Benchmarks Unit diff --git a/verification/coverage/0.2-AI-MANIFEST.a2ml b/verification/coverage/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index fc15bd3..0000000 --- a/verification/coverage/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "verification-unit-coverage" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - High-assurance verification unit for coverage. - Critical for safety-of-life and aerospace-grade deployment standards. 
diff --git a/verification/coverage/README.adoc b/verification/coverage/README.adoc deleted file mode 100644 index 2566956..0000000 --- a/verification/coverage/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Coverage Unit diff --git a/verification/fuzzing/0.2-AI-MANIFEST.a2ml b/verification/fuzzing/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index 79c4fef..0000000 --- a/verification/fuzzing/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "fuzzing-unit" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - Fuzzing unit for high-rigor verification. diff --git a/verification/fuzzing/README.adoc b/verification/fuzzing/README.adoc deleted file mode 100644 index edeb179..0000000 --- a/verification/fuzzing/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Fuzzing Unit diff --git a/verification/proofs/0.2-AI-MANIFEST.a2ml b/verification/proofs/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index 0e5666f..0000000 --- a/verification/proofs/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "verification-unit-proofs" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - Sub-unit focusing on proofs. diff --git a/verification/proofs/README.adoc b/verification/proofs/README.adoc deleted file mode 100644 index 1ae324d..0000000 --- a/verification/proofs/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Proofs Unit diff --git a/verification/safety_case/0.2-AI-MANIFEST.a2ml b/verification/safety_case/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index 818fba4..0000000 --- a/verification/safety_case/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "verification-unit-safety_case" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - High-assurance verification unit for safety case. 
- Critical for safety-of-life and aerospace-grade deployment standards. diff --git a/verification/safety_case/README.adoc b/verification/safety_case/README.adoc deleted file mode 100644 index 47c8e36..0000000 --- a/verification/safety_case/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Safety case Unit diff --git a/verification/simulations/0.2-AI-MANIFEST.a2ml b/verification/simulations/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index f40fc1c..0000000 --- a/verification/simulations/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "simulations-unit" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - Simulations unit for high-rigor verification. diff --git a/verification/simulations/README.adoc b/verification/simulations/README.adoc deleted file mode 100644 index 8e1b13a..0000000 --- a/verification/simulations/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Simulations Unit diff --git a/verification/tests/0.2-AI-MANIFEST.a2ml b/verification/tests/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index 0008fcf..0000000 --- a/verification/tests/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1 +0,0 @@ -# AI Manifest - Level 1: tests diff --git a/verification/tests/README.adoc b/verification/tests/README.adoc deleted file mode 100644 index 344bf86..0000000 --- a/verification/tests/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Tests Unit diff --git a/verification/traceability/0.2-AI-MANIFEST.a2ml b/verification/traceability/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index defa125..0000000 --- a/verification/traceability/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "verification-unit-traceability" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - High-assurance verification unit for traceability. - Critical for safety-of-life and aerospace-grade deployment standards. 
diff --git a/verification/traceability/README.adoc b/verification/traceability/README.adoc deleted file mode 100644 index ff23dd7..0000000 --- a/verification/traceability/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Traceability Unit From de0e7fbb36db3c9b0f5631bf2c9da12cf600dabb Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Wed, 13 May 2026 02:34:09 +0200 Subject: [PATCH 2/7] test(integration): fix verisim_ vs verisimdb_ prefix + Windows path escaping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs in tests/integration_test.rs caused 2 of 9 integration tests to fail (the unit tests were unaffected). 1. Prefix mismatch — codegen emits identifiers prefixed `verisimdb_` (see src/codegen/overlay.rs). The integration tests asserted substring presence of `verisim_…` which is not a substring of `verisimdb_…`. Replaced 11 occurrences in tests/integration_test.rs. 2. Windows path escaping — test_end_to_end_file_workflow interpolates `schema_path.display()` into a TOML basic string with `"…"`. On Windows the path contains backslashes which TOML treats as escapes, producing a malformed manifest and an unwrap-on-Err. Switched the embedded path to a TOML literal string (single quotes) which suppresses escape interpretation. Verified: cargo test now reports 26 + 26 + 9 = 61 tests, 0 failed. Closes #8 Co-Authored-By: Claude Opus 4.7 --- tests/integration_test.rs | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 093ad97..5cad7e1 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -81,23 +81,23 @@ fn test_full_pipeline_blog_schema() { // Verify all expected sidecar tables are present. 
assert!( - overlay_ddl.contains("verisim_metadata"), + overlay_ddl.contains("verisimdb_metadata"), "Should contain metadata table" ); assert!( - overlay_ddl.contains("verisim_provenance_log"), + overlay_ddl.contains("verisimdb_provenance_log"), "Should contain provenance table" ); assert!( - overlay_ddl.contains("verisim_lineage_graph"), + overlay_ddl.contains("verisimdb_lineage_graph"), "Should contain lineage table" ); assert!( - overlay_ddl.contains("verisim_temporal_versions"), + overlay_ddl.contains("verisimdb_temporal_versions"), "Should contain temporal table" ); assert!( - overlay_ddl.contains("verisim_access_policies"), + overlay_ddl.contains("verisimdb_access_policies"), "Should contain access policies table" ); @@ -149,9 +149,9 @@ fn test_full_pipeline_blog_schema() { // Step 4: Render interceptors to SQL and verify output. let rendered = query::render_interceptors(&interceptors); - assert!(rendered.contains("verisim_users_with_provenance")); - assert!(rendered.contains("verisim_posts_with_temporal")); - assert!(rendered.contains("verisim_comments_with_provenance")); + assert!(rendered.contains("verisimdb_users_with_provenance")); + assert!(rendered.contains("verisimdb_posts_with_temporal")); + assert!(rendered.contains("verisimdb_comments_with_provenance")); } // --------------------------------------------------------------------------- @@ -433,7 +433,9 @@ fn test_end_to_end_file_workflow() { .unwrap(); } - // Write a manifest file. + // Write a manifest file. Note: on Windows, schema_path uses backslashes + // which are escape characters in TOML basic strings — emit the path as a + // TOML literal string (single-quoted) to dodge escape interpretation. 
let manifest_path = dir.path().join("verisimiser.toml"); { let mut f = std::fs::File::create(&manifest_path).unwrap(); @@ -446,7 +448,7 @@ name = "test-articles" [database] backend = "sqlite" connection-string-env = "TEST_DB" -schema-source = "{}" +schema-source = '{}' [octad] enable-provenance = true @@ -476,14 +478,14 @@ path = ".verisim/test.db" // Generate overlay. let overlay_ddl = overlay::generate_sidecar_schema(&schema, &manifest.octad); - assert!(overlay_ddl.contains("verisim_provenance_log")); - assert!(overlay_ddl.contains("verisim_temporal_versions")); + assert!(overlay_ddl.contains("verisimdb_provenance_log")); + assert!(overlay_ddl.contains("verisimdb_temporal_versions")); assert!( - !overlay_ddl.contains("verisim_lineage_graph"), + !overlay_ddl.contains("verisimdb_lineage_graph"), "Lineage is disabled" ); assert!( - !overlay_ddl.contains("verisim_access_policies"), + !overlay_ddl.contains("verisimdb_access_policies"), "Access control is disabled" ); From 6a0ccfdf91f85b0b579a990198ac17a190698301 Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Wed, 13 May 2026 02:35:03 +0200 Subject: [PATCH 3/7] build(just): remove aspirational augment/check-octad/migrate recipes Per ADR-0003. The previous recipe block contained literal `\n` characters where newlines were intended, collapsing three recipes into one syntactically broken rule whose target name embedded `\n`. Even with newlines restored the recipes pointed at clap subcommands that don't exist in src/main.rs. Replaced the block with a comment placeholder noting why it was removed and what to do when the subcommands ship. Closes #10 Co-Authored-By: Claude Opus 4.7 --- Justfile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Justfile b/Justfile index 465036a..8aafee0 100644 --- a/Justfile +++ b/Justfile @@ -51,8 +51,12 @@ assail: @command -v panic-attack >/dev/null 2>&1 && panic-attack assail . 
|| echo "panic-attack not found — install from https://github.com/hyperpolymath/panic-attacker" # --- Domain-Specific Recipes (verisimiser) --- - -# Augment a database with VeriSimDB octad\naugment DB_URL:\n cargo run -- augment {{DB_URL}}\n\n# Check octad layer completeness\ncheck-octad DB_URL:\n cargo run -- check-octad {{DB_URL}}\n\n# Generate migration scripts\nmigrate DB_URL:\n cargo run -- migrate {{DB_URL}} +# +# (Reserved.) Recipes for clap subcommands like `augment`, `check-octad`, +# and `migrate` were removed per ADR-0003: they wrapped subcommands that +# don't exist in src/main.rs (the real subcommands are `init`, `generate`, +# `start`, `drift`, `provenance`, `history`, `status`, `octad`). +# Re-add wrappers here when their underlying subcommands ship. # Run contractile checks contractile-check: From b218130524c7f43c2f1ee5a282cc28e937bbf755 Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Wed, 13 May 2026 02:38:45 +0200 Subject: [PATCH 4/7] chore(lint): remove blanket #![allow(...)] blocks; fix surfaced lints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 12-lint allow block in both lib.rs and main.rs silenced clippy across the codebase, making `just lint` (`cargo clippy -- -D warnings`) a hollow signal. Removed both blocks and fixed every lint clippy surfaced. Fixes: - codegen/query.rs:124 — nested format!() flagged by `clippy::format_in_format_args`. Combined `format!("{}::text", format!("{}.ctid", t))` into `format!("{}.ctid::text", t)`. - manifest/mod.rs:309 — `init_manifest` had a dead ternary returning "false" on both branches (flagged by `clippy::if_same_then_else`). Replaced with a single binding plus a comment explaining where the per-backend toggle would go if/when it becomes real.
- main.rs — was re-declaring `mod abi; mod codegen; mod intercept; mod manifest; mod tier1; mod tier2;` already declared in `lib.rs`, so each module compiled twice. From the bin's perspective most of the ABI types (ProvenanceEntry, LineageEdge, TemporalVersion, AccessPolicy, SidecarConfig, DriftCategory, …) appeared as dead code. Replaced the six `mod …;` lines with `use verisimiser::{abi, codegen, manifest};` so the bin consumes the library properly. This also halves redundant test runs (35 unique tests instead of 61 with duplicates). Verified: - `cargo clippy --all-targets -- -D warnings` exits clean - `cargo test` reports 26 lib + 9 integration tests, 0 failed Closes #16, #17 Co-Authored-By: Claude Opus 4.7 --- src/codegen/query.rs | 2 +- src/lib.rs | 14 -------------- src/main.rs | 22 +--------------------- src/manifest/mod.rs | 10 +++++----- 4 files changed, 7 insertions(+), 41 deletions(-) diff --git a/src/codegen/query.rs b/src/codegen/query.rs index 86d1e3c..9f5f99a 100644 --- a/src/codegen/query.rs +++ b/src/codegen/query.rs @@ -121,7 +121,7 @@ fn build_entity_id_expr(pk_columns: &[&str], table_name: &str, backend: Database // No PK defined — fall back to internal row identifier. 
match backend { DatabaseBackend::SQLite => format!("{}.rowid", table_name), - DatabaseBackend::PostgreSQL => format!("{}::text", format!("{}.ctid", table_name)), + DatabaseBackend::PostgreSQL => format!("{}.ctid::text", table_name), DatabaseBackend::MongoDB => "CAST(_id AS TEXT)".to_string(), } } else if pk_columns.len() == 1 { diff --git a/src/lib.rs b/src/lib.rs index a584e89..38eba2c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,18 +1,4 @@ #![forbid(unsafe_code)] -#![allow( - dead_code, - clippy::too_many_arguments, - clippy::manual_strip, - clippy::if_same_then_else, - clippy::vec_init_then_push, - clippy::upper_case_acronyms, - clippy::format_in_format_args, - clippy::enum_variant_names, - clippy::module_inception, - clippy::doc_lazy_continuation, - clippy::manual_clamp, - clippy::type_complexity -)] // SPDX-License-Identifier: PMPL-1.0-or-later // Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) // diff --git a/src/main.rs b/src/main.rs index bac3e96..534eaeb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,17 +1,3 @@ -#![allow( - dead_code, - clippy::too_many_arguments, - clippy::manual_strip, - clippy::if_same_then_else, - clippy::vec_init_then_push, - clippy::upper_case_acronyms, - clippy::format_in_format_args, - clippy::enum_variant_names, - clippy::module_inception, - clippy::doc_lazy_continuation, - clippy::manual_clamp, - clippy::type_complexity -)] #![forbid(unsafe_code)] // SPDX-License-Identifier: PMPL-1.0-or-later // Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) @@ -31,13 +17,7 @@ use anyhow::Result; use clap::{Parser, Subcommand}; - -mod abi; -mod codegen; -mod intercept; -mod manifest; -mod tier1; -mod tier2; +use verisimiser::{abi, codegen, manifest}; /// VeriSimiser — augment any database with VeriSimDB octad capabilities. 
#[derive(Parser)] diff --git a/src/manifest/mod.rs b/src/manifest/mod.rs index c6a678b..504db61 100644 --- a/src/manifest/mod.rs +++ b/src/manifest/mod.rs @@ -306,11 +306,11 @@ pub fn init_manifest(database: &str) -> Result<()> { anyhow::bail!("{} already exists — remove it first to reinitialise", path); } - let enable_simulation = if database == "sqlite" { - "false" - } else { - "false" - }; + // Simulation defaults to off across all backends. The previous ternary + // returned "false" on both branches; if backend-specific defaults are + // needed later (e.g. enable simulation only when the storage is SQLite), + // this is the place to add them. + let enable_simulation = "false"; let template = format!( r#"# SPDX-License-Identifier: PMPL-1.0-or-later From 46663c7ff7fafc130ac0ac92623417e0b5fd41a5 Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Wed, 13 May 2026 02:40:44 +0200 Subject: [PATCH 5/7] docs(readme,roadmap): align around concerns octad (ADR-0001) Per ADR-0001 the canonical octad is concerns (Data/Metadata/Provenance/Lineage/Constraints/AccessControl/Temporal/ Simulation), not modalities. The previous README led with a modalities table the codebase no longer supported. README.adoc rewrites: - Replace the "Eight Modalities" table with an "Eight Concerns" table whose rows match `OctadDimension` enum, OctadConfig fields, and the emitted sidecar tables. - Reframe the eight cross-modal drift categories under Constraints (they are symptoms observed by Constraints when Data, Metadata, and active Tier 2 overlays disagree). Note explicitly that each category still needs a computable definition. - Tier 1 narrative reorganised around the five Tier 1 concerns (Provenance, Temporal, Constraints, Lineage, AccessControl). - Tier 2 retains modalities but as overlay representations, not as "the octad". - Add a "Related repos" section linking verisimdb-data. 
- Add an "ABI" section pointing at src/interface/abi/ and src/interface/ffi/ where the Idris2 and Zig stubs actually live. - Cite ADR-0001 and ADR-0002 inline. ROADMAP.adoc rewrites: - Phase 0 marked complete with accurate evidence (ABI types exist in three languages; codegen scaffolding ships). - Phases reordered to match the bottom-up plan: SQLite Tier 1 MVP first (cheapest end-to-end), then PostgreSQL, then multi-backend, then Constraints/Drift, then AccessControl/Lineage, then Tier 2 modality overlays, then Simulation, then VCL-total integration, then production hardening, then ecosystem. - Each phase phrased in concerns/modality terms consistent with ADR-0001. Closes #20 Co-Authored-By: Claude Opus 4.7 --- README.adoc | 255 +++++++++++++++++++++++++++++---------------------- ROADMAP.adoc | 116 ++++++++++++++++------- 2 files changed, 232 insertions(+), 139 deletions(-) diff --git a/README.adoc b/README.adoc index 87184fd..a2df91f 100644 --- a/README.adoc +++ b/README.adoc @@ -7,12 +7,19 @@ Jonathan D.A. Jewell :icons: font :source-highlighter: rouge +== Related repos + +* https://github.com/hyperpolymath/verisimdb-data[`verisimdb-data`] — + flat-file store for scan results and drift snapshots, and the dogfood + site for the hyperpolymath Idris2 + Zig ABI shared with `proven`, + `burble`, `gossamer`. Verisimiser does not require it. + == What Is This? VeriSimiser augments existing databases with capabilities from -https://github.com/hyperpolymath/nextgen-databases[VeriSimDB]'s octad model — -specifically the capabilities that work as genuine piggybacks without requiring -you to replace your database. +https://github.com/hyperpolymath/nextgen-databases[VeriSimDB]'s **octad +model** — eight concerns layered over any backend without forcing a +schema migration. **Honest framing**: this is not a pure bolt-on like the language -isers. Language -isers generate a separate wrapper alongside your code — one-way @@ -20,64 +27,47 @@ dependency, your code untouched. 
Database augmentation is fundamentally different because it interacts with shared mutable state. VeriSimiser is therefore split into two tiers: -* **Tier 1 (true piggyback)** — capabilities that sit alongside or in front - of your database, never touching its storage engine. These are safe bolt-ons. -* **Tier 2 (augmentation layer)** — capabilities that require additional - storage alongside your database. These are honest about being "VeriSimDB - with your database as one backend" rather than pretending to be invisible. +* **Tier 1 (true piggyback)** — concerns that sit alongside or in front + of your database, writing only to a sidecar and never touching your + storage engine. Safe bolt-ons. +* **Tier 2 (augmentation layer)** — modality overlays (graph, vector, + tensor, semantic, document, spatial) that require additional storage + alongside your database. Honest about being "VeriSimDB with your + database as one backend" rather than pretending to be invisible. -== VeriSimDB's Octad: Eight Modalities +== The Octad: Eight Concerns -Each entity in VeriSimDB exists simultaneously across up to 8 representations: +Every entity in a verisimiser-augmented database is observable along up +to eight concerns. Two are inherent to the target database (always on); +the remaining six are added by sidecars. 
-[cols="1,2,1,1"] +[cols="1,3,2,1"] |=== -| Modality | Purpose | Storage | VeriSimiser Tier - -| **Graph** | RDF triples and property graph edges | Pure Rust | Tier 2 -| **Vector** | Embeddings for similarity search | HNSW | Tier 2 -| **Tensor** | Multi-dimensional numeric data | ndarray/Burn | Tier 2 -| **Semantic** | Type annotations and CBOR proof blobs | CBOR | Tier 2 -| **Document** | Full-text searchable content | Tantivy | Tier 2 -| **Temporal** | Version history and time-series | chrono | Tier 1 ✓ -| **Provenance** | Origin tracking and transformation chain | SHA-256 hash-chain | Tier 1 ✓ -| **Spatial** | Geospatial coordinates and geometries | R-tree | Tier 2 +| Concern | Purpose | Sidecar storage | Tier + +| **Data** | The original entity as stored in the target DB | (target DB itself) | inherent +| **Metadata** | Schema and type information extracted from the target DB | `verisimdb_metadata` | inherent +| **Provenance** | SHA-256 hash-chain of who did what and when | `verisimdb_provenance_log` | Tier 1 ✓ +| **Lineage** | Directed-edge graph of data derivation (intended DAG; see ADR future) | `verisimdb_lineage_graph` | Tier 1 +| **Constraints** | Cross-dimensional invariants and drift between Data + Metadata + overlays | (rules + observers, not a table) | Tier 1 +| **AccessControl** | Row/column-level access policies evaluated at query time | `verisimdb_access_policies` | Tier 1 +| **Temporal** | Version history with point-in-time queries and rollback | `verisimdb_temporal_versions` | Tier 1 ✓ +| **Simulation** | What-if branching and sandbox query execution | `verisimdb_simulation_branches` + `_deltas` | Tier 2 |=== +The codebase commits to this ontology in +`src/abi/mod.rs::OctadDimension`, `src/manifest/mod.rs::OctadConfig`, +and the codegen tables. See `docs/decisions/ADR-0001-octad-ontology.adoc` +for why "concerns" was chosen over the alternative "modalities" framing. 
+ == Tier 1: True Piggybacks These work like PostGIS — they add capability without replacing anything. -=== Cross-Modal Drift Detection - -The jewel of VeriSimDB. Drift detection monitors whether representations -of the same entity stay consistent across modalities. VeriSimiser can -overlay drift detection onto any database: - -* Your database stores the primary data (unchanged) -* VeriSimiser maintains a lightweight **drift index** alongside it -* Queries pass through to your database normally -* VeriSimiser intercepts results and checks for cross-modal inconsistencies -* Alerts when representations drift apart - -This is a **read-path augmentation** — it observes query results, it doesn't -modify them. Safe to add, safe to remove, no data dependency. - -VeriSimDB detects eight categories of cross-modal drift: - -1. Structural drift — schema changes not reflected across modalities -2. Semantic drift — meaning divergence between representations -3. Temporal drift — version skew between modalities -4. Statistical drift — distribution shift in vector/tensor spaces -5. Referential drift — broken links between graph and document modalities -6. Provenance drift — transformation chain inconsistencies -7. Spatial drift — coordinates inconsistent with other modalities -8. Embedding drift — vector embeddings stale relative to source documents - === Provenance Tracking Hash-chain verified origin tracking for every piece of data. VeriSimiser -can add provenance to any database as a sidecar: +adds provenance to any database as a sidecar: * Every write is intercepted and a provenance record created * SHA-256 hash chain links provenance records in order @@ -90,7 +80,7 @@ change what happened. The provenance chain is stored in a separate sidecar === Temporal Versioning -Automatic version history for entities. VeriSimiser can maintain a temporal +Automatic version history for entities. 
VeriSimiser maintains a temporal sidecar that records every state change: * Point-in-time queries: "what did this entity look like at time T?" @@ -98,67 +88,95 @@ sidecar that records every state change: * Rollback capability: restore any entity to a previous state * Retention policies: auto-prune history older than N days -This piggybacks onto write events (triggers, CDC, or application-level hooks) -and stores version history in a separate sidecar. +This piggybacks onto write events (triggers, CDC, or application-level +hooks) and stores version history in a separate sidecar. + +=== Constraints & Drift Detection + +The Constraints concern enforces cross-dimensional invariants and +observes *drift* — places where Data, Metadata, and any active Tier 2 +overlays disagree. Drift detection is a **read-path observer**: it +inspects query results, never modifies them. + +VeriSimiser tracks eight symptomatic categories (mapped onto the +Constraints concern): + +. **Structural drift** — schema changes not reflected across active overlays +. **Semantic drift** — meaning divergence between overlays +. **Temporal drift** — version skew between overlays +. **Statistical drift** — distribution shift in vector/tensor overlays +. **Referential drift** — broken links between the graph overlay and Data +. **Provenance drift** — transformation-chain inconsistencies +. **Spatial drift** — coordinates inconsistent with other overlays +. **Embedding drift** — vector embeddings stale relative to source documents + +(Each category needs a computable definition before it can be detected; +this is on the roadmap, not implemented yet.) + +=== Lineage + +Directed-edge graph of data derivation: which entity was derived from +which other entity, and by what transformation. Intended as a DAG (cycle +prevention enforcement is a future ADR). + +=== Access Control + +Row-level and column-level policies evaluated at query time. 
Independent +of any backend-native role system; verisimiser interprets policies and +filters/redacts results. == Tier 2: Augmentation Layer These capabilities require additional storage alongside your database. -They're honest about being "VeriSimDB modalities with your database as the -document/primary store." This is still valuable — it's how you get octad -capabilities incrementally — but it's not a bolt-on. +They are honest about being "VeriSimDB modalities with your database as +the document/primary store." This is still valuable — it's how you get +extra modality projections incrementally — but it's not invisible. -* **Graph overlay**: Add RDF triples and property graph edges to entities +* **Graph overlay**: Add RDF triples and property-graph edges to entities in your relational database. Stored in a separate graph index. -* **Vector overlay**: Add embeddings for similarity search. Stored in HNSW - index alongside your database. +* **Vector overlay**: Add embeddings for similarity search. Stored in + HNSW index alongside your database. * **Tensor overlay**: Add multi-dimensional numeric data. Stored in ndarray-backed sidecar. * **Semantic overlay**: Add type annotations and proof blobs. Stored in CBOR sidecar. +* **Document overlay**: Add full-text search over text columns. + Stored in a Tantivy index. * **Spatial overlay**: Add geospatial coordinates. Stored in R-tree sidecar. +* **Simulation**: Branched copies of data for what-if analysis. Stored + in `verisimdb_simulation_branches` + `_deltas`. -Each Tier 2 modality has its own storage and can be enabled independently. -Your primary database remains the source of truth for its native data. +Each Tier 2 overlay has its own storage and can be enabled +independently. Your primary database remains the source of truth for +its native data. 
== The Manifest [source,toml] ---- -[verisimiser] +[project] name = "my-augmented-db" +version = "0.1.0" [database] -target-db = "postgresql" -connection-string = "postgres://localhost/mydb" - -# Tier 1: true piggybacks (no additional storage in your database) -[tier1] -drift-detection = true # cross-modal drift monitoring -provenance = true # SHA-256 hash-chain audit trail -temporal-versioning = true # automatic version history - -[tier1.provenance] -sidecar = "sqlite" # sqlite | file | verisim -sidecar-path = ".verisimiser/provenance.db" - -[tier1.temporal] -sidecar = "sqlite" -retention-days = 90 - -# Tier 2: augmentation layer (additional storage alongside your database) -[tier2] -graph = false # RDF/property graph overlay -vector = false # embedding similarity search -tensor = false # multi-dimensional numeric -semantic = false # type annotations + proof blobs -spatial = false # geospatial coordinates - -[tier2.vector] -# model = "sentence-transformers/all-MiniLM-L6-v2" -# dimensions = 384 +backend = "sqlite" # sqlite | postgresql | mongodb +connection-string-env = "DATABASE_URL" # env var; never the literal secret +schema-source = "schema.sql" # optional; SQL DDL describing target schema + +[octad] +enable-provenance = true +enable-lineage = true +enable-temporal = true +enable-access-control = true +enable-simulation = false # Tier 2: branching/sandboxing + +[sidecar] +storage = "sqlite" # sqlite (default) | json +path = ".verisim/sidecar.db" ---- +Run `verisimiser init --database sqlite` to generate a starter manifest. 
+ == Architecture [source] @@ -167,20 +185,20 @@ Your Application │ ├──── writes ────► Your Database (unchanged) │ │ - │ VeriSimiser intercepts + │ VeriSimiser intercepts (write-path observer) │ │ │ ┌─────────────┼──────────────┐ │ │ │ │ - │ Drift Index Provenance Temporal - │ (Tier 1) Sidecar Sidecar - │ (Tier 1) (Tier 1) + │ Provenance Temporal Lineage / AccessControl / + │ sidecar sidecar Constraints sidecars (Tier 1) │ - └──── optional ──► Tier 2 Sidecars - (Graph, Vector, Tensor, - Semantic, Spatial) + └──── optional ──► Tier 2 modality overlays + (Graph, Vector, Tensor, Semantic, + Document, Spatial, Simulation) ---- -**Interception methods** (configurable per database): +**Interception methods** (configurable per database — most are still +roadmap items, see `ROADMAP.adoc`): * **PostgreSQL**: logical replication / `pg_notify` / triggers * **MySQL**: binlog CDC / triggers @@ -188,32 +206,53 @@ Your Application * **MongoDB**: change streams * **Application-level**: middleware / ORM hooks +== ABI + +The Application Binary Interface for the augmentation layer is declared +in two languages: + +* `src/interface/abi/` — Idris2 type definitions + (`Types.idr`, `Layout.idr`, `Foreign.idr`). These are the canonical + shapes; formal proofs of correctness against them are future work + (see `docs/decisions/ADR-0002-verification-tree.adoc`). +* `src/interface/ffi/` — Zig C-compatible FFI implementing the Idris2 + ABI for use from native code. + +The Rust `src/abi/mod.rs` mirrors the Idris2 types as plain structs +used directly by the CLI and codegen. + == Relationship to VeriSimDB VeriSimiser is NOT a replacement for VeriSimDB. It's a gateway drug. -* **VeriSimiser Tier 1** gives you drift detection, provenance, and temporal - versioning on your existing database. Zero commitment. -* **VeriSimiser Tier 2** gives you individual octad modalities as sidecars. - Incremental adoption. 
-* **Full VeriSimDB** gives you the complete octad with native cross-modal - querying, VCL, and built-in drift normalisation. Full commitment. +* **VeriSimiser Tier 1** gives you provenance, temporal, lineage, + access control, and constraints/drift detection on your existing + database. Zero commitment. +* **VeriSimiser Tier 2** gives you individual modality overlays + (graph/vector/tensor/semantic/document/spatial) and simulation as + sidecars. Incremental adoption. +* **Full VeriSimDB** gives you the complete model with native + cross-modal querying, VCL, and built-in drift normalisation. Full + commitment. -The migration path is: Tier 1 → Tier 2 → full VeriSimDB (if you want it). -Most users will be happy at Tier 1 or Tier 2. +The migration path is: Tier 1 → Tier 2 → full VeriSimDB (if you want +it). Most users will be happy at Tier 1 or Tier 2. == Integration with TypedQLiser -VeriSimiser works alongside https://github.com/hyperpolymath/typedqliser[TypedQLiser]: +VeriSimiser works alongside +https://github.com/hyperpolymath/typedqliser[TypedQLiser]: * TypedQLiser type-checks your queries (compile-time, no runtime cost) -* VeriSimiser augments your database with octad capabilities (runtime) +* VeriSimiser augments your database with octad concerns (runtime) * Together: formally verified queries against an augmented database == Status -**Pre-alpha.** Architecture defined, tier system designed. Tier 1 (drift -detection, provenance, temporal versioning) is the priority implementation. +**Pre-alpha.** Architecture defined, octad ontology pinned (ADR-0001), +codegen scaffolding in place. The next implementation milestone is V-L1-C1: +end-to-end SQLite Tier 1 piggyback (`sqlite3_update_hook` → +`verisimdb_provenance_log` sidecar). See `ROADMAP.adoc`. Part of the https://github.com/hyperpolymath/iseriser[-iser family]. **#3 priority** (after TypedQLiser and Chapeliser). 
diff --git a/ROADMAP.adoc b/ROADMAP.adoc index 7f0565f..92748b8 100644 --- a/ROADMAP.adoc +++ b/ROADMAP.adoc @@ -4,35 +4,78 @@ :toc: :icons: font +The phases below are stated in terms of the *concerns* octad +(Data, Metadata, Provenance, Lineage, Constraints, AccessControl, +Temporal, Simulation) per `docs/decisions/ADR-0001-octad-ontology.adoc`. +Tier 2 *modalities* (graph, vector, tensor, semantic, document, spatial) +are independent overlay representations layered on top. + == Phase 0: Scaffold (COMPLETE) -* [x] RSR template with full CI/CD (17 workflows) -* [x] CLI with subcommands (init, start, drift, provenance, history, status, octad) -* [x] Manifest parser (verisimiser.toml with tier1/tier2 config) -* [x] Tier 1 data types (DriftReport, ProvenanceRecord, TemporalVersion) -* [x] ABI module stubs (Idris2 + Zig FFI) -* [x] README with two-tier architecture and honest framing - -== Phase 1: PostgreSQL Tier 1 MVP + +* [x] RSR template with full CI/CD +* [x] CLI with subcommands (init, generate, start, drift, provenance, + history, status, octad) +* [x] Manifest parser (`verisimiser.toml` with `[octad]` toggles + + legacy `tier1`/`tier2` back-compat) +* [x] ABI types in Rust (`src/abi/mod.rs`) + Idris2 declarations + (`src/interface/abi/`) + Zig FFI stubs (`src/interface/ffi/`) +* [x] Codegen for sidecar overlay schema and query interceptor SQL +* [x] README + ADRs covering octad ontology, verification tree, + Justfile recipes + +== Phase 1: SQLite Tier 1 MVP + +The shortest end-to-end loop: SQLite target, SQLite sidecar, provenance ++ temporal concerns. Sequencing follows the bottom-up issue plan in +`docs/decisions/` (forthcoming). 
+ +* [ ] SQLite interception via `sqlite3_update_hook` +* [ ] Provenance sidecar — write-path observer, SHA-256 hash chain + covering operation + actor + before-snapshot + transformation (not + just operation + ts) +* [ ] Temporal sidecar — version history, point-in-time read, + rollback, partial-unique-index enforcement of "exactly one current" +* [ ] Property tests for hash-chain integrity, version ordering, and + sidecar isolation (Tier 1 never writes to target) +* [ ] `verisimiser doctor` + `verisimiser validate` subcommands +* [ ] Structured logging (`tracing`), `--log-format=json|pretty` + +== Phase 2: PostgreSQL Tier 1 + * [ ] PostgreSQL logical replication interception -* [ ] Provenance sidecar (SQLite) — write-path observer -* [ ] SHA-256 hash-chain integrity for provenance records -* [ ] Temporal versioning sidecar — point-in-time queries -* [ ] Cross-modal drift detection — read-path observer -* [ ] Drift index with 8-category classification -* [ ] Idris2 ABI proofs: sidecar isolation, hash-chain integrity, version ordering -* [ ] Zig FFI bridge: database connection, overlay operations, VCL-total queries -* [ ] End-to-end test: PostgreSQL -> verisimiser overlay -> VCL-total query - -== Phase 2: Multi-Backend Support -* [ ] SQLite interception via sqlite3_update_hook / WAL monitoring +* [ ] Provenance + temporal sidecars against PG target +* [ ] Idris2 ABI proofs: sidecar isolation, hash-chain integrity, + version ordering +* [ ] Zig FFI bridge: database connection, overlay operations + +== Phase 3: Multi-Backend Support + * [ ] MongoDB interception via change streams -* [ ] Redis interception via keyspace notifications -* [ ] MySQL interception via binlog CDC * [ ] Application-level middleware / ORM hooks * [ ] Backend-agnostic interception trait abstraction * [ ] Per-backend integration tests +* [ ] MySQL (binlog CDC) / Redis (keyspace notifications) — only if + there is real demand; the manifest enum currently excludes them. 
+ +== Phase 4: Constraints / Drift Detection + +* [ ] Per-category drift definition (one ADR per category) +* [ ] First implemented category: Temporal drift (version skew — + cheapest to define and observe) +* [ ] Drift index storage + query API +* [ ] `verisimiser drift` subcommand wired to real measurements + +== Phase 5: AccessControl + Lineage + +* [ ] AccessControl model ADR: principals, role composition, deny vs + allow precedence, view interaction +* [ ] Typed policy condition language (replace free-form SQL TEXT) +* [ ] Lineage DAG enforcement: self-edge CHECK + cycle prevention + ADR +* [ ] Lineage traversal subcommand (upstream/downstream) + +== Phase 6: Tier 2 Modality Overlays -== Phase 3: Tier 2 Overlays * [ ] Graph overlay (RDF triples / property graph edges) * [ ] Vector overlay (HNSW embedding similarity search) * [ ] Tensor overlay (ndarray multi-dimensional numeric data) @@ -41,26 +84,37 @@ * [ ] Spatial overlay (R-tree geospatial coordinates) * [ ] Independent enable/disable per overlay via manifest -== Phase 4: VCL-total Integration +== Phase 7: Simulation + +* [ ] Branching semantics ADR (isolation, merge policy, conflict + resolution) +* [ ] FK enforcement on `simulation_branches.parent_branch` +* [ ] `verisimiser simulate` subcommand + +== Phase 8: VCL-total Integration + * [ ] VCL-total type-safe query parsing -* [ ] Cross-tier queries (Tier 1 + Tier 2 in single query) +* [ ] Cross-concern queries (Tier 1 + Tier 2 in single query) * [ ] TypedQLiser integration for compile-time query validation * [ ] Query planner for multi-sidecar operations * [ ] Performance benchmarks: overhead of augmentation layer -== Phase 5: Production Hardening -* [ ] Retention policies (auto-prune temporal history) +== Phase 9: Production Hardening + +* [ ] Retention policies (`[retention]` section in manifest) * [ ] Sidecar compaction and garbage collection -* [ ] Concurrent access safety (multi-writer provenance chains) +* [ ] Concurrent access safety (multi-writer 
provenance chains — + per-entity serialisation + UNIQUE(entity_id, previous_hash)) * [ ] Backup and restore for sidecars * [ ] Monitoring and alerting integration * [ ] Error recovery and graceful degradation * [ ] Shell completions (bash, zsh, fish) -== Phase 6: Ecosystem -* [ ] PanLL panel for drift monitoring dashboard +== Phase 10: Ecosystem + * [ ] BoJ-server cartridge (MCP integration) -* [ ] SqueakWell integration (database recovery via cross-modal constraint propagation) -* [ ] Migration tooling: Tier 1 -> Tier 2 -> full VeriSimDB +* [ ] SqueakWell integration (database recovery via cross-concern + constraint propagation) +* [ ] Migration tooling: Tier 1 → Tier 2 → full VeriSimDB * [ ] Publish to crates.io * [ ] Chainguard container image From db7b7e9927cc1dced59f9682217239f727ba54a9 Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Wed, 13 May 2026 02:44:43 +0200 Subject: [PATCH 6/7] ci(rust-ci): add concurrency cancellation; honour fmt fixes from prior commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `rust-ci.yml` lane already runs `cargo check + clippy + fmt` (job `check`) and `cargo test --all-targets` (job `test`, depends on `check`). That is exactly what V-L3-A2 asked for — the previous mega-list incorrectly claimed no cargo-test gate existed. Real improvement this commit lands: - Add a `concurrency` block at workflow level so push storms cancel older queued runs instead of queueing for minutes. Group keyed on `github.ref` so concurrent PR pushes serialize per-PR but parallel to main. Verified locally: - `cargo fmt --all -- --check` clean - `cargo clippy --all-targets -- -D warnings` clean - `cargo test` reports 35 tests (26 lib + 9 integration), 0 failed The cargo-test gate becomes a required check when branch protection is configured to require Rust CI / Cargo test — that is a repo settings change, not a workflow change. 
Closes #9 Co-Authored-By: Claude Opus 4.7 --- .github/workflows/rust-ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index da9db6c..7abbaf7 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -11,6 +11,10 @@ on: push: branches: [main, master] +concurrency: + group: rust-ci-${{ github.ref }} + cancel-in-progress: true + permissions: contents: read From 7a4ccfd78868032183fa59bd34e874181325000b Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Wed, 13 May 2026 03:18:45 +0200 Subject: [PATCH 7/7] =?UTF-8?q?prov:=20tamper-evident=20hash=20chain=20?= =?UTF-8?q?=E2=80=94=20V-L1-B1=20+=20V-L2-N1=20+=20V-L2-C1..C4=20+=20V-L2-?= =?UTF-8?q?L1..L2=20(#33)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 2 of the bottom-up plan. Brings the Provenance octad concern up to the claim made in the README: tampering with any audit-relevant field in a logged entry breaks `verify()`. V-L1-B1 — docs/theory/provenance-threat-model.adoc: Four-adversary model (R / SW / SR / SR+CK), per-adversary protection matrix, the field-coverage and canonical-encoding requirements that bind V-L2-C1 + V-L2-C2, the append-serialisation requirement that binds V-L2-L1 + V-L2-L2, anchor/notary future work, open questions (None vs Some(""), chain_id). Each Step 2 issue cites a section. V-L2-N1 — deduplicate ProvenanceRecord vs ProvenanceEntry: Delete src/tier1/provenance.rs::ProvenanceRecord (orphan duplicate of abi::ProvenanceEntry with its own compute_hash that risked drifting). tier1/provenance.rs now re-exports the canonical type; the file is the future home of V-L1-C1's write-path helpers (sqlite3_update_hook → append_provenance). TOPOLOGY.md updated. V-L2-C1 — full-field, domain-separated hash: compute_hash signature changes from (4 strs) to (4 strs + DateTime + 2 Options).
New preimage = domain tag b"verisim-prov-v1\0" || length-prefixed (previous_hash, entity_id, operation, actor) || canonical timestamp (V-L2-C2) || length-prefixed (before_snapshot, transformation). All seven fields participate. PROV_DOMAIN_TAG versioning is reserved for a future SHA-256→? migration. verify(), genesis(), chain() all pass the full field set. V-L2-C2 — canonical timestamp: Replace timestamp.to_rfc3339() (multiple valid forms per instant) with i64_le(timestamp()) || u32_le(timestamp_subsec_nanos()), 12 bytes total. Round-trip unit test asserts two construction paths that yield the same instant produce the same hash. V-L2-C3 — positive tamper-detection tests: Eight new unit tests in abi::tests covering each hash-covered field (entity_id, actor, before_snapshot, transformation, operation, previous_hash, timestamp) plus the canonical-encoding property test plus a 4-entry chain mutation-matrix that asserts every field mutation on every entry breaks verify(). 9 new test cases (26 → 35 lib tests). V-L2-C4 — flip the wontfix test: tests/integration_test.rs::test_provenance_chain_integrity_multi_step previously codified the bug ("Actor is not part of hash — tamper to actor alone is invisible"). Replaced with assertions that tampering with actor and with before_snapshot both break verify(). V-L2-L1 — chain_head table + write-path serialisation spec: codegen/overlay.rs emits a new verisimdb_provenance_chain_head (entity_id PK, head_hash, updated_at) alongside the provenance log. The write-path lock (SELECT … FOR UPDATE / BEGIN IMMEDIATE on the head row, INSERT into log, UPDATE head, COMMIT) is specified in the threat-model doc and the table-generator docstring. The library function that performs the transaction is V-L1-C1's job; V-L2-L1 only lands the schema. V-L2-L2 — UNIQUE INDEX makes forks unrepresentable: CREATE UNIQUE INDEX IF NOT EXISTS ux_provenance_chain ON verisimdb_provenance_log(entity_id, previous_hash). 
Genesis rows all carry previous_hash='' so the same constraint enforces exactly one genesis per entity. Two new DDL tests assert presence of both the UNIQUE INDEX and the chain_head table. Verified locally: - cargo fmt --all -- --check clean - cargo clippy --all-targets -- -D warnings clean - cargo test reports 35 + 9 = 44 tests, 0 failed Closes #25, #26, #27, #28, #29, #30, #31, #32 Co-authored-by: Claude Opus 4.7 --- docs/architecture/TOPOLOGY.md | 2 +- docs/theory/provenance-threat-model.adoc | 202 +++++++++++++++++++++++ src/abi/mod.rs | 195 ++++++++++++++++++++-- src/codegen/overlay.rs | 44 ++++- src/tier1/provenance.rs | 65 ++------ tests/integration_test.rs | 18 +- 6 files changed, 450 insertions(+), 76 deletions(-) create mode 100644 docs/theory/provenance-threat-model.adoc diff --git a/docs/architecture/TOPOLOGY.md b/docs/architecture/TOPOLOGY.md index 3609305..66827d9 100644 --- a/docs/architecture/TOPOLOGY.md +++ b/docs/architecture/TOPOLOGY.md @@ -12,7 +12,7 @@ verisimiser/ │ ├── src/manifest/ — TOML manifest parsing (verisimiser.toml) │ ├── src/tier1/ — Tier 1 piggyback data types │ │ ├── drift.rs — DriftReport, DriftCategory (8 categories) -│ │ ├── provenance.rs — ProvenanceRecord, SHA-256 hash chain +│ │ ├── provenance.rs — re-exports abi::ProvenanceEntry; future write-path helpers (V-L1-C1) │ │ └── temporal.rs — TemporalVersion, point-in-time snapshots │ ├── src/tier2/ — Tier 2 overlay stubs (graph, vector, tensor, semantic, document, spatial) │ ├── src/intercept/ — Per-backend interception strategies diff --git a/docs/theory/provenance-threat-model.adoc b/docs/theory/provenance-threat-model.adoc new file mode 100644 index 0000000..efd26f4 --- /dev/null +++ b/docs/theory/provenance-threat-model.adoc @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. 
Jewell (hyperpolymath) += Provenance threat model +:toc: left +:toclevels: 3 +:icons: font + +This document fixes what the Provenance concern's hash chain proves and +what it doesn't. It binds the design choices made in V-L2-C1, V-L2-C2, +V-L2-L1, V-L2-L2, and the ADR-0004 follow-up. + +Resolves: https://github.com/hyperpolymath/verisimiser/issues/25[V-L1-B1]. + +== Scope + +In scope:: the `Provenance` octad concern as implemented by +`ProvenanceEntry` in `src/abi/mod.rs` plus the sidecar table +`verisimdb_provenance_log` plus (post V-L2-L1) the `chain_head` table. + +Out of scope:: denial-of-service against the sidecar; side-channels +(timing, cache); tampering of the target database itself +(verisimiser only sees what its interceptors intercept); retroactive +provenance for pre-existing rows (the genesis entry for an entity +attests its existence at the moment verisimiser started observing it, +not before); cryptographic compromise of SHA-256. + +== Adversaries + +Four adversaries cover the relevant capability axes. Each is a +*lattice point*; real attackers combine capabilities. + +[cols="1,3"] +|=== +| Tag | Capability + +| **R** | Read-only — can read both the target database and the +sidecar. No write to either. Models: a forensic auditor; +a leaked replica; a debugging copy on a laptop. + +| **SW** | Sidecar-Write — can append new rows to +`verisimdb_provenance_log` and `verisimdb_temporal_versions` but +**cannot delete or rewrite existing rows**. Models: a sidecar +configured append-only (filesystem-level WORM, S3 Object Lock, +SQLite + revoked-DELETE/UPDATE permissions); also models a buggy +verisimiser daemon that double-writes. + +| **SR** | Sidecar-Rewrite — can rewrite or delete arbitrary rows +in the sidecar. Models: root on the sidecar host; compromised +application credential with full sidecar privileges; a backup +operator restoring an older sidecar snapshot. + +| **CK** | Clock-skew — can write entries (via SW or SR) with +timestamps that lie. 
Models: a system clock that drifts; an +adversary who controls the clock source; coordinated backdating. +|=== + +== Per-adversary protection matrix + +For each adversary, what the chain proves about each field: +**P** = protected (tampering detected), +**N** = not protected, +**C** = conditionally protected (see note). + +[cols="2,1,1,1,1"] +|=== +| Field | R | SW | SR | SR+CK + +| Genesis existence / order | P | P | N | N +| `previous_hash` of any entry | P | P | C-1 | C-1 +| `entity_id` of any entry | P | P | C-1 | C-1 +| `operation` of any entry | P | P | C-1 | C-1 +| `actor` of any entry | P | P | C-1 | C-1 +| `timestamp` of any entry | P | P | C-1 | N (CK falsifies) +| `before_snapshot` of any entry | P | P | C-1 | C-1 +| `transformation` of any entry | P | P | C-1 | C-1 +| Absence of an entry | C-2 | C-2 | N | N +| Total ordering across entities | N | N | N | N +|=== + +**C-1** — under SR (or SR+CK), the adversary can rewrite an +arbitrary suffix of the chain (recomputing hashes as they go). What's +preserved against SR is **only the prefix up to the most-recent +externally attested hash** (e.g. a hash periodically signed by an +out-of-band notary, anchored to an append-only log, or published to +a transparency service). Without an external anchor, the chain +proves *nothing* against SR. + +**C-2** — absence is provable only if every legitimate append goes +through verisimiser. Direct writes to the target database that +bypass interception are invisible to the chain; the chain cannot +attest to what it never saw. + +== Field coverage requirement + +A direct consequence of C-1 / C-2 and the per-adversary matrix: + +[NOTE] +==== +Every field that an auditor will rely on for forensic purposes +**must** participate in the hash. `actor`, `before_snapshot`, and +`transformation` are all such fields — they are the audit. 
If they +are not in the preimage, the chain protects them against R and SW +only by *coincidence* (because the row itself was hash-keyed in the +DB), not by design. + +This document therefore *requires* V-L2-C1: the preimage must cover +`previous_hash`, `entity_id`, `operation`, `actor`, `timestamp`, +`before_snapshot`, `transformation`. Any future field added to +`ProvenanceEntry` must either be added to the preimage or +explicitly recorded here with a justification for its omission. +==== + +== Canonical encoding requirement + +A direct consequence of "the hash protects the field" being a +*function*, not a relation: + +[NOTE] +==== +Two distinct preimages must produce distinct hashes (collision +resistance is SHA-256's job). Two *equal* preimages must produce +equal hashes (canonicalisation is our job). The encoding must: + +. Domain-separate verisimiser provenance hashes from any other +hash the system computes (`b"verisim-prov-v1\0"`). +. Length-prefix variable-length fields so concatenation is +unambiguous. +. Use a canonical timestamp encoding (V-L2-C2: + `i64_le(secs) || u32_le(nanos)`), not a string representation that + admits multiple valid forms for the same instant. +==== + +== Append serialisation requirement + +A direct consequence of "previous_hash chains entries linearly": + +[NOTE] +==== +Two writers cannot independently chain from the same `previous_hash` +without forking the chain. Verisimiser must serialise appends +per-entity. V-L2-L1 specifies the write-path lock; V-L2-L2 specifies +the database UNIQUE constraint that makes forks structurally +impossible even if the lock is bypassed. + +The chain is *per-entity-serial* but *cross-entity-parallel*. +A global serial order across entities is *not* a requirement +(see "Total ordering" in the matrix above). +==== + +== Anchor / notary (future) + +Protection against SR requires an *external anchor* that the +adversary cannot rewrite. Options, none of which this document +mandates yet: + +. 
**Periodic notarisation** — every N minutes, sign the latest +chain_head with a key not held on the sidecar host, and publish +the signature to an out-of-band log. +. **Transparency log** — submit each `chain_head` update to an +external append-only log (Sigstore-style). +. **Replication to immutable storage** — write each new entry to +S3 Object Lock (or equivalent) as a defence in depth. + +The threat model leaves the choice for ADR-0005 once a deployment +context exists. + +== Out-of-band assumptions + +. The sidecar host's clock is monotonic and within bounded skew of +real time. Without this, all timestamps are advisory (see CK in the +matrix). +. Verisimiser's process integrity is assumed — a verisimiser binary +that has been swapped for a malicious one can produce a hash-chain +that verifies against itself but attests to nothing real. Binary +provenance is a separate concern (out of scope here). +. SHA-256 is collision-resistant in the cryptographic sense for the +lifetime of the audit window. + +== Open questions + +. Should `Option` fields (`before_snapshot`, `transformation`) +encode `None` vs `Some("")` distinctly? The current proposal collapses +them (both encode as `u64_le(0)` length). Document explicitly that +the chain treats "no snapshot" and "empty snapshot" identically; if a +future use case requires distinguishing them, a single sentinel byte +(`0x00` for None, `0x01` for Some) prefixed inside the length-prefixed +slot resolves it. +. Should the chain include an explicit `chain_id` covering all of an +entity's entries (in addition to chaining via `previous_hash`)? Cheap +defence in depth against entity_id confusion; defer to ADR-0004. 
+ +== Cross-references + +* V-L2-C1 — implements the field coverage + domain separation +* V-L2-C2 — implements canonical timestamp encoding +* V-L2-C3 — positive tamper-detection tests +* V-L2-C4 — removes the wontfix test that codified the C-1 gap +* V-L2-L1 — per-entity write-path serialisation +* V-L2-L2 — UNIQUE INDEX(entity_id, previous_hash) defence in depth +* V-L2-N1 — deduplicates the type used here (ProvenanceEntry vs + ProvenanceRecord) +* ADR-0004 (future) — records the binding choices made here diff --git a/src/abi/mod.rs b/src/abi/mod.rs index b50c83b..d21e3c0 100644 --- a/src/abi/mod.rs +++ b/src/abi/mod.rs @@ -161,34 +161,79 @@ pub struct ProvenanceEntry { pub transformation: Option, } +/// Domain-separation tag for verisimiser provenance hashes (V-L2-C1). +/// +/// Bumping the version suffix (`v1` -> `v2`) constitutes a hash-algorithm +/// migration: existing chains keep verifying with the old tag, new +/// entries use the new tag, and `verify()` dispatches on the stored tag. +/// (No migration is currently planned; the tag exists for future-proofing.) +const PROV_DOMAIN_TAG: &[u8] = b"verisim-prov-v1\0"; + impl ProvenanceEntry { - /// Compute the SHA-256 hash for a provenance entry, chaining from the previous hash. + /// Compute the SHA-256 hash for a provenance entry (V-L2-C1, V-L2-C2). + /// + /// Preimage = domain tag || length-prefixed fields || canonical timestamp: /// - /// The hash covers: previous_hash, entity_id, operation, and timestamp. - /// This ensures that any tampering with the chain is detectable. 
+ /// ```text + /// SHA-256( + /// "verisim-prov-v1\0" + /// || u64_le(len(previous_hash)) || previous_hash + /// || u64_le(len(entity_id)) || entity_id + /// || u64_le(len(operation)) || operation + /// || u64_le(len(actor)) || actor + /// || i64_le(timestamp.timestamp()) + /// || u32_le(timestamp.timestamp_subsec_nanos()) + /// || u64_le(len(before_snapshot.unwrap_or(""))) + /// || before_snapshot.unwrap_or("") + /// || u64_le(len(transformation.unwrap_or(""))) + /// || transformation.unwrap_or("") + /// ) + /// ``` + /// + /// All seven fields participate, so tampering with any of them is + /// detectable. See `docs/theory/provenance-threat-model.adoc` for the + /// adversary matrix and `docs/decisions/ADR-0004` (forthcoming) for + /// the binding choices. pub fn compute_hash( previous_hash: &str, entity_id: &str, operation: &str, - timestamp: &str, + actor: &str, + timestamp: &DateTime<Utc>, + before_snapshot: Option<&str>, + transformation: Option<&str>, ) -> String { + fn write_lp(hasher: &mut Sha256, bytes: &[u8]) { + hasher.update((bytes.len() as u64).to_le_bytes()); + hasher.update(bytes); + } let mut hasher = Sha256::new(); - hasher.update(previous_hash.as_bytes()); - hasher.update(entity_id.as_bytes()); - hasher.update(operation.as_bytes()); - hasher.update(timestamp.as_bytes()); + hasher.update(PROV_DOMAIN_TAG); + write_lp(&mut hasher, previous_hash.as_bytes()); + write_lp(&mut hasher, entity_id.as_bytes()); + write_lp(&mut hasher, operation.as_bytes()); + write_lp(&mut hasher, actor.as_bytes()); + hasher.update(timestamp.timestamp().to_le_bytes()); + hasher.update(timestamp.timestamp_subsec_nanos().to_le_bytes()); + write_lp(&mut hasher, before_snapshot.unwrap_or("").as_bytes()); + write_lp(&mut hasher, transformation.unwrap_or("").as_bytes()); format!("{:x}", hasher.finalize()) } - /// Verify that this entry's hash is consistent with its contents. + /// Verify that this entry's hash is consistent with all of its contents.
/// - /// Returns `true` if the stored hash matches the recomputed hash. + /// Returns `true` iff the stored hash matches the recomputed hash over + /// the full field set (previous_hash, entity_id, operation, actor, + /// timestamp, before_snapshot, transformation). pub fn verify(&self) -> bool { let expected = Self::compute_hash( &self.previous_hash, &self.entity_id, &self.operation, - &self.timestamp.to_rfc3339(), + &self.actor, + &self.timestamp, + self.before_snapshot.as_deref(), + self.transformation.as_deref(), ); self.hash == expected } @@ -196,7 +241,7 @@ impl ProvenanceEntry { /// Create a new genesis entry (first in the chain for an entity). pub fn genesis(entity_id: &str, actor: &str) -> Self { let timestamp = Utc::now(); - let hash = Self::compute_hash("", entity_id, "insert", &timestamp.to_rfc3339()); + let hash = Self::compute_hash("", entity_id, "insert", actor, &timestamp, None, None); Self { hash, previous_hash: String::new(), @@ -216,7 +261,10 @@ impl ProvenanceEntry { &self.hash, &self.entity_id, operation, - &timestamp.to_rfc3339(), + actor, + &timestamp, + None, + None, ); Self { hash, @@ -491,11 +539,126 @@ mod tests { } #[test] - fn test_provenance_tamper_detection() { + fn test_provenance_tamper_entity_id() { let mut entry = ProvenanceEntry::genesis("entity-1", "system"); - // Tamper with the entity_id after hash computation. entry.entity_id = "entity-2".to_string(); - assert!(!entry.verify(), "Tampered entry should fail verification"); + assert!( + !entry.verify(), + "tampering with entity_id must break verify" + ); + } + + /// V-L2-C3: actor is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_actor() { + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + entry.actor = "mallory".to_string(); + assert!(!entry.verify(), "tampering with actor must break verify"); + } + + /// V-L2-C3: before_snapshot is hashed; tampering with it must be detected.
+ #[test] + fn test_provenance_tamper_before_snapshot() { + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + // Adding a snapshot (None -> Some) should break the original hash. + entry.before_snapshot = Some("{\"redacted\":true}".to_string()); + assert!( + !entry.verify(), + "tampering with before_snapshot must break verify" + ); + } + + /// V-L2-C3: transformation is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_transformation() { + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + entry.transformation = Some("evil-rewrite".to_string()); + assert!( + !entry.verify(), + "tampering with transformation must break verify" + ); + } + + /// V-L2-C3: operation is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_operation() { + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + entry.operation = "delete".to_string(); + assert!( + !entry.verify(), + "tampering with operation must break verify" + ); + } + + /// V-L2-C3: previous_hash is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_previous_hash() { + let genesis = ProvenanceEntry::genesis("entity-1", "alice"); + let mut update = genesis.chain("update", "bob"); + update.previous_hash = "deadbeef".to_string(); + assert!( + !update.verify(), + "tampering with previous_hash must break verify" + ); + } + + /// V-L2-C2: hash depends on the canonical (i64+u32) timestamp encoding, + /// not on a string representation that might vary. Two `DateTime` + /// values that represent the same instant — one parsed from RFC3339, + /// one constructed via `from_timestamp` — must produce the same hash. 
+ #[test] + fn test_provenance_hash_timestamp_canonical() { + let parsed: DateTime<Utc> = "2026-05-13T08:00:00.000000000Z".parse().unwrap(); + let built = DateTime::<Utc>::from_timestamp(parsed.timestamp(), 0).unwrap(); + assert_eq!( + parsed, built, + "construction paths must yield equal instants" + ); + + let h1 = ProvenanceEntry::compute_hash("", "e1", "insert", "alice", &parsed, None, None); + let h2 = ProvenanceEntry::compute_hash("", "e1", "insert", "alice", &built, None, None); + assert_eq!( + h1, h2, + "canonical timestamp encoding must be path-independent" + ); + } + + /// V-L2-C3: round-trip — build a chain of N entries and assert every + /// mutation of every field breaks verification. + #[test] + fn test_provenance_chain_round_trip_mutation_matrix() { + let g = ProvenanceEntry::genesis("post-7", "system"); + let u1 = g.chain("update", "alice"); + let u2 = u1.chain("update", "bob"); + let d = u2.chain("delete", "alice"); + for entry in [&g, &u1, &u2, &d] { + assert!(entry.verify(), "every legitimate entry must verify"); + } + + for original in [&g, &u1, &u2, &d] { + // Permute each hash-covered field and assert verify fails.
+ for mutate in [ + |e: &mut ProvenanceEntry| e.actor.push_str("-tamper"), + |e: &mut ProvenanceEntry| e.entity_id.push_str("-tamper"), + |e: &mut ProvenanceEntry| e.operation.push_str("-tamper"), + |e: &mut ProvenanceEntry| { + e.previous_hash = "00".repeat(32); + }, + |e: &mut ProvenanceEntry| { + e.timestamp += chrono::Duration::nanoseconds(1); + }, + |e: &mut ProvenanceEntry| { + e.before_snapshot = Some("tampered".into()); + }, + |e: &mut ProvenanceEntry| { + e.transformation = Some("tampered".into()); + }, + ] { + let mut clone = original.clone(); + mutate(&mut clone); + assert!(!clone.verify(), "field mutation must break verification"); + } + } } #[test] diff --git a/src/codegen/overlay.rs b/src/codegen/overlay.rs index 1d1ea02..0d557a9 100644 --- a/src/codegen/overlay.rs +++ b/src/codegen/overlay.rs @@ -114,7 +114,13 @@ fn generate_metadata_table(schema: &ParsedSchema) -> String { /// /// Stores a SHA-256 hash-chained audit trail of all data modifications. /// Each row chains to its predecessor via `previous_hash`, forming an -/// append-only, tamper-evident log. +/// append-only, tamper-evident log (see +/// `docs/theory/provenance-threat-model.adoc`). +/// +/// The `chain_head` table is the per-entity head pointer used for the +/// write-path lock (V-L2-L1). The UNIQUE INDEX on `(entity_id, +/// previous_hash)` (V-L2-L2) makes chain forks structurally impossible +/// — defence in depth for if the lock is ever bypassed. fn generate_provenance_table() -> String { "-- Provenance: SHA-256 hash-chained audit trail\n\ CREATE TABLE IF NOT EXISTS verisimdb_provenance_log (\n\ @@ -128,8 +134,24 @@ fn generate_provenance_table() -> String { \x20 before_snapshot TEXT, -- JSON of entity state before operation\n\ \x20 transformation TEXT -- description of transformation applied\n\ );\n\ + -- V-L2-L2: forbid chain forks at the DB level. 
Genesis records all\n\ + -- carry previous_hash='' so this also enforces a single genesis per\n\ + -- entity.\n\ + CREATE UNIQUE INDEX IF NOT EXISTS ux_provenance_chain\n\ + \x20 ON verisimdb_provenance_log(entity_id, previous_hash);\n\ CREATE INDEX IF NOT EXISTS idx_provenance_entity ON verisimdb_provenance_log(entity_id);\n\ - CREATE INDEX IF NOT EXISTS idx_provenance_table ON verisimdb_provenance_log(table_name);\n\n" + CREATE INDEX IF NOT EXISTS idx_provenance_table ON verisimdb_provenance_log(table_name);\n\ + \n\ + -- V-L2-L1: per-entity head pointer. The write path takes a row\n\ + -- lock here (SELECT … FOR UPDATE / BEGIN IMMEDIATE) so concurrent\n\ + -- appenders on the same entity serialise; cross-entity appends\n\ + -- remain parallel. Each successful append updates head_hash in\n\ + -- the same transaction as the INSERT into verisimdb_provenance_log.\n\ + CREATE TABLE IF NOT EXISTS verisimdb_provenance_chain_head (\n\ + \x20 entity_id TEXT PRIMARY KEY,\n\ + \x20 head_hash TEXT NOT NULL,\n\ + \x20 updated_at TEXT NOT NULL\n\ + );\n\n" .to_string() } @@ -321,6 +343,24 @@ mod tests { assert!(ddl.contains("actor")); } + /// V-L2-L2: forks are forbidden by a UNIQUE INDEX on + /// (entity_id, previous_hash). + #[test] + fn test_provenance_table_has_unique_chain_index() { + let ddl = generate_provenance_table(); + assert!(ddl.contains("UNIQUE INDEX")); + assert!(ddl.contains("ux_provenance_chain")); + assert!(ddl.contains("(entity_id, previous_hash)")); + } + + /// V-L2-L1: chain_head table exists for per-entity write serialisation. 
+ #[test] + fn test_provenance_table_has_chain_head() { + let ddl = generate_provenance_table(); + assert!(ddl.contains("verisimdb_provenance_chain_head")); + assert!(ddl.contains("head_hash")); + } + #[test] fn test_temporal_table_has_versioning() { let ddl = generate_temporal_table(); diff --git a/src/tier1/provenance.rs b/src/tier1/provenance.rs index 4886e18..283e7ee 100644 --- a/src/tier1/provenance.rs +++ b/src/tier1/provenance.rs @@ -1,57 +1,18 @@ // SPDX-License-Identifier: PMPL-1.0-or-later // Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) // -// Provenance tracking via SHA-256 hash chains. -// Write-path observer: records what happened, never changes what happened. - -use serde::{Deserialize, Serialize}; -use sha2::{Digest, Sha256}; - -/// A single link in the provenance hash chain. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ProvenanceRecord { - /// Hash of this record (SHA-256 of previous_hash + entity_id + operation + timestamp). - pub hash: String, - /// Hash of the previous record in the chain (empty string for genesis). - pub previous_hash: String, - /// Entity this record is about. - pub entity_id: String, - /// What happened: "create", "update", "delete", "transform". - pub operation: String, - /// Who did it (user, service, or system identifier). - pub actor: String, - /// When it happened. - pub timestamp: chrono::DateTime, - /// Optional: what the entity looked like before (for updates/deletes). - pub before_snapshot: Option, - /// Optional: transformation description (for derived data). - pub transformation: Option, -} +// Tier 1 provenance write-path helpers. +// +// Type definitions live in `crate::abi` — this module exists for the +// *write-path* code (V-L1-C1 onwards: hooking the target database, +// appending tamper-evident records to the sidecar). 
The duplicate +// `ProvenanceRecord` struct that previously lived here was removed +// in V-L2-N1 (it shadowed `abi::ProvenanceEntry` and risked drifting +// from the canonical hash function). +// +// Re-export the canonical type so existing `use crate::tier1::provenance::…` +// call sites continue to work. -impl ProvenanceRecord { - /// Compute the hash for this record, chaining from the previous hash. - pub fn compute_hash( - previous_hash: &str, - entity_id: &str, - operation: &str, - timestamp: &str, - ) -> String { - let mut hasher = Sha256::new(); - hasher.update(previous_hash.as_bytes()); - hasher.update(entity_id.as_bytes()); - hasher.update(operation.as_bytes()); - hasher.update(timestamp.as_bytes()); - format!("{:x}", hasher.finalize()) - } +pub use crate::abi::ProvenanceEntry; - /// Verify that this record's hash is consistent with its contents. - pub fn verify(&self) -> bool { - let expected = Self::compute_hash( - &self.previous_hash, - &self.entity_id, - &self.operation, - &self.timestamp.to_rfc3339(), - ); - self.hash == expected - } -} +// Write-path helpers (V-L1-C1) will land here. diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 5cad7e1..2b81905 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -269,19 +269,27 @@ fn test_provenance_chain_integrity_multi_step() { assert_ne!(update1.hash, update2.hash); assert_ne!(update2.hash, delete.hash); - // Tamper detection: mutating any entry should break verification. + // Tamper detection: every hash-covered field must break verification + // when mutated (V-L2-C1, V-L2-C3, V-L2-C4). let mut tampered = update1.clone(); tampered.actor = "evil-mallory".to_string(); assert!( - tampered.verify(), - "Actor is not part of hash — tamper to actor alone is invisible" + !tampered.verify(), + "actor is part of the hash; tampering with it must break verify" ); - // But modifying a hash-covered field should be detected.
+ let mut tampered_op = update1.clone(); tampered_op.operation = "delete".to_string(); assert!( !tampered_op.verify(), - "Tampering with operation should break verification" + "tampering with operation must break verify" + ); + + let mut tampered_snap = update1.clone(); + tampered_snap.before_snapshot = Some("{}".into()); + assert!( + !tampered_snap.verify(), + "before_snapshot is part of the hash; tampering with it must break verify" ); }