From f227c60c70436a6c6ccce95f0a7cfd3c8f294b2b Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Wed, 13 May 2026 03:00:38 +0200 Subject: [PATCH] =?UTF-8?q?prov:=20tamper-evident=20hash=20chain=20?= =?UTF-8?q?=E2=80=94=20V-L1-B1=20+=20V-L2-N1=20+=20V-L2-C1..C4=20+=20V-L2-?= =?UTF-8?q?L1..L2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 2 of the bottom-up plan. Brings the Provenance octad concern up to the claim made in the README: tampering with any audit-relevant field in a logged entry breaks `verify()`. V-L1-B1 — docs/theory/provenance-threat-model.adoc: Four-adversary model (R / SW / SR / SR+CK), per-adversary protection matrix, the field-coverage and canonical-encoding requirements that bind V-L2-C1 + V-L2-C2, the append-serialisation requirement that binds V-L2-L1 + V-L2-L2, anchor/notary future work, open questions (None vs Some(""), chain_id). Each Step 2 issue cites a section. V-L2-N1 — deduplicate ProvenanceRecord vs ProvenanceEntry: Delete src/tier1/provenance.rs::ProvenanceRecord (orphan duplicate of abi::ProvenanceEntry with its own compute_hash that risked drifting). tier1/provenance.rs now re-exports the canonical type; the file is the future home of V-L1-C1's write-path helpers (sqlite3_update_hook → append_provenance). TOPOLOGY.md updated. V-L2-C1 — full-field, domain-separated hash: compute_hash signature changes from (4 strs) to (5 strs + DateTime + 2 Options). New preimage = domain tag b"verisim-prov-v1\0" || length-prefixed (previous_hash, entity_id, operation, actor) || canonical timestamp (V-L2-C2) || length-prefixed (before_snapshot, transformation). All seven fields participate. PROV_DOMAIN_TAG versioning is reserved for a future SHA-256→? migration. verify(), genesis(), chain() all pass the full field set. 
V-L2-C2 — canonical timestamp: Replace timestamp.to_rfc3339() (multiple valid forms per instant) with i64_le(timestamp()) || u32_le(timestamp_subsec_nanos()), 12 bytes total. Round-trip unit test asserts two construction paths that yield the same instant produce the same hash. V-L2-C3 — positive tamper-detection tests: Eight new unit tests in abi::tests covering each hash-covered field (entity_id, actor, before_snapshot, transformation, operation, previous_hash, timestamp) plus the canonical-encoding property test plus a 4-entry chain mutation-matrix that asserts every field mutation on every entry breaks verify(). 9 new test cases (26 → 35 lib tests). V-L2-C4 — flip the wontfix test: tests/integration_test.rs::test_provenance_chain_integrity_multi_step previously codified the bug ("Actor is not part of hash — tamper to actor alone is invisible"). Replaced with assertions that tampering with actor and with before_snapshot both break verify(). V-L2-L1 — chain_head table + write-path serialisation spec: codegen/overlay.rs emits a new verisimdb_provenance_chain_head (entity_id PK, head_hash, updated_at) alongside the provenance log. The write-path lock (SELECT … FOR UPDATE / BEGIN IMMEDIATE on the head row, INSERT into log, UPDATE head, COMMIT) is specified in the threat-model doc and the table-generator docstring. The library function that performs the transaction is V-L1-C1's job; V-L2-L1 only lands the schema. V-L2-L2 — UNIQUE INDEX makes forks unrepresentable: CREATE UNIQUE INDEX IF NOT EXISTS ux_provenance_chain ON verisimdb_provenance_log(entity_id, previous_hash). Genesis rows all carry previous_hash='' so the same constraint enforces exactly one genesis per entity. Two new DDL tests assert presence of both the UNIQUE INDEX and the chain_head table. 
Verified locally: - cargo fmt --all -- --check clean - cargo clippy --all-targets -- -D warnings clean - cargo test reports 35 + 9 = 44 tests, 0 failed Closes #25, #26, #27, #28, #29, #30, #31, #32 Co-Authored-By: Claude Opus 4.7 --- docs/architecture/TOPOLOGY.md | 2 +- docs/theory/provenance-threat-model.adoc | 202 +++++++++++++++++++++++ src/abi/mod.rs | 195 ++++++++++++++++++++-- src/codegen/overlay.rs | 44 ++++- src/tier1/provenance.rs | 65 ++------ tests/integration_test.rs | 18 +- 6 files changed, 450 insertions(+), 76 deletions(-) create mode 100644 docs/theory/provenance-threat-model.adoc diff --git a/docs/architecture/TOPOLOGY.md b/docs/architecture/TOPOLOGY.md index 3609305..66827d9 100644 --- a/docs/architecture/TOPOLOGY.md +++ b/docs/architecture/TOPOLOGY.md @@ -12,7 +12,7 @@ verisimiser/ │ ├── src/manifest/ — TOML manifest parsing (verisimiser.toml) │ ├── src/tier1/ — Tier 1 piggyback data types │ │ ├── drift.rs — DriftReport, DriftCategory (8 categories) -│ │ ├── provenance.rs — ProvenanceRecord, SHA-256 hash chain +│ │ ├── provenance.rs — re-exports abi::ProvenanceEntry; future write-path helpers (V-L1-C1) │ │ └── temporal.rs — TemporalVersion, point-in-time snapshots │ ├── src/tier2/ — Tier 2 overlay stubs (graph, vector, tensor, semantic, document, spatial) │ ├── src/intercept/ — Per-backend interception strategies diff --git a/docs/theory/provenance-threat-model.adoc b/docs/theory/provenance-threat-model.adoc new file mode 100644 index 0000000..efd26f4 --- /dev/null +++ b/docs/theory/provenance-threat-model.adoc @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) += Provenance threat model +:toc: left +:toclevels: 3 +:icons: font + +This document fixes what the Provenance concern's hash chain proves and +what it doesn't. It binds the design choices made in V-L2-C1, V-L2-C2, +V-L2-L1, V-L2-L2, and the ADR-0004 follow-up. 
+ +Resolves: https://github.com/hyperpolymath/verisimiser/issues/25[V-L1-B1]. + +== Scope + +In scope:: the `Provenance` octad concern as implemented by +`ProvenanceEntry` in `src/abi/mod.rs` plus the sidecar table +`verisimdb_provenance_log` plus (post V-L2-L1) the `chain_head` table. + +Out of scope:: denial-of-service against the sidecar; side-channels +(timing, cache); tampering of the target database itself +(verisimiser only sees what its interceptors intercept); retroactive +provenance for pre-existing rows (the genesis entry for an entity +attests its existence at the moment verisimiser started observing it, +not before); cryptographic compromise of SHA-256. + +== Adversaries + +Four adversaries cover the relevant capability axes. Each is a +*lattice point*; real attackers combine capabilities. + +[cols="1,3"] +|=== +| Tag | Capability + +| **R** | Read-only — can read both the target database and the +sidecar. No write to either. Models: a forensic auditor; +a leaked replica; a debugging copy on a laptop. + +| **SW** | Sidecar-Write — can append new rows to +`verisimdb_provenance_log` and `verisimdb_temporal_versions` but +**cannot delete or rewrite existing rows**. Models: a sidecar +configured append-only (filesystem-level WORM, S3 Object Lock, +SQLite + revoked-DELETE/UPDATE permissions); also models a buggy +verisimiser daemon that double-writes. + +| **SR** | Sidecar-Rewrite — can rewrite or delete arbitrary rows +in the sidecar. Models: root on the sidecar host; compromised +application credential with full sidecar privileges; a backup +operator restoring an older sidecar snapshot. + +| **CK** | Clock-skew — can write entries (via SW or SR) with +timestamps that lie. Models: a system clock that drifts; an +adversary who controls the clock source; coordinated backdating. 
+|=== + +== Per-adversary protection matrix + +For each adversary, what the chain proves about each field: +**P** = protected (tampering detected), +**N** = not protected, +**C** = conditionally protected (see note). + +[cols="2,1,1,1,1"] +|=== +| Field | R | SW | SR | SR+CK + +| Genesis existence / order | P | P | N | N +| `previous_hash` of any entry | P | P | C-1 | C-1 +| `entity_id` of any entry | P | P | C-1 | C-1 +| `operation` of any entry | P | P | C-1 | C-1 +| `actor` of any entry | P | P | C-1 | C-1 +| `timestamp` of any entry | P | P | C-1 | N (CK falsifies) +| `before_snapshot` of any entry | P | P | C-1 | C-1 +| `transformation` of any entry | P | P | C-1 | C-1 +| Absence of an entry | C-2 | C-2 | N | N +| Total ordering across entities | N | N | N | N +|=== + +**C-1** — under SR (or SR+CK), the adversary can rewrite an +arbitrary suffix of the chain (recomputing hashes as they go). What's +preserved against SR is **only the prefix up to the most-recent +externally attested hash** (e.g. a hash periodically signed by an +out-of-band notary, anchored to an append-only log, or published to +a transparency service). Without an external anchor, the chain +proves *nothing* against SR. + +**C-2** — absence is provable only if every legitimate append goes +through verisimiser. Direct writes to the target database that +bypass interception are invisible to the chain; the chain cannot +attest to what it never saw. + +== Field coverage requirement + +A direct consequence of C-1 / C-2 and the per-adversary matrix: + +[NOTE] +==== +Every field that an auditor will rely on for forensic purposes +**must** participate in the hash. `actor`, `before_snapshot`, and +`transformation` are all such fields — they are the audit. If they +are not in the preimage, the chain protects them against R and SW +only by *coincidence* (because the row itself was hash-keyed in the +DB), not by design. 
+ +This document therefore *requires* V-L2-C1: the preimage must cover +`previous_hash`, `entity_id`, `operation`, `actor`, `timestamp`, +`before_snapshot`, `transformation`. Any future field added to +`ProvenanceEntry` must either be added to the preimage or +explicitly recorded here with a justification for its omission. +==== + +== Canonical encoding requirement + +A direct consequence of "the hash protects the field" being a +*function*, not a relation: + +[NOTE] +==== +Two distinct preimages must produce distinct hashes (collision +resistance is SHA-256's job). Two *equal* preimages must produce +equal hashes (canonicalisation is our job). The encoding must: + +. Domain-separate verisimiser provenance hashes from any other +hash the system computes (`b"verisim-prov-v1\0"`). +. Length-prefix variable-length fields so concatenation is +unambiguous. +. Use a canonical timestamp encoding (V-L2-C2: + `i64_le(secs) || u32_le(nanos)`), not a string representation that + admits multiple valid forms for the same instant. +==== + +== Append serialisation requirement + +A direct consequence of "previous_hash chains entries linearly": + +[NOTE] +==== +Two writers cannot independently chain from the same `previous_hash` +without forking the chain. Verisimiser must serialise appends +per-entity. V-L2-L1 specifies the write-path lock; V-L2-L2 specifies +the database UNIQUE constraint that makes forks structurally +impossible even if the lock is bypassed. + +The chain is *per-entity-serial* but *cross-entity-parallel*. +A global serial order across entities is *not* a requirement +(see "Total ordering" in the matrix above). +==== + +== Anchor / notary (future) + +Protection against SR requires an *external anchor* that the +adversary cannot rewrite. Options, none of which this document +mandates yet: + +. **Periodic notarisation** — every N minutes, sign the latest +chain_head with a key not held on the sidecar host, and publish +the signature to an out-of-band log. +. 
**Transparency log** — submit each `chain_head` update to an +external append-only log (Sigstore-style). +. **Replication to immutable storage** — write each new entry to +S3 Object Lock (or equivalent) as a defence in depth. + +The threat model leaves the choice for ADR-0005 once a deployment +context exists. + +== Out-of-band assumptions + +. The sidecar host's clock is monotonic and within bounded skew of +real time. Without this, all timestamps are advisory (see CK in the +matrix). +. Verisimiser's process integrity is assumed — a verisimiser binary +that has been swapped for a malicious one can produce a hash-chain +that verifies against itself but attests to nothing real. Binary +provenance is a separate concern (out of scope here). +. SHA-256 is collision-resistant in the cryptographic sense for the +lifetime of the audit window. + +== Open questions + +. Should `Option` fields (`before_snapshot`, `transformation`) +encode `None` vs `Some("")` distinctly? The current proposal collapses +them (both encode as `u64_le(0)` length). Document explicitly that +the chain treats "no snapshot" and "empty snapshot" identically; if a +future use case requires distinguishing them, a single sentinel byte +(`0x00` for None, `0x01` for Some) prefixed inside the length-prefixed +slot resolves it. +. Should the chain include an explicit `chain_id` covering all of an +entity's entries (in addition to chaining via `previous_hash`)? Cheap +defence in depth against entity_id confusion; defer to ADR-0004. 
+ +== Cross-references + +* V-L2-C1 — implements the field coverage + domain separation +* V-L2-C2 — implements canonical timestamp encoding +* V-L2-C3 — positive tamper-detection tests +* V-L2-C4 — removes the wontfix test that codified the field-coverage gap closed by V-L2-C1 +* V-L2-L1 — per-entity write-path serialisation +* V-L2-L2 — UNIQUE INDEX(entity_id, previous_hash) defence in depth +* V-L2-N1 — deduplicates the type used here (ProvenanceEntry vs + ProvenanceRecord) +* ADR-0004 (future) — records the binding choices made here diff --git a/src/abi/mod.rs b/src/abi/mod.rs index b50c83b..d21e3c0 100644 --- a/src/abi/mod.rs +++ b/src/abi/mod.rs @@ -161,34 +161,79 @@ pub struct ProvenanceEntry { pub transformation: Option<String>, } +/// Domain-separation tag for verisimiser provenance hashes (V-L2-C1). +/// +/// Bumping the version suffix (`v1` -> `v2`) constitutes a hash-algorithm +/// migration: existing chains would keep verifying with the old tag, new +/// entries would use the new tag, and `verify()` would dispatch on a stored +/// tag (not implemented; no migration is planned, the tag is future-proofing). +const PROV_DOMAIN_TAG: &[u8] = b"verisim-prov-v1\0"; + impl ProvenanceEntry { - /// Compute the SHA-256 hash for a provenance entry, chaining from the previous hash. + /// Compute the SHA-256 hash for a provenance entry (V-L2-C1, V-L2-C2). + /// + /// Preimage = domain tag || length-prefixed fields || canonical timestamp: /// - /// The hash covers: previous_hash, entity_id, operation, and timestamp. - /// This ensures that any tampering with the chain is detectable.
+ /// ```text + /// SHA-256( + /// "verisim-prov-v1\0" + /// || u64_le(len(previous_hash)) || previous_hash + /// || u64_le(len(entity_id)) || entity_id + /// || u64_le(len(operation)) || operation + /// || u64_le(len(actor)) || actor + /// || i64_le(timestamp.timestamp()) + /// || u32_le(timestamp.timestamp_subsec_nanos()) + /// || u64_le(len(before_snapshot.unwrap_or(""))) + /// || before_snapshot.unwrap_or("") + /// || u64_le(len(transformation.unwrap_or(""))) + /// || transformation.unwrap_or("") + /// ) + /// ``` + /// + /// All seven fields participate, so tampering with any of them is + /// detectable (caveat: `None` and `Some("")` hash identically). See + /// `docs/theory/provenance-threat-model.adoc` for the adversary matrix + /// and `docs/decisions/ADR-0004` (forthcoming) for the binding choices. pub fn compute_hash( previous_hash: &str, entity_id: &str, operation: &str, - timestamp: &str, + actor: &str, + timestamp: &DateTime<Utc>, + before_snapshot: Option<&str>, + transformation: Option<&str>, ) -> String { + fn write_lp(hasher: &mut Sha256, bytes: &[u8]) { + hasher.update((bytes.len() as u64).to_le_bytes()); + hasher.update(bytes); + } let mut hasher = Sha256::new(); - hasher.update(previous_hash.as_bytes()); - hasher.update(entity_id.as_bytes()); - hasher.update(operation.as_bytes()); - hasher.update(timestamp.as_bytes()); + hasher.update(PROV_DOMAIN_TAG); + write_lp(&mut hasher, previous_hash.as_bytes()); + write_lp(&mut hasher, entity_id.as_bytes()); + write_lp(&mut hasher, operation.as_bytes()); + write_lp(&mut hasher, actor.as_bytes()); + hasher.update(timestamp.timestamp().to_le_bytes()); + hasher.update(timestamp.timestamp_subsec_nanos().to_le_bytes()); + write_lp(&mut hasher, before_snapshot.unwrap_or("").as_bytes()); + write_lp(&mut hasher, transformation.unwrap_or("").as_bytes()); format!("{:x}", hasher.finalize()) } - /// Verify that this entry's hash is consistent with its contents. + /// Verify that this entry's hash is consistent with all of its contents.
/// - /// Returns `true` if the stored hash matches the recomputed hash. + /// Returns `true` iff the stored hash matches the recomputed hash over + /// the full field set (previous_hash, entity_id, operation, actor, + /// timestamp, before_snapshot, transformation). pub fn verify(&self) -> bool { let expected = Self::compute_hash( &self.previous_hash, &self.entity_id, &self.operation, - &self.timestamp.to_rfc3339(), + &self.actor, + &self.timestamp, + self.before_snapshot.as_deref(), + self.transformation.as_deref(), ); self.hash == expected } @@ -196,7 +241,7 @@ impl ProvenanceEntry { /// Create a new genesis entry (first in the chain for an entity). pub fn genesis(entity_id: &str, actor: &str) -> Self { let timestamp = Utc::now(); - let hash = Self::compute_hash("", entity_id, "insert", &timestamp.to_rfc3339()); + let hash = Self::compute_hash("", entity_id, "insert", actor, &timestamp, None, None); Self { hash, previous_hash: String::new(), @@ -216,7 +261,10 @@ impl ProvenanceEntry { &self.hash, &self.entity_id, operation, - &timestamp.to_rfc3339(), + actor, + &timestamp, + None, + None, ); Self { hash, @@ -491,11 +539,126 @@ mod tests { } #[test] - fn test_provenance_tamper_detection() { + fn test_provenance_tamper_entity_id() { let mut entry = ProvenanceEntry::genesis("entity-1", "system"); - // Tamper with the entity_id after hash computation. entry.entity_id = "entity-2".to_string(); - assert!(!entry.verify(), "Tampered entry should fail verification"); + assert!( + !entry.verify(), + "tampering with entity_id must break verify" + ); + } + + /// V-L2-C3: actor is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_actor() { + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + entry.actor = "mallory".to_string(); + assert!(!entry.verify(), "tampering with actor must break verify"); + } + + /// V-L2-C3: before_snapshot is hashed; tampering with it must be detected.
+ #[test] + fn test_provenance_tamper_before_snapshot() { + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + // Adding a snapshot (None -> Some) should break the original hash. + entry.before_snapshot = Some("{\"redacted\":true}".to_string()); + assert!( + !entry.verify(), + "tampering with before_snapshot must break verify" + ); + } + + /// V-L2-C3: transformation is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_transformation() { + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + entry.transformation = Some("evil-rewrite".to_string()); + assert!( + !entry.verify(), + "tampering with transformation must break verify" + ); + } + + /// V-L2-C3: operation is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_operation() { + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + entry.operation = "delete".to_string(); + assert!( + !entry.verify(), + "tampering with operation must break verify" + ); + } + + /// V-L2-C3: previous_hash is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_previous_hash() { + let genesis = ProvenanceEntry::genesis("entity-1", "alice"); + let mut update = genesis.chain("update", "bob"); + update.previous_hash = "deadbeef".to_string(); + assert!( + !update.verify(), + "tampering with previous_hash must break verify" + ); + } + + /// V-L2-C2: hash depends on the canonical (i64+u32) timestamp encoding, + /// not on a string representation that might vary. Two `DateTime` + /// values that represent the same instant — one parsed from RFC3339, + /// one constructed via `from_timestamp` — must produce the same hash. 
+ #[test] + fn test_provenance_hash_timestamp_canonical() { + let parsed: DateTime<Utc> = "2026-05-13T08:00:00.000000000Z".parse().unwrap(); + let built = DateTime::<Utc>::from_timestamp(parsed.timestamp(), 0).unwrap(); + assert_eq!( + parsed, built, + "construction paths must yield equal instants" + ); + + let h1 = ProvenanceEntry::compute_hash("", "e1", "insert", "alice", &parsed, None, None); + let h2 = ProvenanceEntry::compute_hash("", "e1", "insert", "alice", &built, None, None); + assert_eq!( + h1, h2, + "canonical timestamp encoding must be path-independent" + ); + } + + /// V-L2-C3: round-trip — build a chain of N entries and assert every + /// mutation of every field breaks verification. + #[test] + fn test_provenance_chain_round_trip_mutation_matrix() { + let g = ProvenanceEntry::genesis("post-7", "system"); + let u1 = g.chain("update", "alice"); + let u2 = u1.chain("update", "bob"); + let d = u2.chain("delete", "alice"); + for entry in [&g, &u1, &u2, &d] { + assert!(entry.verify(), "every legitimate entry must verify"); + } + + for original in [&g, &u1, &u2, &d] { + // Permute each hash-covered field and assert verify fails.
+ for mutate in [ + |e: &mut ProvenanceEntry| e.actor.push_str("-tamper"), + |e: &mut ProvenanceEntry| e.entity_id.push_str("-tamper"), + |e: &mut ProvenanceEntry| e.operation.push_str("-tamper"), + |e: &mut ProvenanceEntry| { + e.previous_hash = "00".repeat(32); + }, + |e: &mut ProvenanceEntry| { + e.timestamp += chrono::Duration::nanoseconds(1); + }, + |e: &mut ProvenanceEntry| { + e.before_snapshot = Some("tampered".into()); + }, + |e: &mut ProvenanceEntry| { + e.transformation = Some("tampered".into()); + }, + ] { + let mut clone = original.clone(); + mutate(&mut clone); + assert!(!clone.verify(), "field mutation must break verification"); + } + } } #[test] diff --git a/src/codegen/overlay.rs b/src/codegen/overlay.rs index 1d1ea02..0d557a9 100644 --- a/src/codegen/overlay.rs +++ b/src/codegen/overlay.rs @@ -114,7 +114,13 @@ fn generate_metadata_table(schema: &ParsedSchema) -> String { /// /// Stores a SHA-256 hash-chained audit trail of all data modifications. /// Each row chains to its predecessor via `previous_hash`, forming an -/// append-only, tamper-evident log. +/// append-only, tamper-evident log (see +/// `docs/theory/provenance-threat-model.adoc`). +/// +/// The `chain_head` table is the per-entity head pointer used for the +/// write-path lock (V-L2-L1). The UNIQUE INDEX on `(entity_id, +/// previous_hash)` (V-L2-L2) makes chain forks structurally impossible +/// — defence in depth for if the lock is ever bypassed. fn generate_provenance_table() -> String { "-- Provenance: SHA-256 hash-chained audit trail\n\ CREATE TABLE IF NOT EXISTS verisimdb_provenance_log (\n\ @@ -128,8 +134,24 @@ fn generate_provenance_table() -> String { \x20 before_snapshot TEXT, -- JSON of entity state before operation\n\ \x20 transformation TEXT -- description of transformation applied\n\ );\n\ + -- V-L2-L2: forbid chain forks at the DB level. 
Genesis records all\n\ + -- carry previous_hash='' so this also enforces a single genesis per\n\ + -- entity.\n\ + CREATE UNIQUE INDEX IF NOT EXISTS ux_provenance_chain\n\ + \x20 ON verisimdb_provenance_log(entity_id, previous_hash);\n\ CREATE INDEX IF NOT EXISTS idx_provenance_entity ON verisimdb_provenance_log(entity_id);\n\ - CREATE INDEX IF NOT EXISTS idx_provenance_table ON verisimdb_provenance_log(table_name);\n\n" + CREATE INDEX IF NOT EXISTS idx_provenance_table ON verisimdb_provenance_log(table_name);\n\ + \n\ + -- V-L2-L1: per-entity head pointer. The write path takes a row\n\ + -- lock here (SELECT … FOR UPDATE / BEGIN IMMEDIATE) so concurrent\n\ + -- appenders on the same entity serialise; cross-entity appends\n\ + -- remain parallel. Each successful append updates head_hash in\n\ + -- the same transaction as the INSERT into verisimdb_provenance_log.\n\ + CREATE TABLE IF NOT EXISTS verisimdb_provenance_chain_head (\n\ + \x20 entity_id TEXT PRIMARY KEY,\n\ + \x20 head_hash TEXT NOT NULL,\n\ + \x20 updated_at TEXT NOT NULL\n\ + );\n\n" .to_string() } @@ -321,6 +343,24 @@ mod tests { assert!(ddl.contains("actor")); } + /// V-L2-L2: forks are forbidden by a UNIQUE INDEX on + /// (entity_id, previous_hash). + #[test] + fn test_provenance_table_has_unique_chain_index() { + let ddl = generate_provenance_table(); + assert!(ddl.contains("UNIQUE INDEX")); + assert!(ddl.contains("ux_provenance_chain")); + assert!(ddl.contains("(entity_id, previous_hash)")); + } + + /// V-L2-L1: chain_head table exists for per-entity write serialisation. 
+ #[test] + fn test_provenance_table_has_chain_head() { + let ddl = generate_provenance_table(); + assert!(ddl.contains("verisimdb_provenance_chain_head")); + assert!(ddl.contains("head_hash")); + } + #[test] fn test_temporal_table_has_versioning() { let ddl = generate_temporal_table(); diff --git a/src/tier1/provenance.rs b/src/tier1/provenance.rs index 4886e18..283e7ee 100644 --- a/src/tier1/provenance.rs +++ b/src/tier1/provenance.rs @@ -1,57 +1,18 @@ // SPDX-License-Identifier: PMPL-1.0-or-later // Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) // -// Provenance tracking via SHA-256 hash chains. -// Write-path observer: records what happened, never changes what happened. - -use serde::{Deserialize, Serialize}; -use sha2::{Digest, Sha256}; - -/// A single link in the provenance hash chain. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ProvenanceRecord { - /// Hash of this record (SHA-256 of previous_hash + entity_id + operation + timestamp). - pub hash: String, - /// Hash of the previous record in the chain (empty string for genesis). - pub previous_hash: String, - /// Entity this record is about. - pub entity_id: String, - /// What happened: "create", "update", "delete", "transform". - pub operation: String, - /// Who did it (user, service, or system identifier). - pub actor: String, - /// When it happened. - pub timestamp: chrono::DateTime, - /// Optional: what the entity looked like before (for updates/deletes). - pub before_snapshot: Option, - /// Optional: transformation description (for derived data). - pub transformation: Option, -} +// Tier 1 provenance write-path helpers. +// +// Type definitions live in `crate::abi` — this module exists for the +// *write-path* code (V-L1-C1 onwards: hooking the target database, +// appending tamper-evident records to the sidecar). 
The duplicate +// `ProvenanceRecord` struct that previously lived here was removed +// in V-L2-N1 (it shadowed `abi::ProvenanceEntry` and risked drifting +// from the canonical hash function). +// +// Re-export the canonical type so existing `use crate::tier1::provenance::…` +// call sites continue to work. -impl ProvenanceRecord { - /// Compute the hash for this record, chaining from the previous hash. - pub fn compute_hash( - previous_hash: &str, - entity_id: &str, - operation: &str, - timestamp: &str, - ) -> String { - let mut hasher = Sha256::new(); - hasher.update(previous_hash.as_bytes()); - hasher.update(entity_id.as_bytes()); - hasher.update(operation.as_bytes()); - hasher.update(timestamp.as_bytes()); - format!("{:x}", hasher.finalize()) - } +pub use crate::abi::ProvenanceEntry; - /// Verify that this record's hash is consistent with its contents. - pub fn verify(&self) -> bool { - let expected = Self::compute_hash( - &self.previous_hash, - &self.entity_id, - &self.operation, - &self.timestamp.to_rfc3339(), - ); - self.hash == expected - } -} +// Write-path helpers (V-L1-C1) will land here. diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 5cad7e1..2b81905 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -269,19 +269,27 @@ fn test_provenance_chain_integrity_multi_step() { assert_ne!(update1.hash, update2.hash); assert_ne!(update2.hash, delete.hash); - // Tamper detection: mutating any entry should break verification. + // Tamper detection: every hash-covered field must break verification + // when mutated (V-L2-C1, V-L2-C3, V-L2-C4). let mut tampered = update1.clone(); tampered.actor = "evil-mallory".to_string(); assert!( - tampered.verify(), - "Actor is not part of hash — tamper to actor alone is invisible" + !tampered.verify(), + "actor is part of the hash; tampering with it must break verify" ); - // But modifying a hash-covered field should be detected.
+ let mut tampered_op = update1.clone(); tampered_op.operation = "delete".to_string(); assert!( !tampered_op.verify(), - "Tampering with operation should break verification" + "tampering with operation must break verify" + ); + + let mut tampered_snap = update1.clone(); + tampered_snap.before_snapshot = Some("{}".into()); + assert!( + !tampered_snap.verify(), + "before_snapshot is part of the hash; tampering with it must break verify" ); }