From 149ec3988be73472e2fd5aa0092955e73b67a0a8 Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Thu, 14 May 2026 15:30:25 +0100 Subject: [PATCH] fix(codegen): lineage table CHECK refuses self-references Closes #42. `verisimdb_lineage_graph` had no constraint preventing `(source_entity, source_table) == (target_entity, target_table)`. Any row inserted with matching source/target tuples is a self-loop and falsifies the README's "DAG" framing at the structural level. Add a CHECK constraint: CHECK (source_entity <> target_entity OR source_table <> target_table) i.e. at least one component of the (entity, table) pair must differ. Two entities with the same id across different tables (a legitimate cross-table derivation) still pass. Multi-hop cycle prevention is a runtime concern (a CHECK constraint only sees the row being written, so no number of CHECKs can catch a cycle that spans several edges); kept as a separate follow-up (V-L2-I2). Because the table is emitted with `CREATE TABLE IF NOT EXISTS`, the constraint takes effect only where the table is newly created; an already-existing `verisimdb_lineage_graph` is left unchanged and needs a separate migration. Test `test_lineage_table_has_self_reference_check` asserts the exact CHECK clause appears in the emitted DDL with lineage enabled. `cargo clippy --all-targets -- -D warnings` clean; 35 unit tests pass. Co-Authored-By: Claude Opus 4.7 --- src/codegen/overlay.rs | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/codegen/overlay.rs b/src/codegen/overlay.rs index 6b6da6d..5af04e6 100644 --- a/src/codegen/overlay.rs +++ b/src/codegen/overlay.rs @@ -139,6 +139,10 @@ fn generate_provenance_table() -> String { /// Together, these edges form a DAG that can be traversed to answer /// "where did this data come from?" and "what is affected if this changes?" fn generate_lineage_table() -> String { + // The CHECK constraint refuses edges whose source and target are the + // same (entity, table) pair — i.e. self-loops, which would falsify + // the README's "DAG" claim at the structural level. Closes #42. + // (Multi-hop cycle prevention is a runtime concern tracked separately.) 
"-- Lineage: data derivation DAG\n\ CREATE TABLE IF NOT EXISTS verisimdb_lineage_graph (\n\ \x20 edge_id TEXT PRIMARY KEY,\n\ @@ -148,7 +152,8 @@ fn generate_lineage_table() -> String { \x20 target_table TEXT NOT NULL,\n\ \x20 derivation_type TEXT NOT NULL, -- copy, transform, aggregate, join, filter\n\ \x20 description TEXT,\n\ - \x20 created_at TEXT NOT NULL -- ISO 8601\n\ + \x20 created_at TEXT NOT NULL, -- ISO 8601\n\ + \x20 CHECK (source_entity <> target_entity OR source_table <> target_table)\n\ );\n\ CREATE INDEX IF NOT EXISTS idx_lineage_source ON verisimdb_lineage_graph(source_entity);\n\ CREATE INDEX IF NOT EXISTS idx_lineage_target ON verisimdb_lineage_graph(target_entity);\n\n" @@ -280,6 +285,29 @@ mod tests { assert!(ddl.contains("verisimdb_simulation_branches")); } + /// Lineage edges must refuse self-loops at the storage layer + /// (closes #42). The DAG claim in the README would be unenforced + /// without this check. + #[test] + fn test_lineage_table_has_self_reference_check() { + let schema = test_schema(); + let octad = OctadConfig { + enable_provenance: false, + enable_lineage: true, + enable_temporal: false, + enable_access_control: false, + enable_constraints: false, + enable_simulation: false, + }; + let ddl = generate_sidecar_schema(&schema, &octad); + assert!(ddl.contains("verisimdb_lineage_graph")); + // The exact CHECK clause must be present in the emitted DDL. + assert!( + ddl.contains("CHECK (source_entity <> target_entity OR source_table <> target_table)"), + "lineage table is missing the self-reference CHECK constraint" + ); + } + #[test] fn test_generate_minimal_dimensions() { let schema = test_schema();