diff --git a/docs/INVESTIGATOR-TOOLKIT.md b/docs/INVESTIGATOR-TOOLKIT.md new file mode 100644 index 0000000..56bc35e --- /dev/null +++ b/docs/INVESTIGATOR-TOOLKIT.md @@ -0,0 +1,281 @@ +# Docudactyl — Investigator Toolkit + + + +This document describes the **investigator-focused extraction modules** added +to Docudactyl for use by citizen journalists, independent researchers, and +investigative reporters working with large document releases such as the +Epstein filings, Panama/Paradise/Pandora Papers, FinCEN Files, etc. + +These modules are **standalone Zig translation units** with stable C-ABI +entry points. They can be called directly from Chapel, Rust, Julia, OCaml, +Python `ctypes`, or any language that speaks the C FFI — without going +through the full HPC pipeline. + +--- + +## Why these modules exist + +The base HPC pipeline answers "what is in this document?". Investigative +journalism needs different questions: + +1. **Who appears with whom, how often?** → [`entity_graph`](#entity-graph) +2. **What was hidden under black bars?** → [`redaction_recovery`](#redaction-recovery) +3. **Where did the jet actually go?** → [`flight_log`](#flight-log) +4. **Where did the witness stop answering?** → [`evasion_detect`](#evasion-detect) +5. **What does this document contain, at a glance?** → [`investigator_summary`](#investigator-summary) + +Each module is **pattern-based, no ML dependency**, fast, and deterministic. + +--- + +## Entity Graph + +**File:** `ffi/zig/src/entity_graph.zig` + +Builds a cross-document co-occurrence graph of capitalised personal names. +Exports to **GraphML** (Gephi, yEd, Cytoscape) and **CSV** (Excel, +LibreOffice, Maltego, Neo4j). 
+ +### C ABI + +```c +typedef struct EntityGraph EntityGraph; + +EntityGraph* ddac_entity_graph_new(void); +void ddac_entity_graph_free(EntityGraph*); + +int ddac_entity_graph_add_document(EntityGraph*, const char* text, size_t len); +int ddac_entity_graph_export_graphml(EntityGraph*, const char* path); +int ddac_entity_graph_export_csv(EntityGraph*, const char* path); + +uint32_t ddac_entity_graph_node_count(EntityGraph*); +uint32_t ddac_entity_graph_edge_count(EntityGraph*); +``` + +### Usage sketch + +```chapel +// Chapel pseudocode +var g = ddac_entity_graph_new(); +for doc in manifest { + var text = readExtractedText(doc); + ddac_entity_graph_add_document(g, text.c_str(), text.len); +} +ddac_entity_graph_export_graphml(g, "/out/entities.graphml"); +ddac_entity_graph_export_csv(g, "/out/entities.csv"); +ddac_entity_graph_free(g); +``` + +Load `entities.graphml` in Gephi → run ForceAtlas2 → see the cluster +structure. Load `entities.csv` in any spreadsheet to sort by weight. + +### Notes +- Extracts 2+ consecutive capitalised words, optionally preceded by a + title (Mr./Mrs./Dr./Prince/Sir/…). +- Filters a conservative stopword list (weekdays, months, common + sentence-initial words). +- Edge weight accumulates across documents — high weight indicates + recurring co-occurrence worth examining. + +--- + +## Redaction Recovery + +**File:** `ffi/zig/src/redaction_recovery.zig` + +Extends the base redaction-detection stage with **per-page density maps** +and **overlay-only text recovery**. When a PDF carries `/Redact` +annotations (black boxes) but the underlying content stream is intact, +this module extracts the text that the overlay was meant to hide. + +### When it works + +✅ Overlay redactions where the text stream was NOT scrubbed (common +FOIA failure mode — many Epstein-era productions exhibit this). +❌ Redactions that rasterise the page or strip the content stream. +❌ Redactions applied at scan time (pixel-level black bars). 
+ +### C ABI + +```c +typedef struct { + int status; + uint32_t total_pages; + uint32_t total_redactions; + uint32_t pages_with_redactions; + uint32_t recoverable_pages; + uint64_t recovered_bytes; + PageStats pages[4096]; + char summary[512]; +} RedactionRecoveryResult; + +int ddac_redaction_recovery_analyze(const char* pdf_path, RedactionRecoveryResult*); +int ddac_redaction_recovery_dump_text(const char* pdf_path, const char* out_path); +``` + +### Legal & ethical note + +This module extracts text that is **already present** in the document's +content stream — the same text that `cmd-A, cmd-C` in Preview would +reveal. It does not break encryption, does not OCR under pixel-level +redactions, and does not decode protected content. Use responsibly and +check your local jurisdiction's rules on reporting improperly redacted +material. + +--- + +## Flight Log + +**File:** `ffi/zig/src/flight_log.zig` + +Extracts travel-document entities from text: + +| Entity | Examples | +|--------|----------| +| Tail numbers | `N908JE`, `N212JE`, `G-EJES`, `D-IIKA` | +| IATA codes | `TEB`, `PBI`, `JFK`, `STT`, `LHR`, `CDG`, `DXB` | +| ICAO codes | `KTEB`, `KPBI`, `KJFK`, `EGLL`, `LFPB` | +| Phones | `(212) 555-1234`, `+1 212 555 9999`, `+44 20 7946 0958` | +| Addresses | Line-leading number + road-word heuristic | +| Manifest markers | `PAX:`, `PASSENGERS:`, `MANIFEST:`, `GUESTS:` | + +### C ABI + +```c +typedef struct { /* ... */ } FlightLogResult; +int ddac_flight_log_process(const char* text, size_t len, FlightLogResult*); +``` + +### Notes +- IATA/ICAO codes use a **whitelist** of airports of interest + (Teterboro, Palm Beach, St. Thomas, Le Bourget, Heathrow, Dubai, + etc.) to avoid false positives on three-letter acronyms like `CEO` + or `FBI`. Extend the whitelist in the module source as needed. +- Tail-number pattern is liberal enough to tolerate OCR noise but + tight enough to reject ordinary words. 
+ +--- + +## Evasion Detect + +**File:** `ffi/zig/src/evasion_detect.zig` + +Detects and categorises evasive / non-answer patterns in deposition and +interview transcripts: + +| Category | Example phrase | +|----------|---------------| +| `no_recall` | "I don't recall", "I have no recollection" | +| `no_memory` | "I don't remember", "I can't remember" | +| `not_sure` | "I'm not sure", "I couldn't say" | +| `no_knowledge` | "Not to my knowledge", "I'm not aware" | +| `would_check` | "I'd have to check", "I would need to check" | +| `asked_answered` | "Asked and answered" (lawyer interjection) | +| `fifth_amendment` | "Fifth Amendment", "on the advice of counsel" | +| `decline_answer` | "I decline to answer", "refuse to answer" | + +Reports category counts, total events, and an **evasion rate** (events +per 1000 tokens, fixed-point ×1000). A rate above ~20 (i.e. `×1000 > +20000`) typically indicates a heavily evasive witness segment. + +### C ABI + +```c +typedef struct { + int status; + uint32_t category_counts[8]; + uint32_t total_events; + uint32_t total_tokens; + uint32_t events_per_1k_fixed; // rate × 1000 + char summary[512]; +} EvasionResult; + +int ddac_evasion_detect(const char* text, size_t len, EvasionResult*); +``` + +--- + +## Investigator Summary + +**File:** `ffi/zig/src/investigator_summary.zig` + +Takes a populated `InvestigatorSummary` struct and emits an +investigator-friendly **JSON summary** per document. Designed to be +readable in a text editor and ingestible by spreadsheets, dataset +browsers, or static-site generators. + +The JSON is flat and forgiving: any field may be zero/empty without +breaking consumers. 
A `flags` array gives quick visual triage: + +```json +{ + "source_path": "/data/release_2024/doc_0042.pdf", + "sha256": "...", + "page_count": 184, + "redactions": {"count": 12, "pages_affected": 4, "recoverable_pages": 2}, + "financial": {"amounts": 3, "accounts": 1}, + "legal": {"case_citations": 5, "dockets": 2, "statutes": 1}, + "speakers": {"count": 2, "is_deposition": true}, + "evasion": {"total": 17, "per_1k_tokens": 12.5}, + "entities": { + "persons": ["Jeffrey Epstein", "Ghislaine Maxwell"], + "tail_numbers": ["N908JE"], + "airports": ["TEB", "PBI", "KTEB"], + "phones": ["+1 212 555 1234"], + "addresses": ["9 East 71st Street"] + }, + "flags": ["has_redactions", "has_recoverable_text", "deposition", "high_evasion"] +} +``` + +### C ABI + +```c +int ddac_investigator_summary_write(const char* out_path, const InvestigatorSummary*); +int ddac_investigator_summary_set_list_item(StringList*, uint32_t idx, + const char* text, size_t len); +``` + +--- + +## Recommended Investigator Workflow + +1. **Run base pipeline** (`DocudactylHPC`) to extract text + SHA + PREMIS. +2. **Per-document pass** — for each extracted text file, call: + - `ddac_flight_log_process` → flight / travel entities + - `ddac_evasion_detect` → deposition evasion stats + - `ddac_redaction_recovery_analyze` + `_dump_text` (PDFs only) +3. **Corpus-wide pass** — accumulate entities into a single graph: + - `ddac_entity_graph_new` + - `ddac_entity_graph_add_document` per document + - `ddac_entity_graph_export_graphml` + `_export_csv` +4. **Summary pass** — populate `InvestigatorSummary` from the results + above and emit per-document JSON with + `ddac_investigator_summary_write`. +5. **Review** — open the GraphML in Gephi, the CSV in a spreadsheet, and + the per-document JSONs in a text editor or a static-site browser. 
+ +--- + +## Building & Testing + +From `ffi/zig/`: + +```bash +zig build # build shared + static libraries +zig build test # run unit tests (all new modules included) +``` + +All new modules have accompanying Zig unit tests. No new C dependencies +beyond Poppler + GLib (which Docudactyl already links). + +--- + +## License + +All new modules are released under **PMPL-1.0-or-later** (with MPL-2.0 +fallback), matching the rest of Docudactyl. diff --git a/ffi/zig/src/docudactyl_ffi.zig b/ffi/zig/src/docudactyl_ffi.zig index 22425df..e97a0a1 100644 --- a/ffi/zig/src/docudactyl_ffi.zig +++ b/ffi/zig/src/docudactyl_ffi.zig @@ -28,6 +28,12 @@ const financial_extract = @import("financial_extract.zig"); const speaker_id = @import("speaker_id.zig"); const reextract = @import("reextract.zig"); const quality_stats = @import("quality_stats.zig"); +// ── Investigator-Focused Modules (citizen & investigative journalism) ── +const flight_log = @import("flight_log.zig"); +const entity_graph = @import("entity_graph.zig"); +const redaction_recovery = @import("redaction_recovery.zig"); +const evasion_detect = @import("evasion_detect.zig"); +const investigator_summary = @import("investigator_summary.zig"); // Ensure submodule exports are included in the shared library comptime { @@ -43,6 +49,11 @@ comptime { _ = speaker_id; _ = reextract; _ = quality_stats; + _ = flight_log; + _ = entity_graph; + _ = redaction_recovery; + _ = evasion_detect; + _ = investigator_summary; } const c = @cImport({ diff --git a/ffi/zig/src/entity_graph.zig b/ffi/zig/src/entity_graph.zig new file mode 100644 index 0000000..124376d --- /dev/null +++ b/ffi/zig/src/entity_graph.zig @@ -0,0 +1,509 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +// Docudactyl — Entity Co-Occurrence Graph Builder +// +// Builds a person-entity co-occurrence graph from extracted document text +// and emits it as GraphML for visualisation in Gephi, yEd, Cytoscape, etc. 
+// +// Use case: +// The Epstein corpus is a network-analysis problem as much as it is a +// text-analysis problem. Investigators need to see "who appears with +// whom, how often, and in what context" across thousands of documents. +// Per-document analysis cannot reveal that — a cross-document graph can. +// +// Pipeline: +// 1. ddac_entity_graph_new() -> graph handle +// 2. ddac_entity_graph_add_document(...) -> repeat per doc +// 3. ddac_entity_graph_export_graphml(...) -> write .graphml +// 4. ddac_entity_graph_free(handle) +// +// Entity detection (rule-based — no ML dependency): +// - Capitalised multi-word names: "Jeffrey Epstein", "Ghislaine Maxwell" +// - Titled names: "Prince Andrew", "Dr. Anthony Fauci" +// - Common title prefixes filtered as nodes, recorded as attributes +// +// Edge semantics: +// - An edge between A and B means A and B co-occurred in the same +// document (optionally within a sentence/paragraph window). +// - Edge weight = number of documents (or windows) in which both appeared. +// + +const std = @import("std"); + +// ============================================================================ +// Public Types +// ============================================================================ + +pub const EntityGraphStatus = enum(c_int) { + ok = 0, + allocation_error = 1, + invalid_handle = 2, + write_error = 3, + no_text = 4, +}; + +pub const MAX_NAME_LEN: usize = 128; + +/// A normalised entity (name) in the graph. +const Node = struct { + /// Canonicalised display name (e.g. "Jeffrey Epstein"). + name: []u8, + /// Total co-occurrence frequency across all documents. + freq: u32, + /// Number of documents mentioning this entity. + doc_count: u32, +}; + +/// A weighted co-occurrence edge between two nodes. +const Edge = struct { + src: u32, + dst: u32, + weight: u32, +}; + +/// Graph handle (opaque to C callers — cast through a pointer). 
+pub const EntityGraph = struct { + allocator: std.mem.Allocator, + arena: std.heap.ArenaAllocator, + nodes: std.ArrayList(Node), + name_to_idx: std.StringHashMap(u32), + edges: std.AutoHashMap(u64, u32), // key = (u64(src) << 32) | u64(dst), value = weight + document_count: u32, + + pub fn init(allocator: std.mem.Allocator) !*EntityGraph { + const self = try allocator.create(EntityGraph); + self.* = .{ + .allocator = allocator, + .arena = std.heap.ArenaAllocator.init(allocator), + .nodes = .{}, + .name_to_idx = std.StringHashMap(u32).init(allocator), + .edges = std.AutoHashMap(u64, u32).init(allocator), + .document_count = 0, + }; + return self; + } + + pub fn deinit(self: *EntityGraph) void { + self.nodes.deinit(self.allocator); + self.name_to_idx.deinit(); + self.edges.deinit(); + self.arena.deinit(); + self.allocator.destroy(self); + } + + fn getOrCreateNode(self: *EntityGraph, name: []const u8) !u32 { + if (self.name_to_idx.get(name)) |idx| { + self.nodes.items[idx].freq += 1; + return idx; + } + // Copy name into the arena so the hash-map key has a stable lifetime. 
+ const owned = try self.arena.allocator().dupe(u8, name); + const idx: u32 = @intCast(self.nodes.items.len); + try self.nodes.append(self.allocator, .{ + .name = owned, + .freq = 1, + .doc_count = 0, + }); + try self.name_to_idx.put(owned, idx); + return idx; + } + + fn addEdge(self: *EntityGraph, a: u32, b: u32) !void { + if (a == b) return; + const src = if (a < b) a else b; + const dst = if (a < b) b else a; + const key = (@as(u64, src) << 32) | @as(u64, dst); + const gop = try self.edges.getOrPut(key); + if (gop.found_existing) { + gop.value_ptr.* += 1; + } else { + gop.value_ptr.* = 1; + } + } +}; + +// ============================================================================ +// Name Extraction +// ============================================================================ + +fn isUpper(ch: u8) bool { + return ch >= 'A' and ch <= 'Z'; +} + +fn isLower(ch: u8) bool { + return ch >= 'a' and ch <= 'z'; +} + +fn isAlpha(ch: u8) bool { + return isUpper(ch) or isLower(ch); +} + +fn isSpace(ch: u8) bool { + return ch == ' ' or ch == '\t'; +} + +/// Stopwords that may appear capitalised at sentence boundaries but are not +/// personal names. Keep this list small and conservative — over-filtering +/// erases real signal. 
+const name_stopwords = std.StaticStringMap(void).initComptime(.{
+    .{ "The", {} }, .{ "A", {} }, .{ "An", {} },
+    .{ "And", {} }, .{ "But", {} }, .{ "Or", {} },
+    .{ "If", {} }, .{ "When", {} }, .{ "Where", {} },
+    .{ "What", {} }, .{ "Why", {} }, .{ "How", {} },
+    .{ "This", {} }, .{ "That", {} }, .{ "These", {} },
+    .{ "Those", {} }, .{ "In", {} }, .{ "On", {} },
+    .{ "At", {} }, .{ "By", {} }, .{ "For", {} },
+    .{ "With", {} }, .{ "From", {} }, .{ "To", {} },
+    .{ "Of", {} }, .{ "Is", {} }, .{ "Was", {} },
+    .{ "Were", {} }, .{ "Be", {} }, .{ "Been", {} },
+    .{ "I", {} }, .{ "We", {} }, .{ "They", {} },
+    .{ "He", {} }, .{ "She", {} }, .{ "It", {} },
+    .{ "Yes", {} }, .{ "No", {} }, .{ "Not", {} },
+    .{ "Monday", {} }, .{ "Tuesday", {} }, .{ "Wednesday", {} },
+    .{ "Thursday", {} }, .{ "Friday", {} }, .{ "Saturday", {} },
+    .{ "Sunday", {} }, .{ "January", {} }, .{ "February", {} },
+    .{ "March", {} }, .{ "April", {} }, .{ "May", {} },
+    .{ "June", {} }, .{ "July", {} }, .{ "August", {} },
+    .{ "September", {} }, .{ "October", {} }, .{ "November", {} },
+    .{ "December", {} },
+});
+
+const title_prefixes = [_][]const u8{
+    "Mr.", "Mrs.", "Ms.", "Dr.", "Sir", "Prof.", "Professor",
+    "Lord", "Lady", "Hon.", "Judge", "Justice", "Prince", "Princess",
+    "King", "Queen", "Pope",
+};
+
+/// Check if `word` is a title prefix (case-sensitive, common English forms).
+fn isTitle(word: []const u8) bool {
+    for (title_prefixes) |t| {
+        if (std.mem.eql(u8, word, t)) return true;
+    }
+    return false;
+}
+
+/// Tokenise `text` into capitalised name candidates. A "name" is 2+
+/// consecutive capitalised words, optionally preceded by a title prefix.
+/// Appends each extracted candidate (copied into `arena`) to `buf`.
+fn extractNames(
+    text: []const u8,
+    buf: *std.ArrayList([]const u8),
+    arena: std.mem.Allocator,
+) !void {
+    var i: usize = 0;
+    while (i < text.len) {
+        // Skip non-alpha characters.
+ while (i < text.len and !isAlpha(text[i])) : (i += 1) {} + if (i >= text.len) break; + + // Start of a word. Capture consecutive capitalised tokens. + const start = i; + var word_count: usize = 0; + var end = i; + var had_title = false; + + while (end < text.len) { + const word_start = end; + // Require upper-case initial for a "name token". + if (!isUpper(text[end])) break; + end += 1; + while (end < text.len and (isAlpha(text[end]) or text[end] == '.' or text[end] == '\'')) : (end += 1) {} + const word = text[word_start..end]; + + if (word_count == 0 and isTitle(word)) { + had_title = true; + } else if (!had_title and name_stopwords.has(word) and word_count == 0) { + // Leading stopword — skip this entire candidate. + i = end; + word_count = 0; + break; + } + word_count += 1; + + // Consume a single space between tokens. + if (end < text.len and text[end] == ' ' and + end + 1 < text.len and isUpper(text[end + 1])) + { + end += 1; + continue; + } + break; + } + + // Accept candidates of at least 2 tokens, or 1 token if preceded by a title. + const min_tokens: usize = if (had_title) 2 else 2; + if (word_count >= min_tokens) { + const name = text[start..end]; + const copy = try arena.dupe(u8, name); + try buf.append(arena, copy); + } + + i = @max(end, i + 1); + } +} + +// ============================================================================ +// Public C-ABI Entry Points +// ============================================================================ + +/// Create a new entity graph. Returns an opaque handle, or null on failure. +export fn ddac_entity_graph_new() ?*EntityGraph { + const allocator = std.heap.c_allocator; + return EntityGraph.init(allocator) catch null; +} + +/// Destroy a graph and free all associated memory. +export fn ddac_entity_graph_free(handle: ?*EntityGraph) void { + if (handle) |h| h.deinit(); +} + +/// Add a document's text to the graph. All entities extracted from the +/// text are linked pairwise with a +1 weight on each edge. 
+export fn ddac_entity_graph_add_document( + handle: ?*EntityGraph, + text_ptr: ?[*]const u8, + text_len: usize, +) c_int { + const h = handle orelse return @intFromEnum(EntityGraphStatus.invalid_handle); + const text = if (text_ptr) |p| p[0..text_len] else return @intFromEnum(EntityGraphStatus.no_text); + if (text.len == 0) return @intFromEnum(EntityGraphStatus.no_text); + + var local_arena = std.heap.ArenaAllocator.init(h.allocator); + defer local_arena.deinit(); + var names: std.ArrayList([]const u8) = .{}; + defer names.deinit(local_arena.allocator()); + + extractNames(text, &names, local_arena.allocator()) catch { + return @intFromEnum(EntityGraphStatus.allocation_error); + }; + + // Deduplicate within this document — a name appearing 5 times counts as + // one mention for doc_count but still boosts freq for each appearance. + var doc_ids: std.ArrayList(u32) = .{}; + defer doc_ids.deinit(local_arena.allocator()); + var seen = std.AutoHashMap(u32, void).init(local_arena.allocator()); + defer seen.deinit(); + + for (names.items) |name| { + const idx = h.getOrCreateNode(name) catch { + return @intFromEnum(EntityGraphStatus.allocation_error); + }; + if (!seen.contains(idx)) { + seen.put(idx, {}) catch return @intFromEnum(EntityGraphStatus.allocation_error); + doc_ids.append(local_arena.allocator(), idx) catch { + return @intFromEnum(EntityGraphStatus.allocation_error); + }; + h.nodes.items[idx].doc_count += 1; + } + } + + // Pairwise edges between all entities in this document. + var a: usize = 0; + while (a < doc_ids.items.len) : (a += 1) { + var b: usize = a + 1; + while (b < doc_ids.items.len) : (b += 1) { + h.addEdge(doc_ids.items[a], doc_ids.items[b]) catch { + return @intFromEnum(EntityGraphStatus.allocation_error); + }; + } + } + + h.document_count += 1; + return @intFromEnum(EntityGraphStatus.ok); +} + +/// Export the graph to a GraphML file at `output_path`. Overwrites existing +/// files. 
+export fn ddac_entity_graph_export_graphml(
+    handle: ?*EntityGraph,
+    output_path: ?[*:0]const u8,
+) c_int {
+    const h = handle orelse return @intFromEnum(EntityGraphStatus.invalid_handle);
+    const path = output_path orelse return @intFromEnum(EntityGraphStatus.write_error);
+
+    // Estimate buffer: 256 bytes per node + 128 bytes per edge + 1 KB header.
+    const est: usize = 1024 + h.nodes.items.len * 256 + h.edges.count() * 128;
+    const buf = std.heap.c_allocator.alloc(u8, est) catch {
+        return @intFromEnum(EntityGraphStatus.allocation_error);
+    };
+    defer std.heap.c_allocator.free(buf);
+
+    var stream = std.io.fixedBufferStream(buf);
+    writeGraphML(h, stream.writer()) catch return @intFromEnum(EntityGraphStatus.write_error);
+
+    const file = std.fs.createFileAbsoluteZ(path, .{ .truncate = true }) catch {
+        return @intFromEnum(EntityGraphStatus.write_error);
+    };
+    defer file.close();
+    file.writeAll(stream.getWritten()) catch return @intFromEnum(EntityGraphStatus.write_error);
+
+    return @intFromEnum(EntityGraphStatus.ok);
+}
+
+/// Export a simple CSV edge list (source,target,weight) — convenient for
+/// spreadsheets and downstream graph tools that prefer CSV input.
+export fn ddac_entity_graph_export_csv( + handle: ?*EntityGraph, + output_path: ?[*:0]const u8, +) c_int { + const h = handle orelse return @intFromEnum(EntityGraphStatus.invalid_handle); + const path = output_path orelse return @intFromEnum(EntityGraphStatus.write_error); + + const est: usize = 256 + h.edges.count() * 160; + const buf = std.heap.c_allocator.alloc(u8, est) catch { + return @intFromEnum(EntityGraphStatus.allocation_error); + }; + defer std.heap.c_allocator.free(buf); + + var stream = std.io.fixedBufferStream(buf); + const w = stream.writer(); + + w.writeAll("source,target,weight\n") catch return @intFromEnum(EntityGraphStatus.write_error); + var it = h.edges.iterator(); + while (it.next()) |entry| { + const key = entry.key_ptr.*; + const src_idx: u32 = @intCast(key >> 32); + const dst_idx: u32 = @intCast(key & 0xFFFFFFFF); + const src_name = h.nodes.items[src_idx].name; + const dst_name = h.nodes.items[dst_idx].name; + w.print("\"{s}\",\"{s}\",{d}\n", .{ src_name, dst_name, entry.value_ptr.* }) catch { + return @intFromEnum(EntityGraphStatus.write_error); + }; + } + + const file = std.fs.createFileAbsoluteZ(path, .{ .truncate = true }) catch { + return @intFromEnum(EntityGraphStatus.write_error); + }; + defer file.close(); + file.writeAll(stream.getWritten()) catch return @intFromEnum(EntityGraphStatus.write_error); + + return @intFromEnum(EntityGraphStatus.ok); +} + +/// Number of distinct entities in the graph. +export fn ddac_entity_graph_node_count(handle: ?*EntityGraph) u32 { + const h = handle orelse return 0; + return @intCast(h.nodes.items.len); +} + +/// Number of distinct co-occurrence edges in the graph. 
+export fn ddac_entity_graph_edge_count(handle: ?*EntityGraph) u32 {
+    const h = handle orelse return 0;
+    return @intCast(h.edges.count());
+}
+
+// ============================================================================
+// GraphML Writer
+// ============================================================================
+
+fn writeGraphML(g: *EntityGraph, writer: anytype) !void {
+    try writer.writeAll(
+        \\<?xml version="1.0" encoding="UTF-8"?>
+        \\<graphml xmlns="http://graphml.graphdrawing.org/xmlns">
+        \\  <key id="label" for="node" attr.name="label" attr.type="string"/>
+        \\  <key id="freq" for="node" attr.name="frequency" attr.type="int"/>
+        \\  <key id="docs" for="node" attr.name="doc_count" attr.type="int"/>
+        \\  <key id="weight" for="edge" attr.name="weight" attr.type="int"/>
+        \\  <graph id="G" edgedefault="undirected">
+        \\
+    );
+
+    for (g.nodes.items, 0..) |node, idx| {
+        try writer.print(
+            "    <node id=\"n{d}\"><data key=\"freq\">{d}</data><data key=\"docs\">{d}</data><data key=\"label\">",
+            .{ idx, node.freq, node.doc_count },
+        );
+        try writeXmlEscaped(writer, node.name);
+        try writer.writeAll("</data></node>\n");
+    }
+
+    var it = g.edges.iterator();
+    var edge_id: usize = 0;
+    while (it.next()) |entry| : (edge_id += 1) {
+        const key = entry.key_ptr.*;
+        const src: u32 = @intCast(key >> 32);
+        const dst: u32 = @intCast(key & 0xFFFFFFFF);
+        try writer.print(
+            "    <edge id=\"e{d}\" source=\"n{d}\" target=\"n{d}\"><data key=\"weight\">{d}</data></edge>\n",
+            .{ edge_id, src, dst, entry.value_ptr.* },
+        );
+    }
+
+    try writer.writeAll(
+        \\  </graph>
+        \\</graphml>
+        \\
+    );
+}
+
+fn writeXmlEscaped(writer: anytype, text: []const u8) !void {
+    for (text) |ch| {
+        switch (ch) {
+            '<' => try writer.writeAll("&lt;"),
+            '>' => try writer.writeAll("&gt;"),
+            '&' => try writer.writeAll("&amp;"),
+            '"' => try writer.writeAll("&quot;"),
+            '\'' => try writer.writeAll("&apos;"),
+            else => try writer.writeByte(ch),
+        }
+    }
+}
+
+// ============================================================================
+// Tests
+// ============================================================================
+
+test "add document extracts capitalised names" {
+    const g = try EntityGraph.init(std.testing.allocator);
+    defer g.deinit();
+
+    const text = "Jeffrey Epstein met Ghislaine Maxwell. 
Later, Prince Andrew joined them."; + const rc = ddac_entity_graph_add_document(g, text.ptr, text.len); + try std.testing.expectEqual(@as(c_int, 0), rc); + try std.testing.expect(ddac_entity_graph_node_count(g) >= 2); + try std.testing.expect(ddac_entity_graph_edge_count(g) >= 1); +} + +test "edge weight accumulates across documents" { + const g = try EntityGraph.init(std.testing.allocator); + defer g.deinit(); + + const doc1 = "Jeffrey Epstein called Ghislaine Maxwell."; + const doc2 = "Ghislaine Maxwell wrote to Jeffrey Epstein."; + _ = ddac_entity_graph_add_document(g, doc1.ptr, doc1.len); + _ = ddac_entity_graph_add_document(g, doc2.ptr, doc2.len); + try std.testing.expectEqual(@as(u32, 2), g.document_count); +} + +test "stopwords at sentence starts are filtered" { + const g = try EntityGraph.init(std.testing.allocator); + defer g.deinit(); + + const text = "The President met The Senator. When The President left, Jeffrey Epstein arrived."; + _ = ddac_entity_graph_add_document(g, text.ptr, text.len); + // "Jeffrey Epstein" should be extracted; "The President"/"The Senator" should not. + var found_je = false; + for (g.nodes.items) |n| { + if (std.mem.eql(u8, n.name, "Jeffrey Epstein")) found_je = true; + } + try std.testing.expect(found_je); +} + +test "handle free is safe on null" { + ddac_entity_graph_free(null); +} + +test "exports without nodes produce valid empty graphml" { + const g = try EntityGraph.init(std.testing.allocator); + defer g.deinit(); + + // Build in memory rather than touching the filesystem. 
+    var buf: [4096]u8 = undefined;
+    var stream = std.io.fixedBufferStream(&buf);
+    try writeGraphML(g, stream.writer());
+    const written = stream.getWritten();
+    try std.testing.expect(std.mem.indexOf(u8, written, "<graphml") != null);
+}
diff --git a/ffi/zig/src/evasion_detect.zig b/ffi/zig/src/evasion_detect.zig
new file mode 100644
index 0000000..04417f9
--- /dev/null
+++ b/ffi/zig/src/evasion_detect.zig
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: PMPL-1.0-or-later
+// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath)
+// Docudactyl — Evasion / Non-Answer Detection for Depositions
+//
+// Detects common evasive-answer patterns in deposition transcripts and
+// interview records:
+//   - "I don't recall" / "I do not recall"
+//   - "I don't remember"
+//   - "Not to my knowledge"
+//   - "I'm not sure"
+//   - "I have no recollection"
+//   - "I couldn't say"
+//   - "I'd have to check"
+//   - "Asked and answered" (lawyer interjection)
+//   - Fifth-Amendment invocations
+//
+// Output:
+//   - Per-category counts
+//   - Total evasion events
+//   - Evasion rate (events per 1000 tokens) as a crude metric
+//
+// Rationale:
+//   In Epstein-era depositions, evasive answers cluster around specific
+//   topics (names of associates, specific dates, financial arrangements).
+//   Surfacing the density of evasion lets investigators prioritise which
+//   sections deserve close reading or cross-deposition comparison.
+//
+// Pattern-based — case-insensitive matching. Works on any extracted text.
+// + +const std = @import("std"); + +// ============================================================================ +// Public Types +// ============================================================================ + +pub const EvasionStatus = enum(c_int) { + ok = 0, + no_text = 1, +}; + +pub const EvasionCategory = enum(u8) { + no_recall = 0, + no_memory = 1, + not_sure = 2, + no_knowledge = 3, + would_check = 4, + asked_answered = 5, + fifth_amendment = 6, + decline_answer = 7, +}; + +pub const CATEGORY_COUNT: usize = 8; + +/// Evasion detection results — flat structure for C ABI stability. +pub const EvasionResult = extern struct { + status: c_int, + + /// Counts indexed by EvasionCategory. + category_counts: [CATEGORY_COUNT]u32, + + total_events: u32, + total_tokens: u32, + + /// Evasion events per 1000 tokens, fixed-point (×1000). An "evasion + /// rate" of 12500 means 12.5 events per 1000 tokens. + events_per_1k_fixed: u32, + + summary: [512]u8, +}; + +// ============================================================================ +// Patterns +// ============================================================================ + +/// Case-insensitive substring that marks an evasion. Short patterns +/// (e.g. "no recall") risk false positives; we keep phrases at 3+ words +/// to minimise noise. 
+const Pattern = struct { + phrase: []const u8, + category: EvasionCategory, +}; + +const patterns = [_]Pattern{ + // no_recall + .{ .phrase = "i don't recall", .category = .no_recall }, + .{ .phrase = "i do not recall", .category = .no_recall }, + .{ .phrase = "i cannot recall", .category = .no_recall }, + .{ .phrase = "i can't recall", .category = .no_recall }, + .{ .phrase = "i have no recollection", .category = .no_recall }, + .{ .phrase = "i don't have any recollection", .category = .no_recall }, + .{ .phrase = "no recollection of", .category = .no_recall }, + + // no_memory + .{ .phrase = "i don't remember", .category = .no_memory }, + .{ .phrase = "i do not remember", .category = .no_memory }, + .{ .phrase = "i can't remember", .category = .no_memory }, + .{ .phrase = "i cannot remember", .category = .no_memory }, + .{ .phrase = "don't have a memory", .category = .no_memory }, + + // not_sure + .{ .phrase = "i'm not sure", .category = .not_sure }, + .{ .phrase = "i am not sure", .category = .not_sure }, + .{ .phrase = "i'm not certain", .category = .not_sure }, + .{ .phrase = "i am not certain", .category = .not_sure }, + .{ .phrase = "i couldn't say", .category = .not_sure }, + .{ .phrase = "i could not say", .category = .not_sure }, + + // no_knowledge + .{ .phrase = "not to my knowledge", .category = .no_knowledge }, + .{ .phrase = "i have no knowledge", .category = .no_knowledge }, + .{ .phrase = "i don't have any knowledge", .category = .no_knowledge }, + .{ .phrase = "i'm not aware", .category = .no_knowledge }, + .{ .phrase = "i am not aware", .category = .no_knowledge }, + + // would_check + .{ .phrase = "i'd have to check", .category = .would_check }, + .{ .phrase = "i would have to check", .category = .would_check }, + .{ .phrase = "i'd have to look", .category = .would_check }, + .{ .phrase = "i would need to check", .category = .would_check }, + + // asked_answered (lawyer interjection) + .{ .phrase = "asked and answered", .category = 
.asked_answered }, + + // fifth_amendment + .{ .phrase = "fifth amendment", .category = .fifth_amendment }, + .{ .phrase = "invoke the fifth", .category = .fifth_amendment }, + .{ .phrase = "on the advice of counsel", .category = .fifth_amendment }, + .{ .phrase = "i decline to answer", .category = .decline_answer }, + .{ .phrase = "decline to answer on", .category = .decline_answer }, + .{ .phrase = "refuse to answer", .category = .decline_answer }, +}; + +// ============================================================================ +// Helpers +// ============================================================================ + +fn toLowerAscii(ch: u8) u8 { + return if (ch >= 'A' and ch <= 'Z') ch + 32 else ch; +} + +/// Case-insensitive substring search. `needle` is assumed to be pre-lowered. +fn containsCI(haystack: []const u8, offset: usize, needle: []const u8) bool { + if (needle.len > haystack.len - offset) return false; + var i: usize = 0; + while (i < needle.len) : (i += 1) { + if (toLowerAscii(haystack[offset + i]) != needle[i]) return false; + } + return true; +} + +fn countTokens(text: []const u8) u32 { + var count: u32 = 0; + var in_tok = false; + for (text) |ch| { + if (ch == ' ' or ch == '\n' or ch == '\r' or ch == '\t' or ch == ',' or ch == '.' 
or ch == ';') { + if (in_tok) { + count += 1; + in_tok = false; + } + } else { + in_tok = true; + } + } + if (in_tok) count += 1; + return count; +} + +// ============================================================================ +// Public API +// ============================================================================ + +pub fn evasionDetect(text: []const u8, result: *EvasionResult) EvasionStatus { + @memset(std.mem.asBytes(result), 0); + + if (text.len == 0) { + result.status = @intFromEnum(EvasionStatus.no_text); + return .no_text; + } + + result.total_tokens = countTokens(text); + + var i: usize = 0; + while (i < text.len) : (i += 1) { + inline for (patterns) |p| { + if (containsCI(text, i, p.phrase)) { + const cat = @intFromEnum(p.category); + result.category_counts[cat] += 1; + result.total_events += 1; + i += p.phrase.len - 1; // skip past match (-1 to offset the loop += 1) + break; + } + } + } + + if (result.total_tokens > 0) { + // events × 1000 / tokens × 1000 = events per 1k with 3-decimal fixed point. 
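+        // e.g. 25 events in 2000 tokens → 25 × 1_000_000 / 2000 = 12_500,
+        // i.e. 12.5 events per 1k tokens once the caller divides the
+        // fixed-point value by 1000 (illustrative numbers only).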
+ const num: u64 = @as(u64, result.total_events) * 1_000_000; + result.events_per_1k_fixed = @intCast(num / @as(u64, result.total_tokens)); + } + + var buf: [512]u8 = undefined; + const s = std.fmt.bufPrint(&buf, + "{d} evasion event(s) in {d} tokens (no_recall={d}, no_memory={d}, not_sure={d}, no_knowledge={d}, would_check={d}, asked_answered={d}, fifth={d}, declined={d})", + .{ + result.total_events, + result.total_tokens, + result.category_counts[0], + result.category_counts[1], + result.category_counts[2], + result.category_counts[3], + result.category_counts[4], + result.category_counts[5], + result.category_counts[6], + result.category_counts[7], + }, + ) catch "Evasion detection complete"; + @memcpy(result.summary[0..s.len], s); + result.summary[s.len] = 0; + + result.status = @intFromEnum(EvasionStatus.ok); + return .ok; +} + +// ============================================================================ +// C-ABI Exports +// ============================================================================ + +export fn ddac_evasion_detect( + text_ptr: ?[*]const u8, + text_len: usize, + result: ?*EvasionResult, +) c_int { + const text = if (text_ptr) |p| p[0..text_len] else return 1; + const res = result orelse return 1; + return @intFromEnum(evasionDetect(text, res)); +} + +// ============================================================================ +// Tests +// ============================================================================ + +test "detect no_recall patterns" { + var r: EvasionResult = undefined; + const text = "Q: Did you meet him? A: I don't recall. Q: Ever? A: I cannot recall that."; + _ = evasionDetect(text, &r); + try std.testing.expect(r.category_counts[@intFromEnum(EvasionCategory.no_recall)] >= 2); +} + +test "detect mixed categories" { + var r: EvasionResult = undefined; + const text = + \\Q: Did you see him on that day? + \\A: I don't remember. + \\Q: Not to your knowledge? + \\A: Not to my knowledge. + \\Q: Can you confirm? 
+ \\A: I'm not sure. + \\A: I'd have to check my records. + ; + _ = evasionDetect(text, &r); + try std.testing.expect(r.category_counts[@intFromEnum(EvasionCategory.no_memory)] >= 1); + try std.testing.expect(r.category_counts[@intFromEnum(EvasionCategory.no_knowledge)] >= 1); + try std.testing.expect(r.category_counts[@intFromEnum(EvasionCategory.not_sure)] >= 1); + try std.testing.expect(r.category_counts[@intFromEnum(EvasionCategory.would_check)] >= 1); + try std.testing.expect(r.total_events >= 4); +} + +test "fifth amendment" { + var r: EvasionResult = undefined; + const text = "On the advice of counsel, I invoke the Fifth Amendment and decline to answer."; + _ = evasionDetect(text, &r); + try std.testing.expect(r.category_counts[@intFromEnum(EvasionCategory.fifth_amendment)] >= 1); +} + +test "events per 1k tokens" { + var r: EvasionResult = undefined; + const text = "I don't recall anything about that meeting or those people or that topic at all."; + _ = evasionDetect(text, &r); + // One event, ~15 tokens → ~66 per 1k (in fixed × 1000 = 66000-ish). + try std.testing.expect(r.events_per_1k_fixed > 0); +} + +test "case insensitive" { + var r1: EvasionResult = undefined; + var r2: EvasionResult = undefined; + _ = evasionDetect("I DON'T RECALL", &r1); + _ = evasionDetect("i don't recall", &r2); + try std.testing.expectEqual(r1.total_events, r2.total_events); +} + +test "empty text returns no_text" { + var r: EvasionResult = undefined; + const status = evasionDetect("", &r); + try std.testing.expect(status == .no_text); +} diff --git a/ffi/zig/src/flight_log.zig b/ffi/zig/src/flight_log.zig new file mode 100644 index 0000000..09e45f0 --- /dev/null +++ b/ffi/zig/src/flight_log.zig @@ -0,0 +1,508 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. 
Jewell (hyperpolymath) +// Docudactyl — Flight Log & Travel Document Extraction Stage +// +// Extracts travel-related entities commonly found in flight logs, pilot +// manifests, and travel ledgers (e.g., the "Lolita Express" logs released +// during the Epstein civil and criminal proceedings). +// +// Extracts: +// - Aircraft tail numbers (e.g., N908JE, N212JE, G-EJES) +// - IATA airport codes (3 upper-case letters, e.g., TEB, PBI, JFK) +// - ICAO airport codes (4 upper-case letters, e.g., KTEB, KPBI, KJFK) +// - Phone numbers (US and international formats) +// - Street addresses (line-leading digits followed by road words) +// - Passenger-list markers (lines beginning with "PAX", "PASSENGERS", etc.) +// +// Pattern-based (no ML dependency). Works on any text buffer extracted by +// the base parser. Results returned in a flat extern struct so callers +// across the C ABI (Chapel, Rust, Python ctypes, OCaml cstubs) see a +// stable layout. +// + +const std = @import("std"); + +// ============================================================================ +// Public Types +// ============================================================================ + +/// Maximum items stored per category. Sized for multi-page flight logs. +pub const MAX_ITEMS: usize = 256; + +/// Maximum length of an extracted text span (null-terminated). +pub const MAX_SPAN_LEN: usize = 128; + +/// Status codes returned by flight_log_process. +pub const FlightLogStatus = enum(c_int) { + ok = 0, + no_text = 1, + text_too_short = 2, +}; + +/// Single extracted span with source offset. +pub const FlightSpan = extern struct { + /// Start byte offset in original text. + start: u32, + /// Length in bytes. + len: u32, + /// Null-terminated matched text (truncated to MAX_SPAN_LEN-1). + text: [MAX_SPAN_LEN]u8, +}; + +/// Results from flight-log / travel-document extraction. 
+pub const FlightLogResult = extern struct { + status: c_int, + + tail_number_count: u32, + tail_numbers: [MAX_ITEMS]FlightSpan, + + iata_code_count: u32, + iata_codes: [MAX_ITEMS]FlightSpan, + + icao_code_count: u32, + icao_codes: [MAX_ITEMS]FlightSpan, + + phone_count: u32, + phones: [MAX_ITEMS]FlightSpan, + + address_count: u32, + addresses: [MAX_ITEMS]FlightSpan, + + passenger_marker_count: u32, + + total_items: u32, + summary: [512]u8, +}; + +// ============================================================================ +// Internal Helpers +// ============================================================================ + +fn isUpperAscii(ch: u8) bool { + return ch >= 'A' and ch <= 'Z'; +} + +fn isDigit(ch: u8) bool { + return ch >= '0' and ch <= '9'; +} + +fn isAlnum(ch: u8) bool { + return isUpperAscii(ch) or isDigit(ch) or (ch >= 'a' and ch <= 'z'); +} + +fn isWordBoundary(text: []const u8, pos: usize) bool { + if (pos == 0 or pos >= text.len) return true; + const ch = text[pos]; + return !isAlnum(ch); +} + +fn addSpan( + buf: *[MAX_ITEMS]FlightSpan, + count: *u32, + start: usize, + len: usize, + text: []const u8, +) void { + if (count.* >= MAX_ITEMS) return; + const idx = count.*; + count.* += 1; + buf[idx].start = @intCast(start); + buf[idx].len = @intCast(len); + @memset(&buf[idx].text, 0); + const copy_len = @min(len, MAX_SPAN_LEN - 1); + if (start + copy_len <= text.len) { + @memcpy(buf[idx].text[0..copy_len], text[start .. start + copy_len]); + } +} + +/// Known airport IATA/ICAO codes whitelisted to reduce false positives. +/// Focused on airports that appear in released Epstein flight logs and +/// common jurisdictions of interest. 
+const known_iata = std.StaticStringMap(void).initComptime(.{ + .{ "TEB", {} }, // Teterboro, NJ + .{ "PBI", {} }, // Palm Beach, FL + .{ "JFK", {} }, // New York JFK + .{ "LGA", {} }, // New York LaGuardia + .{ "EWR", {} }, // Newark + .{ "MIA", {} }, // Miami + .{ "FLL", {} }, // Fort Lauderdale + .{ "BDL", {} }, // Hartford + .{ "SAF", {} }, // Santa Fe + .{ "ABQ", {} }, // Albuquerque + .{ "STT", {} }, // St. Thomas, USVI + .{ "SJU", {} }, // San Juan, PR + .{ "LHR", {} }, // London Heathrow + .{ "LGW", {} }, // London Gatwick + .{ "FAB", {} }, // Farnborough + .{ "LTN", {} }, // Luton + .{ "CDG", {} }, // Paris CDG + .{ "LBG", {} }, // Paris Le Bourget + .{ "NCE", {} }, // Nice + .{ "GVA", {} }, // Geneva + .{ "ZRH", {} }, // Zurich + .{ "VIE", {} }, // Vienna + .{ "MAD", {} }, // Madrid + .{ "BCN", {} }, // Barcelona + .{ "FCO", {} }, // Rome Fiumicino + .{ "DXB", {} }, // Dubai + .{ "AUH", {} }, // Abu Dhabi + .{ "RUH", {} }, // Riyadh + .{ "DME", {} }, // Moscow Domodedovo + .{ "SVO", {} }, // Moscow Sheremetyevo + .{ "TLV", {} }, // Tel Aviv + .{ "HKG", {} }, // Hong Kong + .{ "SIN", {} }, // Singapore + .{ "NRT", {} }, // Tokyo Narita + .{ "HND", {} }, // Tokyo Haneda +}); + +const known_icao = std.StaticStringMap(void).initComptime(.{ + .{ "KTEB", {} }, + .{ "KPBI", {} }, + .{ "KJFK", {} }, + .{ "KLGA", {} }, + .{ "KEWR", {} }, + .{ "KMIA", {} }, + .{ "KFLL", {} }, + .{ "TIST", {} }, // St. 
Thomas + .{ "TJSJ", {} }, // San Juan + .{ "EGLL", {} }, // Heathrow + .{ "EGKK", {} }, // Gatwick + .{ "EGLF", {} }, // Farnborough + .{ "LFPB", {} }, // Le Bourget + .{ "LSGG", {} }, // Geneva + .{ "LSZH", {} }, // Zurich +}); + +const passenger_markers = [_][]const u8{ + "PAX:", + "PAX ", + "PASSENGERS:", + "PASSENGER:", + "PASSENGER MANIFEST", + "PASSENGER LIST", + "MANIFEST:", + "GUESTS:", + "GUEST LIST", +}; + +const address_tokens = std.StaticStringMap(void).initComptime(.{ + .{ "STREET", {} }, .{ "ST", {} }, .{ "ST.", {} }, + .{ "AVENUE", {} }, .{ "AVE", {} }, .{ "AVE.", {} }, + .{ "ROAD", {} }, .{ "RD", {} }, .{ "RD.", {} }, + .{ "DRIVE", {} }, .{ "DR", {} }, .{ "DR.", {} }, + .{ "LANE", {} }, .{ "LN", {} }, .{ "LN.", {} }, + .{ "BOULEVARD", {} }, .{ "BLVD", {} }, .{ "BLVD.", {} }, + .{ "COURT", {} }, .{ "CT", {} }, .{ "CT.", {} }, + .{ "PLACE", {} }, .{ "PL", {} }, .{ "PL.", {} }, + .{ "WAY", {} }, .{ "HIGHWAY", {} }, .{ "HWY", {} }, + .{ "PARKWAY", {} }, .{ "PKWY", {} }, +}); + +// ============================================================================ +// Extractors +// ============================================================================ + +/// Tail numbers: FAA-registered aircraft begin with "N" followed by 1-5 +/// alphanumerics (no O or I in the terminal characters per FAA rules, but +/// we accept slightly more liberally for OCR tolerance). UK/EU registrations +/// use prefixes like "G-", "D-", "F-", followed by 4 letters. +fn extractTailNumbers(text: []const u8, result: *FlightLogResult) void { + var i: usize = 0; + while (i < text.len) : (i += 1) { + if (!isWordBoundary(text, i)) continue; + const start = i + @as(usize, if (i == 0) 0 else 1); + if (start >= text.len) break; + + // US pattern: N followed by 1-5 digits/letters. 
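+        // e.g. "N908JE" (see the tests below): 'N', then digit '9', then four
+        // more alphanumerics, ending at a word boundary → accepted.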
+ if (text[start] == 'N' and start + 2 < text.len and isDigit(text[start + 1])) { + var end = start + 1; + var alnum_count: usize = 0; + while (end < text.len and isAlnum(text[end]) and alnum_count < 5) : (end += 1) { + alnum_count += 1; + } + if (alnum_count >= 2 and alnum_count <= 5 and isWordBoundary(text, end)) { + addSpan(&result.tail_numbers, &result.tail_number_count, start, end - start, text); + i = end; + continue; + } + } + + // Foreign pattern: LETTER-LETTERS (e.g. G-EJES, D-IIKA). + if (isUpperAscii(text[start]) and start + 5 < text.len and text[start + 1] == '-') { + var end = start + 2; + var letter_count: usize = 0; + while (end < text.len and isUpperAscii(text[end]) and letter_count < 5) : (end += 1) { + letter_count += 1; + } + if (letter_count >= 3 and letter_count <= 5 and isWordBoundary(text, end)) { + addSpan(&result.tail_numbers, &result.tail_number_count, start, end - start, text); + i = end; + } + } + } +} + +/// IATA airport codes: three upper-case letters at word boundaries, cross- +/// referenced against a whitelist to reduce matches on unrelated acronyms. +fn extractAirportCodes(text: []const u8, result: *FlightLogResult) void { + var i: usize = 0; + while (i + 3 <= text.len) : (i += 1) { + if (i != 0 and isAlnum(text[i - 1])) continue; + + // ICAO 4-letter check first (more specific). + if (i + 4 <= text.len and + isUpperAscii(text[i]) and isUpperAscii(text[i + 1]) and + isUpperAscii(text[i + 2]) and isUpperAscii(text[i + 3]) and + (i + 4 == text.len or !isAlnum(text[i + 4]))) + { + const code = text[i .. i + 4]; + if (known_icao.has(code)) { + addSpan(&result.icao_codes, &result.icao_code_count, i, 4, text); + i += 3; + continue; + } + } + + // IATA 3-letter check. + if (isUpperAscii(text[i]) and isUpperAscii(text[i + 1]) and isUpperAscii(text[i + 2]) and + (i + 3 == text.len or !isAlnum(text[i + 3]))) + { + const code = text[i .. 
i + 3];
+            if (known_iata.has(code)) {
+                addSpan(&result.iata_codes, &result.iata_code_count, i, 3, text);
+                i += 2;
+            }
+        }
+    }
+}
+
+/// Phone numbers — accepts common US and international formats:
+/// (212) 555-1234, 212-555-1234, 212.555.1234, +1 212 555 1234,
+/// +44 20 7946 0958
+fn extractPhones(text: []const u8, result: *FlightLogResult) void {
+    var i: usize = 0;
+    while (i < text.len) : (i += 1) {
+        if (!isWordBoundary(text, i)) continue;
+        const start = if (i == 0) i else i + 1;
+        if (start >= text.len) break;
+
+        var digit_count: usize = 0;
+        var end = start;
+        if (text[start] == '+') end += 1;
+        if (end < text.len and text[end] == '(') end += 1;
+
+        const scan_start = end;
+        while (end < text.len and end - start < 20) : (end += 1) {
+            const ch = text[end];
+            if (isDigit(ch)) digit_count += 1
+            else if (ch == ' ' or ch == '-' or ch == '.' or ch == '(' or ch == ')') {
+                // separator
+            } else break;
+        }
+
+        if (digit_count >= 10 and digit_count <= 15 and end > scan_start) {
+            addSpan(&result.phones, &result.phone_count, start, end - start, text);
+            i = end;
+        }
+    }
+}
+
+/// Street addresses — line-oriented heuristic: a line whose first token is a
+/// run of digits, followed by at least one word from `address_tokens`
+/// (STREET, AVE, BLVD, ...).
+fn extractAddresses(text: []const u8, result: *FlightLogResult) void {
+    var line_start: usize = 0;
+    while (line_start < text.len) {
+        var line_end = line_start;
+        while (line_end < text.len and text[line_end] != '\n') : (line_end += 1) {}
+        const line = text[line_start..line_end];
+
+        // Skip leading whitespace.
+        var off: usize = 0;
+        while (off < line.len and (line[off] == ' ' or line[off] == '\t')) : (off += 1) {}
+
+        // Require the line to start with digits.
+        if (off < line.len and isDigit(line[off])) {
+            var scan = off;
+            while (scan < line.len and isDigit(line[scan])) : (scan += 1) {}
+            // At least one digit, followed by a space.
+            if (scan > off and scan < line.len and line[scan] == ' ') {
+                // Scan line for an address token (case-insensitive via upper-case compare).
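+                // e.g. "9 East 71st Street": leading digits "9", then the
+                // word "Street" upper-cases to "STREET", which is present in
+                // address_tokens, so the line is recorded as an address.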
+ var word_start: usize = scan + 1; + var j = word_start; + var found = false; + while (j <= line.len) : (j += 1) { + const at_end = (j == line.len) or (line[j] == ' ') or (line[j] == ','); + if (at_end and j > word_start) { + var upper_buf: [16]u8 = undefined; + const w = line[word_start..j]; + if (w.len <= upper_buf.len) { + for (w, 0..) |ch, idx| { + upper_buf[idx] = if (ch >= 'a' and ch <= 'z') ch - 32 else ch; + } + if (address_tokens.has(upper_buf[0..w.len])) { + found = true; + break; + } + } + word_start = j + 1; + } + } + if (found) { + addSpan( + &result.addresses, + &result.address_count, + line_start + off, + line.len - off, + text, + ); + } + } + } + + line_start = if (line_end < text.len) line_end + 1 else line_end; + } +} + +/// Passenger manifest markers: count lines that look like PAX blocks so the +/// investigator knows roughly how many discrete manifests are in a document. +fn countPassengerMarkers(text: []const u8, result: *FlightLogResult) void { + var i: usize = 0; + while (i < text.len) : (i += 1) { + for (passenger_markers) |marker| { + if (i + marker.len <= text.len and std.mem.eql(u8, text[i .. i + marker.len], marker)) { + result.passenger_marker_count += 1; + i += marker.len - 1; + break; + } + } + } +} + +// ============================================================================ +// Public API +// ============================================================================ + +/// Process a text buffer, populating `result` with extracted entities. 
+pub fn flightLogProcess(text: []const u8, result: *FlightLogResult) FlightLogStatus { + @memset(std.mem.asBytes(result), 0); + + if (text.len == 0) { + result.status = @intFromEnum(FlightLogStatus.no_text); + return .no_text; + } + if (text.len < 4) { + result.status = @intFromEnum(FlightLogStatus.text_too_short); + return .text_too_short; + } + + extractTailNumbers(text, result); + extractAirportCodes(text, result); + extractPhones(text, result); + extractAddresses(text, result); + countPassengerMarkers(text, result); + + result.total_items = + result.tail_number_count + + result.iata_code_count + + result.icao_code_count + + result.phone_count + + result.address_count + + result.passenger_marker_count; + + var summary_buf: [512]u8 = undefined; + const summary = std.fmt.bufPrint( + &summary_buf, + "{d} tail number(s), {d} IATA, {d} ICAO, {d} phone(s), {d} address(es), {d} manifest marker(s)", + .{ + result.tail_number_count, + result.iata_code_count, + result.icao_code_count, + result.phone_count, + result.address_count, + result.passenger_marker_count, + }, + ) catch "Flight log extraction complete"; + @memcpy(result.summary[0..summary.len], summary); + result.summary[summary.len] = 0; + + result.status = @intFromEnum(FlightLogStatus.ok); + return .ok; +} + +// ============================================================================ +// C-ABI Exports +// ============================================================================ + +/// Process text for flight-log entities. Thread-safe: no shared state. 
+export fn ddac_flight_log_process( + text_ptr: ?[*]const u8, + text_len: usize, + result: ?*FlightLogResult, +) c_int { + const text = if (text_ptr) |p| p[0..text_len] else return 1; + const res = result orelse return 1; + return @intFromEnum(flightLogProcess(text, res)); +} + +// ============================================================================ +// Tests +// ============================================================================ + +test "extract US tail numbers" { + var result: FlightLogResult = undefined; + const text = "Aircraft N908JE departed KTEB bound for KPBI. Also seen: N212JE, N724JE."; + _ = flightLogProcess(text, &result); + try std.testing.expect(result.tail_number_count >= 3); +} + +test "extract IATA and ICAO codes" { + var result: FlightLogResult = undefined; + const text = "Departed TEB, arrived PBI via KJFK. Also: JFK, MIA, STT."; + _ = flightLogProcess(text, &result); + try std.testing.expect(result.iata_code_count >= 4); + try std.testing.expect(result.icao_code_count >= 1); +} + +test "extract phone numbers" { + var result: FlightLogResult = undefined; + const text = "Call (212) 555-1234 or +1 212 555 9999 for scheduling."; + _ = flightLogProcess(text, &result); + try std.testing.expect(result.phone_count >= 2); +} + +test "extract addresses" { + var result: FlightLogResult = undefined; + const text = + \\Property records: + \\9 East 71st Street, New York + \\358 El Brillo Way, Palm Beach + ; + _ = flightLogProcess(text, &result); + try std.testing.expect(result.address_count >= 2); +} + +test "passenger markers counted" { + var result: FlightLogResult = undefined; + const text = + \\PAX: JE, GM, SK + \\PASSENGERS: three + \\MANIFEST: complete + ; + _ = flightLogProcess(text, &result); + try std.testing.expect(result.passenger_marker_count >= 3); +} + +test "empty input" { + var result: FlightLogResult = undefined; + const status = flightLogProcess("", &result); + try std.testing.expect(status == .no_text); +} + +test "rejects 
random 3-letter acronyms" { + var result: FlightLogResult = undefined; + const text = "The CEO of XYZ spoke with FBI and CIA."; + _ = flightLogProcess(text, &result); + // None of XYZ/CEO/FBI/CIA are in the IATA whitelist. + try std.testing.expect(result.iata_code_count == 0); +} diff --git a/ffi/zig/src/investigator_summary.zig b/ffi/zig/src/investigator_summary.zig new file mode 100644 index 0000000..853a3c7 --- /dev/null +++ b/ffi/zig/src/investigator_summary.zig @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +// Docudactyl — Investigator-Friendly JSON Summary Writer +// +// The HPC pipeline emits Cap'n Proto binary per-document stage results +// (schema/stages.capnp). That format is excellent for machines but opaque +// to citizen journalists working with spreadsheets or text editors. +// +// This module builds a plain, self-contained, human-readable JSON summary +// from an in-memory `InvestigatorSummary` struct that callers populate +// directly (either from their own extraction pass or by transcoding the +// Cap'n Proto output). The JSON schema is intentionally flat and +// forgiving — any field can be absent/zero without breaking readers. 
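+//
+// Typical call sequence from C (a sketch; field and constant names follow
+// the Zig declarations below, and the build/link setup is project-specific;
+// the struct is large, so heap-allocate it):
+//
+//   InvestigatorSummary *s = calloc(1, sizeof(InvestigatorSummary));
+//   strncpy((char *)s->source_path, "/data/doc_0042.pdf", sizeof s->source_path - 1);
+//   s->page_count = 184;
+//   ddac_investigator_summary_set_list_item(&s->persons, 0, "Jane Doe", 8);
+//   ddac_investigator_summary_write("/out/doc_0042.json", s);
+//   free(s);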
+// +// Target schema (example): +// { +// "document_id": "sha256:abcd...", +// "source_path": "/data/release_2024/doc_0042.pdf", +// "content_kind": "pdf", +// "page_count": 184, +// "sha256": "abcd...", +// "redactions": { "count": 12, "pages_affected": 4, "recoverable": true }, +// "entities": { +// "persons": ["Jeffrey Epstein", "Ghislaine Maxwell"], +// "tail_numbers": ["N908JE"], +// "airports": ["TEB", "PBI", "KTEB"], +// "phones": ["+1 212 555 1234"], +// "addresses": ["9 East 71st Street"] +// }, +// "financial": { "amounts": 3, "accounts": 1 }, +// "legal": { "case_citations": 5, "dockets": 2, "statutes": 1 }, +// "speakers": { "count": 2, "is_deposition": true }, +// "evasion": { "total": 17, "per_1k_tokens": 12.5 }, +// "flags": ["has_redactions", "has_recoverable_text", "deposition"] +// } +// + +const std = @import("std"); + +// ============================================================================ +// Public Types +// ============================================================================ + +pub const SummaryStatus = enum(c_int) { + ok = 0, + write_error = 1, + invalid_input = 2, +}; + +/// Maximum strings in an exported list. +pub const MAX_LIST_ITEMS: usize = 64; +pub const MAX_ITEM_LEN: usize = 128; +pub const MAX_PATH_LEN: usize = 1024; +pub const MAX_KIND_LEN: usize = 16; +pub const MAX_SHA_LEN: usize = 80; + +/// Flat list of null-terminated strings in a fixed buffer (for C ABI). +pub const StringList = extern struct { + count: u32, + items: [MAX_LIST_ITEMS][MAX_ITEM_LEN]u8, +}; + +/// Complete summary for one document. Caller fills this in; the writer +/// emits JSON to a file or buffer. 
+pub const InvestigatorSummary = extern struct { + // Identification + source_path: [MAX_PATH_LEN]u8, + content_kind: [MAX_KIND_LEN]u8, + sha256: [MAX_SHA_LEN]u8, + + // Scalar metadata + page_count: u32, + word_count: u64, + char_count: u64, + duration_sec_x1000: u64, // fixed-point milliseconds for audio/video + + // Redactions (from stageRedactionDetect + redaction_recovery) + redaction_count: u32, + redacted_pages: u32, + recoverable_pages: u32, + + // Financial (from stageFinancialExtract) + financial_amounts: u32, + financial_accounts: u32, + + // Legal NER (from stageLegalNer) + legal_case_citations: u32, + legal_dockets: u32, + legal_statutes: u32, + + // Flight log (from flight_log) + tail_number_count: u32, + iata_count: u32, + icao_count: u32, + phone_count: u32, + address_count: u32, + + // Entity lists + persons: StringList, + tail_numbers: StringList, + airports: StringList, + phones: StringList, + addresses: StringList, + + // Deposition / speaker data + speaker_count: u32, + is_deposition: u8, + _pad1: [3]u8, + + // Evasion detection + evasion_total: u32, + evasion_per_1k_x1000: u32, // fixed-point: rate × 1000 +}; + +// ============================================================================ +// Helpers +// ============================================================================ + +fn nullTerminated(buf: []const u8) []const u8 { + const end = std.mem.indexOfScalar(u8, buf, 0) orelse buf.len; + return buf[0..end]; +} + +fn writeJsonString(writer: anytype, text: []const u8) !void { + try writer.writeByte('"'); + for (text) |ch| { + switch (ch) { + '"' => try writer.writeAll("\\\""), + '\\' => try writer.writeAll("\\\\"), + '\n' => try writer.writeAll("\\n"), + '\r' => try writer.writeAll("\\r"), + '\t' => try writer.writeAll("\\t"), + 0x08 => try writer.writeAll("\\b"), + 0x0C => try writer.writeAll("\\f"), + 0...7, 0x0B, 0x0E...0x1F => try writer.print("\\u{x:0>4}", .{ch}), + else => try writer.writeByte(ch), + } + } + try 
writer.writeByte('"'); +} + +fn writeList(writer: anytype, name: []const u8, list: *const StringList) !void { + try writer.print(" \"{s}\": [", .{name}); + var i: u32 = 0; + while (i < list.count and i < MAX_LIST_ITEMS) : (i += 1) { + if (i != 0) try writer.writeAll(", "); + const item = nullTerminated(&list.items[i]); + try writeJsonString(writer, item); + } + try writer.writeAll("]"); +} + +fn writeFlags(writer: anytype, s: *const InvestigatorSummary) !void { + try writer.writeAll(" \"flags\": ["); + var first = true; + + inline for (.{ + .{ "has_redactions", s.redaction_count > 0 }, + .{ "has_recoverable_text", s.recoverable_pages > 0 }, + .{ "has_financial_data", s.financial_amounts > 0 or s.financial_accounts > 0 }, + .{ "has_legal_refs", s.legal_case_citations > 0 or s.legal_dockets > 0 or s.legal_statutes > 0 }, + .{ "deposition", s.is_deposition == 1 }, + .{ "has_flight_data", s.tail_number_count > 0 or s.iata_count > 0 or s.icao_count > 0 }, + .{ "high_evasion", s.evasion_per_1k_x1000 > 20_000 }, + }) |entry| { + if (entry[1]) { + if (!first) try writer.writeAll(", "); + try writer.print("\"{s}\"", .{entry[0]}); + first = false; + } + } + try writer.writeAll("]"); +} + +// ============================================================================ +// Public API +// ============================================================================ + +/// Write the summary as JSON to `writer`. 
+pub fn writeJson(writer: anytype, s: *const InvestigatorSummary) !void { + const src_path = nullTerminated(&s.source_path); + const kind = nullTerminated(&s.content_kind); + const sha = nullTerminated(&s.sha256); + + try writer.writeAll("{\n"); + + try writer.writeAll(" \"source_path\": "); + try writeJsonString(writer, src_path); + try writer.writeAll(",\n"); + + try writer.writeAll(" \"content_kind\": "); + try writeJsonString(writer, kind); + try writer.writeAll(",\n"); + + try writer.writeAll(" \"sha256\": "); + try writeJsonString(writer, sha); + try writer.writeAll(",\n"); + + try writer.print(" \"page_count\": {d},\n", .{s.page_count}); + try writer.print(" \"word_count\": {d},\n", .{s.word_count}); + try writer.print(" \"char_count\": {d},\n", .{s.char_count}); + + if (s.duration_sec_x1000 > 0) { + const sec: f64 = @as(f64, @floatFromInt(s.duration_sec_x1000)) / 1000.0; + try writer.print(" \"duration_sec\": {d:.3},\n", .{sec}); + } + + try writer.print( + " \"redactions\": {{\"count\": {d}, \"pages_affected\": {d}, \"recoverable_pages\": {d}}},\n", + .{ s.redaction_count, s.redacted_pages, s.recoverable_pages }, + ); + + try writer.print( + " \"financial\": {{\"amounts\": {d}, \"accounts\": {d}}},\n", + .{ s.financial_amounts, s.financial_accounts }, + ); + + try writer.print( + " \"legal\": {{\"case_citations\": {d}, \"dockets\": {d}, \"statutes\": {d}}},\n", + .{ s.legal_case_citations, s.legal_dockets, s.legal_statutes }, + ); + + try writer.print( + " \"speakers\": {{\"count\": {d}, \"is_deposition\": {s}}},\n", + .{ s.speaker_count, if (s.is_deposition == 1) "true" else "false" }, + ); + + const rate: f64 = @as(f64, @floatFromInt(s.evasion_per_1k_x1000)) / 1000.0; + try writer.print( + " \"evasion\": {{\"total\": {d}, \"per_1k_tokens\": {d:.3}}},\n", + .{ s.evasion_total, rate }, + ); + + try writer.writeAll(" \"entities\": {\n"); + try writeList(writer, "persons", &s.persons); + try writer.writeAll(",\n"); + try writeList(writer, "tail_numbers", 
&s.tail_numbers);
+    try writer.writeAll(",\n");
+    try writeList(writer, "airports", &s.airports);
+    try writer.writeAll(",\n");
+    try writeList(writer, "phones", &s.phones);
+    try writer.writeAll(",\n");
+    try writeList(writer, "addresses", &s.addresses);
+    try writer.writeAll("\n  },\n");
+
+    try writeFlags(writer, s);
+    try writer.writeAll("\n}\n");
+}
+
+/// Write the summary to a file. Accepts absolute or relative paths.
+pub fn writeJsonFile(path: [*:0]const u8, s: *const InvestigatorSummary) SummaryStatus {
+    // A fully-populated summary fits comfortably in 64 KB.
+    var buf: [65536]u8 = undefined;
+    var stream = std.io.fixedBufferStream(&buf);
+    writeJson(stream.writer(), s) catch return .write_error;
+
+    // Resolve relative to the CWD so C callers can pass either form;
+    // createFileAbsoluteZ asserts on a relative path.
+    const file = std.fs.cwd().createFileZ(path, .{ .truncate = true }) catch return .write_error;
+    defer file.close();
+    file.writeAll(stream.getWritten()) catch return .write_error;
+    return .ok;
+}
+
+// ============================================================================
+// C-ABI Exports
+// ============================================================================
+
+export fn ddac_investigator_summary_write(
+    output_path: ?[*:0]const u8,
+    summary: ?*const InvestigatorSummary,
+) c_int {
+    const path = output_path orelse return @intFromEnum(SummaryStatus.invalid_input);
+    const s = summary orelse return @intFromEnum(SummaryStatus.invalid_input);
+    return @intFromEnum(writeJsonFile(path, s));
+}
+
+/// Helper for callers that want to populate a StringList slot from C.
+/// `idx` is the slot to write. Returns 0 on success, 1 on out-of-bounds.
+export fn ddac_investigator_summary_set_list_item( + list: ?*StringList, + idx: u32, + text_ptr: ?[*]const u8, + text_len: usize, +) c_int { + const l = list orelse return 1; + if (idx >= MAX_LIST_ITEMS) return 1; + const text = if (text_ptr) |p| p[0..text_len] else return 1; + @memset(&l.items[idx], 0); + const copy_len = @min(text.len, MAX_ITEM_LEN - 1); + @memcpy(l.items[idx][0..copy_len], text[0..copy_len]); + if (idx + 1 > l.count) l.count = idx + 1; + return 0; +} + +// ============================================================================ +// Tests +// ============================================================================ + +test "emit minimal summary" { + var s: InvestigatorSummary = std.mem.zeroes(InvestigatorSummary); + const path = "/tmp/test_doc.pdf"; + @memcpy(s.source_path[0..path.len], path); + const kind = "pdf"; + @memcpy(s.content_kind[0..kind.len], kind); + const sha = "abcd1234"; + @memcpy(s.sha256[0..sha.len], sha); + s.page_count = 42; + s.redaction_count = 3; + s.recoverable_pages = 1; + + var buf: [16384]u8 = undefined; + var stream = std.io.fixedBufferStream(&buf); + try writeJson(stream.writer(), &s); + const written = stream.getWritten(); + + try std.testing.expect(std.mem.indexOf(u8, written, "\"page_count\": 42") != null); + try std.testing.expect(std.mem.indexOf(u8, written, "\"has_redactions\"") != null); + try std.testing.expect(std.mem.indexOf(u8, written, "\"has_recoverable_text\"") != null); +} + +test "set list item via C-ABI helper" { + var list: StringList = std.mem.zeroes(StringList); + const name = "Jeffrey Epstein"; + const rc = ddac_investigator_summary_set_list_item(&list, 0, name.ptr, name.len); + try std.testing.expectEqual(@as(c_int, 0), rc); + try std.testing.expectEqual(@as(u32, 1), list.count); + const stored = nullTerminated(&list.items[0]); + try std.testing.expectEqualStrings(name, stored); +} + +test "json string escaping" { + var buf: [256]u8 = undefined; + var stream = 
std.io.fixedBufferStream(&buf); + try writeJsonString(stream.writer(), "quote\"backslash\\newline\n"); + const written = stream.getWritten(); + try std.testing.expect(std.mem.indexOf(u8, written, "\\\"") != null); + try std.testing.expect(std.mem.indexOf(u8, written, "\\\\") != null); + try std.testing.expect(std.mem.indexOf(u8, written, "\\n") != null); +} + +test "deposition flag emitted" { + var s: InvestigatorSummary = std.mem.zeroes(InvestigatorSummary); + s.is_deposition = 1; + s.evasion_per_1k_x1000 = 30_000; // triggers high_evasion flag + var buf: [16384]u8 = undefined; + var stream = std.io.fixedBufferStream(&buf); + try writeJson(stream.writer(), &s); + const written = stream.getWritten(); + try std.testing.expect(std.mem.indexOf(u8, written, "\"deposition\"") != null); + try std.testing.expect(std.mem.indexOf(u8, written, "\"high_evasion\"") != null); +} diff --git a/ffi/zig/src/redaction_recovery.zig b/ffi/zig/src/redaction_recovery.zig new file mode 100644 index 0000000..0ea64e9 --- /dev/null +++ b/ffi/zig/src/redaction_recovery.zig @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +// Docudactyl — Redaction Recovery +// +// Complements the basic redaction-detection stage in stages.zig with: +// 1. Per-page redaction density map (which pages are heavily redacted) +// 2. Overlay-only text recovery (extracts text under black boxes +// where the content stream was not +// scrubbed — a common failure mode +// in released Epstein filings and +// other court-ordered productions) +// 3. Cross-release redaction diff (same document, two releases, +// different redaction footprints) +// +// The base redaction stage in stages.zig counts redaction annotations. This +// module goes further: it extracts the RECOVERABLE text that happens to lie +// under overlay redactions, so investigators can see what was (improperly) +// concealed. 
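+//
+// Usage sketch (illustrative only; the input and output paths below are
+// hypothetical) showing how a C caller might drive the exports declared
+// in this module:
+//
+//     RedactionRecoveryResult r;
+//     if (ddac_redaction_recovery_analyze("/evidence/filing.pdf", &r) == 0 &&
+//         r.recoverable_pages > 0) {
+//         ddac_redaction_recovery_dump_text("/evidence/filing.pdf",
+//                                           "/out/filing.recovered.txt");
+//     }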
+//
+// Safety note: this is legally and ethically gray territory in some
+// jurisdictions. We extract text that is ALREADY PRESENT in the document's
+// content stream — we do not break encryption, do not OCR underneath
+// pixel-level redactions (which are irreversible), and do not decode any
+// protected content. This is analogous to "select-all, copy" in Preview.app.
+//
+
+const std = @import("std");
+
+// Poppler bindings — same set as stages.zig.
+const c = @cImport({
+    @cInclude("poppler/glib/poppler.h");
+    @cInclude("glib.h");
+});
+
+// ============================================================================
+// Public Types
+// ============================================================================
+
+pub const RedactionRecoveryStatus = enum(c_int) {
+    ok = 0,
+    cannot_open = 1,
+    not_a_pdf = 2,
+    write_error = 3,
+    allocation_error = 4,
+};
+
+pub const MAX_PAGES: usize = 4096;
+
+/// Per-page redaction density statistics.
+pub const PageStats = extern struct {
+    page_num: u32, // 1-based
+    redaction_count: u32, // number of /Redact annotations
+    text_under_redactions: u32, // extractable text bytes on a redacted page (whole-page upper bound)
+    has_recovered_text: u8, // 1 if recoverable content was present
+    _pad: [3]u8,
+};
+
+/// Summary of a redaction-recovery pass over a single PDF.
+pub const RedactionRecoveryResult = extern struct {
+    status: c_int,
+    total_pages: u32,
+    total_redactions: u32,
+    pages_with_redactions: u32,
+    recoverable_pages: u32,
+    recovered_bytes: u64,
+
+    /// Per-page stats. Only the first `total_pages` entries are valid.
+    pages: [MAX_PAGES]PageStats,
+
+    /// Human-readable summary.
+    summary: [512]u8,
+};
+
+// ============================================================================
+// Internal Helpers
+// ============================================================================
+
+/// Build a file:// URI in a fixed buffer. NOTE: the path is copied through
+/// unescaped; characters that are special in URIs ('%', '#', '?') are not
+/// percent-encoded and may prevent the document from opening.
+fn uriFromPath(path: [*:0]const u8) ?[4096]u8 {
+    const slice = std.mem.span(path);
+    const prefix = "file://";
+    var buf: [4096]u8 = undefined;
+    if (prefix.len + slice.len >= 4096) return null;
+    @memcpy(buf[0..prefix.len], prefix);
+    @memcpy(buf[prefix.len .. prefix.len + slice.len], slice);
+    buf[prefix.len + slice.len] = 0;
+    return buf;
+}
+
+/// A page has "recoverable" redactions when it carries redaction-style
+/// annotations AND `poppler_page_get_text()` still returns non-empty text:
+/// the overlay covers the visual rendering but does not scrub the content
+/// stream.
+fn countPageRedactions(page: *c.PopplerPage) struct { annots: u32, has_text: bool, text_len: usize } {
+    var annots: u32 = 0;
+    const annot_mapping = c.poppler_page_get_annot_mapping(page);
+    var item = annot_mapping;
+    while (item) |node| : (item = node.*.next) {
+        const mapping: *c.PopplerAnnotMapping = @ptrCast(@alignCast(node.*.data));
+        const annot = mapping.*.annot;
+        const annot_type = c.poppler_annot_get_annot_type(annot);
+        // Assumed numeric value for redaction annotations. Caution: stock
+        // poppler-glib exposes no POPPLER_ANNOT_REDACT member, and in its
+        // PopplerAnnotType enum 9 is HIGHLIGHT and 12 is STRIKE_OUT.
+        // Verify this value against the installed poppler headers.
+        if (annot_type == 12) {
+            annots += 1;
+        }
+    }
+    c.poppler_page_free_annot_mapping(annot_mapping);
+
+    var text_len: usize = 0;
+    var has_text = false;
+    const text_ptr = c.poppler_page_get_text(page);
+    if (text_ptr) |txt| {
+        const t = std.mem.span(txt);
+        text_len = t.len;
+        has_text = t.len > 0;
+        c.g_free(txt);
+    }
+
+    return .{ .annots = annots, .has_text = has_text, .text_len = text_len };
+}
+
+// ============================================================================
+// Public API
+// ============================================================================
+
+/// Scan a PDF, populate page-level redaction statistics.
+pub fn redactionRecoveryAnalyze(
+    input_path: [*:0]const u8,
+    result: *RedactionRecoveryResult,
+) RedactionRecoveryStatus {
+    @memset(std.mem.asBytes(result), 0);
+
+    const uri_buf = uriFromPath(input_path) orelse {
+        result.status = @intFromEnum(RedactionRecoveryStatus.cannot_open);
+        return .cannot_open;
+    };
+    const doc = c.poppler_document_new_from_file(&uri_buf, null, null) orelse {
+        result.status = @intFromEnum(RedactionRecoveryStatus.not_a_pdf);
+        return .not_a_pdf;
+    };
+    defer c.g_object_unref(doc);
+
+    const n_pages = c.poppler_document_get_n_pages(doc);
+    const pages_count: usize = @min(@as(usize, @intCast(n_pages)), MAX_PAGES);
+    result.total_pages = @intCast(pages_count);
+
+    var page_idx: usize = 0;
+    while (page_idx < pages_count) : (page_idx += 1) {
+        const page = c.poppler_document_get_page(doc, @intCast(page_idx)) orelse continue;
+        defer c.g_object_unref(page);
+
+        const stats = countPageRedactions(page);
+
+        const entry = &result.pages[page_idx];
+        entry.page_num = @intCast(page_idx + 1);
+        entry.redaction_count = stats.annots;
+        entry.text_under_redactions = if (stats.annots > 0 and stats.has_text) @intCast(stats.text_len) else 0;
+        entry.has_recovered_text = if (stats.annots > 0 and stats.has_text) 1 else 0;
+
+        result.total_redactions += stats.annots;
+        if (stats.annots > 0) result.pages_with_redactions += 1;
+        if (entry.has_recovered_text == 1) {
+            result.recoverable_pages += 1;
+            result.recovered_bytes += stats.text_len;
+        }
+    }
+
+    var summary_buf: [512]u8 = undefined;
+    // Print into len - 1 bytes so the trailing NUL write below cannot overflow.
+    const summary = std.fmt.bufPrint(summary_buf[0 .. summary_buf.len - 1],
+        "{d} redaction(s) across {d} page(s), {d} page(s) with recoverable text ({d} bytes)",
+        .{
+            result.total_redactions,
+            result.pages_with_redactions,
+            result.recoverable_pages,
+            result.recovered_bytes,
+        },
+    ) catch "Redaction recovery complete";
+    @memcpy(result.summary[0..summary.len], summary);
+    result.summary[summary.len] = 0;
+
+    result.status = @intFromEnum(RedactionRecoveryStatus.ok);
+    return .ok;
+}
+
+///
Dump recoverable text under redactions to a sidecar file, one page per +/// block. Each block is prefixed with a "# Page N" header so investigators +/// can cross-reference against the source PDF. +pub fn redactionRecoveryDumpText( + input_path: [*:0]const u8, + output_path: [*:0]const u8, +) RedactionRecoveryStatus { + const uri_buf = uriFromPath(input_path) orelse return .cannot_open; + const doc = c.poppler_document_new_from_file(&uri_buf, null, null) orelse return .not_a_pdf; + defer c.g_object_unref(doc); + + const file = std.fs.createFileAbsoluteZ(output_path, .{ .truncate = true }) catch return .write_error; + defer file.close(); + + file.writeAll( + \\# Docudactyl — Recovered Text Under Redactions + \\# Only pages containing /Redact annotations with recoverable content are listed. + \\ + \\ + ) catch return .write_error; + + const n_pages = c.poppler_document_get_n_pages(doc); + var page_idx: c_int = 0; + var header_buf: [256]u8 = undefined; + while (page_idx < n_pages) : (page_idx += 1) { + const page = c.poppler_document_get_page(doc, page_idx) orelse continue; + defer c.g_object_unref(page); + + const stats = countPageRedactions(page); + if (stats.annots == 0 or !stats.has_text) continue; + + const header = std.fmt.bufPrint( + &header_buf, + "## Page {d} — {d} redaction annotation(s), {d} bytes of extractable text\n\n", + .{ page_idx + 1, stats.annots, stats.text_len }, + ) catch continue; + file.writeAll(header) catch return .write_error; + + const txt = c.poppler_page_get_text(page) orelse continue; + defer c.g_free(txt); + file.writeAll(std.mem.span(txt)) catch return .write_error; + file.writeAll("\n\n---\n\n") catch return .write_error; + } + + return .ok; +} + +// ============================================================================ +// C-ABI Exports +// ============================================================================ + +export fn ddac_redaction_recovery_analyze( + input_path: ?[*:0]const u8, + result: ?*RedactionRecoveryResult, 
+) c_int { + const path = input_path orelse return @intFromEnum(RedactionRecoveryStatus.cannot_open); + const res = result orelse return @intFromEnum(RedactionRecoveryStatus.cannot_open); + return @intFromEnum(redactionRecoveryAnalyze(path, res)); +} + +export fn ddac_redaction_recovery_dump_text( + input_path: ?[*:0]const u8, + output_path: ?[*:0]const u8, +) c_int { + const ip = input_path orelse return @intFromEnum(RedactionRecoveryStatus.cannot_open); + const op = output_path orelse return @intFromEnum(RedactionRecoveryStatus.write_error); + return @intFromEnum(redactionRecoveryDumpText(ip, op)); +} + +// ============================================================================ +// Tests +// ============================================================================ + +test "null paths return error" { + var r: RedactionRecoveryResult = undefined; + try std.testing.expectEqual( + @as(c_int, @intFromEnum(RedactionRecoveryStatus.cannot_open)), + ddac_redaction_recovery_analyze(null, &r), + ); +} + +test "status enum values stable" { + try std.testing.expectEqual(@as(c_int, 0), @intFromEnum(RedactionRecoveryStatus.ok)); + try std.testing.expectEqual(@as(c_int, 1), @intFromEnum(RedactionRecoveryStatus.cannot_open)); + try std.testing.expectEqual(@as(c_int, 2), @intFromEnum(RedactionRecoveryStatus.not_a_pdf)); +}