Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/deep/candidate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ fn overlaps_any(
#[cfg(test)]
mod tests {
use super::*;
use crate::types::{AuthCategory, Confidence, Language, ScanPass};
use crate::types::{AuthCategory, Confidence, Language, ScanPass, Surface};
use std::fs;
use tempfile::tempdir;

Expand All @@ -396,6 +396,7 @@ mod tests {
pattern_rule: None,
rego_stub: None,
pass: ScanPass::Structural,
surface: Surface::Backend,
}
}

Expand Down Expand Up @@ -769,7 +770,7 @@ mod tests {
// Regression: a structural finding pointing at a deleted file used to
// propagate `DeepError::Io` through `?`, killing the entire deep pass
// even though deep mode is otherwise best-effort.
use crate::types::{AuthCategory, Confidence, Finding, ScanPass};
use crate::types::{AuthCategory, Confidence, Finding, ScanPass, Surface};
let dir = tempdir().unwrap();
// One escalation finding pointing at a file that doesn't exist.
let bad = Finding {
Expand All @@ -785,6 +786,7 @@ mod tests {
pattern_rule: None,
rego_stub: None,
pass: ScanPass::Structural,
surface: Surface::Backend,
};
// Should NOT propagate Io; should return Ok with the bad escalation
// skipped. (No cold-region files either, so result is empty.)
Expand Down
3 changes: 2 additions & 1 deletion src/deep/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ fn expand_inner(
#[cfg(test)]
mod tests {
use super::*;
use crate::types::{AuthCategory, Confidence, ScanPass};
use crate::types::{AuthCategory, Confidence, ScanPass, Surface};
use std::fs;
use std::path::PathBuf;
use tempfile::tempdir;
Expand All @@ -214,6 +214,7 @@ mod tests {
pattern_rule: None,
rego_stub: None,
pass: ScanPass::Structural,
surface: Surface::Backend,
}
}

Expand Down
154 changes: 147 additions & 7 deletions src/deep/finding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use crate::deep::candidate::Candidate;
use crate::scanner::matcher::compute_finding_id;
use crate::types::{AuthCategory, Confidence, Finding, ScanPass};
use crate::types::{AuthCategory, Confidence, Finding, ScanPass, Surface};
use serde::Deserialize;
use std::path::Path;

Expand Down Expand Up @@ -32,8 +32,14 @@ pub struct SemanticFinding {
///
/// `scan_root` is required to read the file at `candidate.file` (relative)
/// to populate `code_snippet` from the lines the model identified. If the
/// file is unreadable (e.g. moved between scan and analyze), `code_snippet`
/// falls back to the empty string — best-effort, do not fail the finding.
/// file is unreadable (e.g. moved between scan and analyze), the snippet
/// falls back to slicing [`Candidate::source_snippet`] for the same line
/// range — that buffer was loaded by the structural pass so it's already
/// in memory and represents the same source the model just analyzed.
/// Empty `code_snippet` remains the last resort; downstream tools that
/// surface "what was flagged" need *something* on every finding (corpus
/// shakedown turned up 28 semantic findings with `code_snippet: ""`,
/// which made deep-only buckets unreviewable).
pub fn into_finding(
sem: SemanticFinding,
candidate: &Candidate,
Expand Down Expand Up @@ -85,8 +91,14 @@ pub fn into_finding(
_ => format!("semantic-{}", sem.category.slug()),
};

let code_snippet =
extract_lines(scan_root, &candidate.file, sem.line_start, sem.line_end).unwrap_or_default();
// Try the filesystem first — that's the source of truth and produces the
// same byte range a structural finding would. Fall back to slicing the
// candidate's expanded snippet (already in memory) when the file moved,
// permissions changed, or extract_lines bailed for any other reason.
// Last resort: empty string. We never *fail* a finding on snippet read.
let code_snippet = extract_lines(scan_root, &candidate.file, sem.line_start, sem.line_end)
.or_else(|| slice_candidate_snippet(candidate, sem.line_start, sem.line_end))
.unwrap_or_default();
Comment thread
coderabbitai[bot] marked this conversation as resolved.

let id = compute_finding_id(
&rule_id,
Expand All @@ -112,9 +124,53 @@ pub fn into_finding(
pattern_rule: Some(rule_id),
rego_stub: None, // structural-only; semantic findings have no rego template
pass: ScanPass::Semantic,
// Surface follows the source file, not the pass — same path
// heuristic as structural findings so a deep-pass `web/src/foo.ts`
// finding is tagged Frontend just like its structural twin would be.
surface: Surface::classify(&candidate.file),
}
}

/// Fallback snippet extraction: cut the model-reported line range out of
/// [`candidate.source_snippet`], converting absolute (1-based, file-relative)
/// line numbers into offsets inside the snippet buffer.
///
/// Returns `None` when the buffer is empty, the reported range is not fully
/// inside the candidate window, or the starting offset falls beyond the end
/// of the buffer (possible when the snippet was truncated to honor
/// `max_prompt_chars`). A range whose tail runs past a truncated buffer is
/// clamped rather than rejected.
///
/// Callers normally arrive here after `clamp_to_candidate` (in `deep::mod`)
/// has already bounded `sem_start`/`sem_end`; the window check is repeated
/// here defensively so a future unclamped caller gets `None`, not a panic.
fn slice_candidate_snippet(
    candidate: &crate::deep::candidate::Candidate,
    sem_start: usize,
    sem_end: usize,
) -> Option<String> {
    // Refuse empty buffers and any range that is zero-based, reversed, or
    // not wholly contained in [line_start, line_end].
    let in_window = sem_start >= 1
        && sem_start <= sem_end
        && sem_start >= candidate.line_start
        && sem_end <= candidate.line_end;
    if candidate.source_snippet.is_empty() || !in_window {
        return None;
    }
    // Snippet offset 0 corresponds to `candidate.line_start` (1-based).
    let from = sem_start - candidate.line_start;
    let to = sem_end - candidate.line_start;
    let buffer: Vec<&str> = candidate.source_snippet.lines().collect();
    // `get` yields `None` when `from` is past the buffer; an empty tail
    // (from == len) means truncation swallowed the whole requested range.
    let tail = buffer.get(from..)?;
    if tail.is_empty() {
        return None;
    }
    // Clamp the inclusive end to whatever survived truncation.
    let wanted = to - from + 1;
    let picked: Vec<&str> = tail.iter().take(wanted).copied().collect();
    Some(picked.join("\n"))
}

/// Read the file at `scan_root.join(relative)` and return lines `[start, end]`
/// joined by `\n`. Returns `None` on read error or out-of-range input.
fn extract_lines(scan_root: &Path, relative: &Path, start: usize, end: usize) -> Option<String> {
Expand All @@ -126,7 +182,16 @@ fn extract_lines(scan_root: &Path, relative: &Path, start: usize, end: usize) ->
if lines.is_empty() {
return None;
}
let s = (start - 1).min(lines.len() - 1);
// Bail (instead of clamping to the last line) when the requested range
// starts past the end of the file. Otherwise the caller's fallback chain
// — `extract_lines(...).or_else(slice_candidate_snippet(...))` in
// `build_finding_from_semantic` — never reaches the candidate snippet on
// a shrunken/replaced file, and we'd silently attach an unrelated
// last-line excerpt to the finding.
if start > lines.len() {
return None;
}
let s = start - 1;
let e = end.min(lines.len()).max(s + 1);
Some(lines[s..e].join("\n"))
}
Expand Down Expand Up @@ -180,6 +245,7 @@ mod tests {
pattern_rule: pattern_rule.map(String::from),
rego_stub: None,
pass: ScanPass::Structural,
surface: Surface::Backend,
}
}

Expand Down Expand Up @@ -317,7 +383,8 @@ mod tests {
#[test]
fn into_finding_falls_back_to_empty_snippet_on_read_error() {
let dir = tempdir().unwrap();
// File doesn't exist.
// File doesn't exist AND the candidate has no source_snippet to
// fall back to → last-resort empty string.
let cand = make_candidate("nonexistent.ts", Language::TypeScript);
let f = into_finding(make_semantic(1, 5), &cand, None, dir.path());
assert_eq!(f.code_snippet, "");
Expand All @@ -327,6 +394,79 @@ mod tests {
assert_eq!(f.line_end, 5);
}

#[test]
fn into_finding_falls_back_to_candidate_snippet_when_file_unreadable() {
    // Regression for corpus shakedown: semantic findings used to ship
    // with `code_snippet: ""` whenever the filesystem read failed and no
    // fallback existed. The candidate's `source_snippet` is the very
    // source the model analyzed, so it must be sliced instead of
    // dropping the snippet entirely.
    let dir = tempdir().unwrap();
    // The file is deliberately never created — extract_lines must fail.
    let mut cand = make_candidate("missing.ts", Language::TypeScript);
    cand.line_start = 10;
    cand.line_end = 14;
    cand.source_snippet = ["line 10", "line 11", "line 12", "line 13", "line 14"].join("\n");

    // The model flags lines 11-12, inside the candidate window.
    let f = into_finding(make_semantic(11, 12), &cand, None, dir.path());

    for wanted in ["line 11", "line 12"] {
        assert!(f.code_snippet.contains(wanted));
    }
    for unwanted in ["line 10", "line 13"] {
        assert!(!f.code_snippet.contains(unwanted));
    }
}

#[test]
fn slice_candidate_snippet_rejects_ranges_outside_window() {
    let cand = Candidate {
        kind: CandidateKind::ColdRegion,
        file: PathBuf::from("a.ts"),
        language: Language::TypeScript,
        line_start: 10,
        line_end: 14,
        source_snippet: "line 10\nline 11\nline 12\nline 13\nline 14".to_string(),
        imports: Vec::new(),
        original_finding_id: None,
        seed_category: None,
    };
    // Every out-of-window shape must be refused: fully below the window,
    // fully above it, reversed, and the defensive zero start (which
    // clamp_to_candidate normally filters before we're called).
    for (lo, hi) in [(5, 8), (20, 22), (12, 11), (0, 12)] {
        assert!(slice_candidate_snippet(&cand, lo, hi).is_none());
    }
    // An empty snippet buffer yields nothing even for an in-window range.
    let mut no_snippet = cand.clone();
    no_snippet.source_snippet = String::new();
    assert!(slice_candidate_snippet(&no_snippet, 11, 12).is_none());
}

#[test]
fn slice_candidate_snippet_clamps_when_snippet_was_truncated() {
    // Truncation at `max_prompt_chars` can leave the buffer holding fewer
    // lines than `[candidate.line_start, candidate.line_end]` advertises.
    // A range whose tail runs past the buffer must clamp — never panic on
    // an out-of-bounds index.
    let cand = Candidate {
        kind: CandidateKind::ColdRegion,
        file: PathBuf::from("a.ts"),
        language: Language::TypeScript,
        line_start: 10,
        line_end: 20, // the window advertises 11 lines …
        source_snippet: "line 10\nline 11\nline 12".to_string(), // … the buffer holds 3
        imports: Vec::new(),
        original_finding_id: None,
        seed_category: None,
    };
    // The model points at lines 11-15; only 11 and 12 survived truncation,
    // so those two come back and nothing blows up.
    let got = slice_candidate_snippet(&cand, 11, 15).unwrap();
    for line in ["line 11", "line 12"] {
        assert!(got.contains(line));
    }
}

#[test]
fn ranges_overlap_covers_inclusive_boundaries() {
// Inclusive on both ends: touching at a single line counts as overlap.
Expand Down
3 changes: 2 additions & 1 deletion src/deep/merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ fn range_overlap_fraction(a_start: usize, a_end: usize, b_start: usize, b_end: u
#[cfg(test)]
mod tests {
use super::*;
use crate::types::{AuthCategory, Confidence, Language, ScanPass};
use crate::types::{AuthCategory, Confidence, Language, ScanPass, Surface};
use std::path::PathBuf;

fn finding(
Expand All @@ -79,6 +79,7 @@ mod tests {
pattern_rule: None,
rego_stub: None,
pass,
surface: Surface::Backend,
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/deep/prompt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ fn language_fence(lang: Language) -> &'static str {
mod tests {
use super::*;
use crate::deep::candidate::{Candidate, CandidateKind};
use crate::types::{AuthCategory, Confidence, ScanPass};
use crate::types::{AuthCategory, Confidence, ScanPass, Surface};
use std::path::PathBuf;

fn candidate_with_imports(language: Language, imports: Vec<String>) -> Candidate {
Expand Down Expand Up @@ -336,6 +336,7 @@ mod tests {
pattern_rule: Some("ts-custom-1".into()),
rego_stub: None,
pass: ScanPass::Structural,
surface: Surface::Backend,
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/mcp/tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::rego::templates::{apply_confidence_wrapping, generate_default_stub, r
use crate::rego::validator::validate_rego;
use crate::rules::PatternRule;
use crate::scanner;
use crate::types::{AuthCategory, Confidence, Finding, Language, ScanPass};
use crate::types::{AuthCategory, Confidence, Finding, Language, ScanPass, Surface};

const DEFAULT_MAX_PROMPT_CHARS: usize = 16_000;

Expand Down Expand Up @@ -607,6 +607,7 @@ fn analyze_snippet(_ctx: &ServerContext, args: &Value) -> Result<Value, String>
pattern_rule: s.pattern_rule.clone(),
rego_stub: None,
pass: ScanPass::Structural,
surface: Surface::classify(&PathBuf::from(&parsed.file)),
});

let rendered = render(&PromptInputs {
Expand Down
13 changes: 11 additions & 2 deletions src/output/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::io::Write;
use std::path::Path;

use crate::error::Result;
use crate::types::Finding;
use crate::types::{Finding, Surface};

pub fn print(
findings: &[Finding],
Expand All @@ -21,13 +21,22 @@ pub fn print(
writeln!(writer)?;
}

// Tag frontend-surface findings inline so reviewers can scan past
// the UI-state noise (e.g. `web/src/foo.ts` ownership matches that
// are "is this me?" checks, not security gates) without diving into
// the JSON. Backend is the default — no tag, less visual noise.
let surface_tag = match finding.surface {
Surface::Frontend => " [frontend]",
Surface::Backend => "",
};
writeln!(
writer,
" {}:{} [{}] {}",
" {}:{} [{}] {}{}",
finding.file.display(),
finding.line_start,
finding.category,
finding.confidence,
surface_tag,
)?;

writeln!(writer, " {}", finding.description)?;
Expand Down
3 changes: 3 additions & 0 deletions src/rego/grouping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ mod tests {
pattern_rule: Some("test-rule".into()),
rego_stub: None,
pass: ScanPass::Structural,
surface: Surface::Backend,
}];

let files = group_findings(&findings, "app", Path::new("./policies"));
Expand All @@ -260,6 +261,7 @@ mod tests {
pattern_rule: None,
rego_stub: None,
pass: ScanPass::Structural,
surface: Surface::Backend,
},
Finding {
id: "b".into(),
Expand All @@ -274,6 +276,7 @@ mod tests {
pattern_rule: None,
rego_stub: None,
pass: ScanPass::Structural,
surface: Surface::Backend,
},
];

Expand Down
4 changes: 3 additions & 1 deletion src/scanner/matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use tree_sitter::{Query, QueryCursor, Tree};

use crate::error::{Result, ZiftError};
use crate::rules::{CrossPredicate, PatternRule, Predicate};
use crate::types::{Confidence, Finding, Language, ScanPass};
use crate::types::{Confidence, Finding, Language, ScanPass, Surface};

pub struct CompiledRule<'a> {
pub rule: &'a PatternRule,
Expand Down Expand Up @@ -163,6 +163,7 @@ pub fn execute_query(
crate::rego::render_template(tmpl, &owned)
}),
pass: ScanPass::Structural,
surface: Surface::classify(file_path),
});
}

Expand Down Expand Up @@ -1277,6 +1278,7 @@ match = ".*"
pattern_rule: None,
rego_stub: None,
pass: ScanPass::Structural,
surface: Surface::Backend,
};
let findings = vec![f.clone(), f];
let deduped = dedup_findings(findings);
Expand Down
Loading