From 647c207397c8332ca205477936f769fe678bea73 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 13 May 2026 21:28:28 -0400 Subject: [PATCH 1/4] feat(scanner): skip files inside policy implementation directories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Files that live under a policy-engine implementation directory (`internal/authz/**`, `pkg/policy/**`, etc.) are themselves the policy engine, not consumers of one. Running structural rules against them only produces noise since the rules flag the embedded-authz shape that those files exist to implement. Add `is_policy_implementation_path` mirroring `is_policy_path` for filesystem paths (any directory component contains a `POLICY_INDICATORS` substring, filename intentionally excluded) and short-circuit such files before structural matching runs. They contribute neither findings nor enforcement points. On the OCP corpus this drops externalized_pct from 65% to 78% (4 fewer findings, all from `internal/authz/authz_test.go`). The remaining 4 findings are case #1 in #66 — cross-file flow within a Go package — which is the follow-up. Refs #66 --- src/scanner/imports.rs | 77 +++++++++++++++++++++++++++++ src/scanner/mod.rs | 15 ++++++ tests/scanner_enforcement_points.rs | 46 +++++++++++++++++ 3 files changed, 138 insertions(+) diff --git a/src/scanner/imports.rs b/src/scanner/imports.rs index b4412a6..29fa9ef 100644 --- a/src/scanner/imports.rs +++ b/src/scanner/imports.rs @@ -1,4 +1,5 @@ use std::collections::HashSet; +use std::path::Path; use streaming_iterator::StreamingIterator; @@ -23,6 +24,28 @@ fn is_policy_path(source: &str) -> bool { POLICY_INDICATORS.iter().any(|ind| lower.contains(ind)) } +/// Check if a file's path places it *inside* a policy-engine implementation +/// module — e.g. `internal/authz/authz_test.go` lives under a directory whose +/// name contains `authz`. Files like these are themselves the policy engine, +/// so the structural rules (which flag *consumers* of authz primitives) would +/// only produce noise. The scanner uses this to skip such files entirely. +/// +/// Heuristic: any directory component along the path (excluding the filename +/// itself) contains a `POLICY_INDICATORS` substring, case-insensitive. The +/// filename is intentionally excluded — a top-level `authz.go` could plausibly +/// be either the implementation or a consumer; if it really is the +/// implementation, the user can exclude it via config. Bypassing here is +/// reserved for the unambiguous "in a policy directory" shape. +pub fn is_policy_implementation_path(rel_path: &Path) -> bool { + let Some(parent) = rel_path.parent() else { + return false; + }; + parent.components().any(|component| { + let s = component.as_os_str().to_string_lossy().to_lowercase(); + POLICY_INDICATORS.iter().any(|ind| s.contains(ind)) + }) +} + /// Tree-sitter query for named imports: `import { foo } from 'bar'` /// and aliased: `import { foo as bar } from 'baz'`. /// Captures the binding actually used in code (the alias when renamed, @@ -1777,4 +1800,58 @@ const cached = handler; let imports = find_policy_imports(&tree, source.as_bytes(), Language::TypeScript); assert!(imports.is_empty(), "got: {imports:?}"); } + + // ---------- Policy-implementation path bypass ---------- + + #[test] + fn implementation_path_matches_policy_directory_components() { + // The shapes that motivated this check: files that live under a + // policy-engine module directory, where structural rules would + // only flag the engine's own internals. + assert!(is_policy_implementation_path(Path::new( + "internal/authz/authz_test.go" + ))); + assert!(is_policy_implementation_path(Path::new( + "pkg/policy/check.go" + ))); + assert!(is_policy_implementation_path(Path::new("src/opa/eval.ts"))); + assert!(is_policy_implementation_path(Path::new( + "lib/cedar/engine.ts" + ))); + // Indicator-bearing component anywhere along the path counts — + // not just the immediate parent directory. + assert!(is_policy_implementation_path(Path::new( + "services/policy/src/check.ts" + ))); + } + + #[test] + fn implementation_path_is_case_insensitive() { + assert!(is_policy_implementation_path(Path::new( + "Internal/Authz/Handler.cs" + ))); + assert!(is_policy_implementation_path(Path::new( + "src/Policy/Engine.java" + ))); + } + + #[test] + fn implementation_path_ignores_filename_only_match() { + // A top-level `authz.go` shouldn't auto-bypass just because the + // filename mentions a policy term — the directory placement is the + // signal, not the basename. Users with a top-level implementation + // file can exclude it via config. + assert!(!is_policy_implementation_path(Path::new("authz.go"))); + assert!(!is_policy_implementation_path(Path::new("policy.ts"))); + } + + #[test] + fn implementation_path_skips_non_policy_directories() { + assert!(!is_policy_implementation_path(Path::new( + "src/handlers/orders.ts" + ))); + assert!(!is_policy_implementation_path(Path::new( + "internal/database/bundle_status.go" + ))); + } } diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index 7c55649..45b247a 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -117,6 +117,21 @@ pub fn scan( let rel_path = file.path.strip_prefix(root).unwrap_or(&file.path); + // Files that live *inside* a policy-engine implementation directory + // (`internal/authz/**`, `pkg/policy/**`, etc.) are themselves the + // policy engine, not consumers of one. Structural rules flag the + // embedded-authz shape, so running them here is guaranteed noise. + // Skip the file entirely — these don't count as findings or as + // enforcement points; they don't participate in the externalization + // metric at all. + if imports::is_policy_implementation_path(rel_path) { + tracing::debug!( + "skipping policy implementation file: {}", + rel_path.display(), + ); + continue; + } + // Check for policy-engine imports in this file let policy_imports = imports::find_policy_imports(&tree, source.as_bytes(), file.language); diff --git a/tests/scanner_enforcement_points.rs b/tests/scanner_enforcement_points.rs index 2eb5b0e..877ac00 100644 --- a/tests/scanner_enforcement_points.rs +++ b/tests/scanner_enforcement_points.rs @@ -336,6 +336,52 @@ public class OrderController { ); } +#[test] +fn in_package_policy_implementation_file_is_skipped() { + // OCP case: `internal/authz/authz_test.go` lives in `package authz` and + // calls policy constructors directly (no import — same package). The + // structural rules still match those call sites, but the file *is* the + // policy engine, not a consumer. The path-based bypass should drop it + // before structural matching runs: no findings, no enforcement points. + let dir = tempdir().unwrap(); + let policy_dir = dir.path().join("internal").join("authz"); + fs::create_dir_all(&policy_dir).unwrap(); + fs::write( + policy_dir.join("authz_test.go"), + r#"package authz + +func TestNewAccess(t *testing.T) { + a := NewAccess() + _ = a.WithPrincipal("bob").WithResource("doc").Allow() +} +"#, + ) + .unwrap(); + + let config = ZiftConfig::default(); + let loaded_rules = rules::load_rules(None, &config).expect("embedded rules load"); + let args = ScanArgs { + path: dir.path().to_path_buf(), + ..ScanArgs::default() + }; + + let result = scanner::scan(dir.path(), &loaded_rules, &args, &config).unwrap(); + + assert!( + result.findings.is_empty(), + "in-package policy implementation file leaked findings: {:?}", + result + .findings + .iter() + .map(|f| (f.pattern_rule.clone(), f.line_start)) + .collect::>(), + ); + assert_eq!( + result.enforcement_points, 0, + "in-package policy implementation file should not be counted as an enforcement point", + ); +} + #[test] fn externalized_rules_count_without_policy_import_shortcut() { let cases = [ From 0c53a61a2edf0ac1f2c1fadef3fd794974dc0a2d Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 13 May 2026 21:48:26 -0400 Subject: [PATCH 2/4] feat(scanner): propagate policy bindings across files in a Go package Go's idiomatic DI pattern wires a policy primitive in one file (`database.go`: `&Database{accessFactory: authz.NewAccess}`) and calls it from another (`bundle_status.go`: `d.accessFactory()...`). Per-file propagation never sees the wiring edge from the consumer's perspective, so the call site looks like raw embedded authz. Treat the Go package directory as one propagation domain: union the local imports and propagation edges across every `.go` file under the same parent dir, then propagate to fixed point. The resulting binding set is what each file in the package uses for the enforcement-point check. On the OCP corpus repo this takes externalization from 65% to 100% (the four `bundle_status.go`-style cross-file consumers that the per-file pass missed). Closes part of #66 (case #1: cross-file flow within a package). The path-bypass for in-package implementation files (case #2) shipped in the previous commit on this branch. --- src/scanner/imports.rs | 107 ++++++++++++++++++++++++++++ src/scanner/mod.rs | 100 ++++++++++++++++++++++++-- tests/scanner_enforcement_points.rs | 82 +++++++++++++++++++++ 3 files changed, 285 insertions(+), 4 deletions(-) diff --git a/src/scanner/imports.rs b/src/scanner/imports.rs index 29fa9ef..ab9854b 100644 --- a/src/scanner/imports.rs +++ b/src/scanner/imports.rs @@ -171,6 +171,39 @@ pub fn find_policy_imports( bindings } +/// Compute policy bindings for a Go package by treating every `.go` file in +/// the package directory as one propagation domain. Mirrors +/// `find_policy_imports` for the single-file case but unions the local +/// imports and propagation edges across the whole package before +/// propagating to fixed point. +/// +/// Why: Go's idiomatic DI shape wires a policy primitive in one file and +/// calls it from another. The OCP corpus repo is the motivating case — +/// `internal/database/database.go` does +/// `&Database{accessFactory: authz.NewAccess}`, and 40+ call sites in +/// sibling files (`bundle_status.go`, etc.) reach the same field via +/// `d.accessFactory()...`. Per-file propagation never sees the +/// `accessFactory: authz.NewAccess` edge from the consumer's perspective, +/// so the call site looks like raw embedded authz. Unioning at the package +/// level matches Go's own scoping rules: package-private identifiers are +/// visible across files in the same directory, not across packages. +pub fn find_go_package_policy_imports<'a, I>(files: I) -> HashSet +where + I: IntoIterator, +{ + let mut bindings: HashSet = HashSet::new(); + let mut edges: Vec<(String, String)> = Vec::new(); + for (tree, source) in files { + bindings.extend(find_go_policy_imports(tree, source)); + edges.extend(extract_propagation_edges(tree, source, Language::Go)); + } + if bindings.is_empty() { + return bindings; + } + propagate_to_fixed_point(&mut bindings, &edges); + bindings +} + /// Iteratively grow `bindings` by adding any edge LHS whose RHS textually /// mentions a known binding. Stops when no edge changed the set. /// @@ -1801,6 +1834,80 @@ const cached = handler; assert!(imports.is_empty(), "got: {imports:?}"); } + // ---------- Go package-scoped propagation ---------- + + #[test] + fn go_package_propagates_across_files() { + // OCP-shaped: one file in the package imports `authz` and stashes + // its constructor on a struct field; a sibling file with no policy + // imports of its own calls the field via the struct receiver. The + // per-file pass would miss the consumer entirely; the package pass + // must surface `accessFactory` as a binding visible to both files. + let producer = r#" +package database + +import "github.com/example/authz" + +type Database struct { + accessFactory func() Access +} + +func New() *Database { + return &Database{accessFactory: authz.NewAccess} +} +"#; + let consumer = r#" +package database + +func (d *Database) checkBundle() { + d.accessFactory().WithPrincipal("bob") +} +"#; + let producer_tree = parse_lang(producer, Language::Go); + let consumer_tree = parse_lang(consumer, Language::Go); + + let bindings = find_go_package_policy_imports([ + (&producer_tree, producer.as_bytes()), + (&consumer_tree, consumer.as_bytes()), + ]); + + assert!(bindings.contains("authz")); + assert!( + bindings.contains("accessFactory"), + "expected the cross-file edge to propagate; got: {bindings:?}" + ); + assert!(is_enforcement_point( + "d.accessFactory().WithPrincipal(\"bob\")", + &bindings, + )); + } + + #[test] + fn go_package_returns_empty_when_no_file_imports_policy() { + // If no file in the package has a policy import, propagation has no + // seed and the result must be empty even when files reference each + // other through edges. + let a = r#" +package svc + +import "github.com/example/utils" + +func wire() any { + return utils.New +} +"#; + let b = r#" +package svc + +var helper = wire +"#; + let at = parse_lang(a, Language::Go); + let bt = parse_lang(b, Language::Go); + + let bindings = find_go_package_policy_imports([(&at, a.as_bytes()), (&bt, b.as_bytes())]); + assert!(bindings.is_empty(), "got: {bindings:?}"); + } + // ---------- Policy-implementation path bypass ---------- #[test] diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index 45b247a..678ed25 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -3,8 +3,8 @@ pub mod imports; pub mod matcher; pub mod parser; -use std::collections::HashMap; -use std::path::Path; +use std::collections::{HashMap, HashSet}; +use std::path::{Path, PathBuf}; use crate::cli::ScanArgs; use crate::config::ZiftConfig; @@ -71,6 +71,14 @@ pub fn scan( compiled_cache.insert((*lang, *is_tsx_jsx), compiled_rules); } + // Pre-compute per-Go-package policy bindings. Go's scoping makes the + // package directory the natural propagation domain — bindings declared + // in one file are visible to siblings via package-private identifiers. + // Without this pass, a consumer file with no policy imports of its own + // would never recognize a call like `d.accessFactory()` even when a + // sibling file wired `accessFactory: authz.NewAccess`. + let go_package_bindings = build_go_package_bindings(root, &files); + let mut ts_parser = tree_sitter::Parser::new(); let mut all_findings = Vec::new(); // `enforcement_points` counts call sites that *would* have matched a @@ -132,8 +140,20 @@ pub fn scan( continue; } - // Check for policy-engine imports in this file - let policy_imports = imports::find_policy_imports(&tree, source.as_bytes(), file.language); + // Check for policy-engine imports. For Go we use the package-level + // binding set computed in the pre-pass (a superset of this file's + // local bindings, including any propagated from sibling files). For + // every other language we stay per-file — cross-file flow there + // needs a project-wide symbol table that isn't built yet. + let policy_imports = if file.language == Language::Go { + file.path + .parent() + .and_then(|d| go_package_bindings.get(d)) + .cloned() + .unwrap_or_default() + } else { + imports::find_policy_imports(&tree, source.as_bytes(), file.language) + }; let compiled_rules = &compiled_cache[&(file.language, file.is_tsx_jsx)]; for compiled in compiled_rules { @@ -197,3 +217,75 @@ pub fn scan( enforcement_points, }) } + +/// Group Go files by their parent directory (=Go package) and compute the +/// union of imports + propagation edges across each group, returning the +/// per-package binding set keyed by directory. +/// +/// Files inside policy-engine implementation directories (`internal/authz/` +/// etc.) are skipped here for the same reason the main loop skips them: the +/// files themselves *are* the policy engine, and any bindings they contribute +/// are noise relative to consumer-side detection. +/// +/// We re-parse Go files here rather than threading a tree cache through the +/// main loop. The double parse is cheap (tree-sitter Go is fast and the +/// number of `.go` files in real repos is bounded) and the simpler control +/// flow is worth it; if profiling later flags this, the obvious next step +/// is to pre-parse once and pass the trees through. +fn build_go_package_bindings( + root: &Path, + files: &[discovery::DiscoveredFile], +) -> HashMap> { + let mut by_dir: HashMap> = HashMap::new(); + for file in files { + if file.language != Language::Go { + continue; + } + let rel = file.path.strip_prefix(root).unwrap_or(&file.path); + if imports::is_policy_implementation_path(rel) { + continue; + } + let Some(dir) = file.path.parent() else { + continue; + }; + by_dir + .entry(dir.to_path_buf()) + .or_default() + .push(&file.path); + } + + let mut result: HashMap> = HashMap::new(); + let mut ts_parser = tree_sitter::Parser::new(); + + for (dir, paths) in by_dir { + let mut parsed: Vec<(tree_sitter::Tree, Vec)> = Vec::with_capacity(paths.len()); + for path in paths { + let source = match std::fs::read_to_string(path) { + Ok(s) => s, + Err(e) => { + tracing::warn!("skipping {} during package scan: {}", path.display(), e); + continue; + } + }; + let tree = match parser::parse_source( + &mut ts_parser, + source.as_bytes(), + Language::Go, + false, + ) { + Ok(t) => t, + Err(e) => { + tracing::warn!("skipping {} during package scan: {}", path.display(), e); + continue; + } + }; + parsed.push((tree, source.into_bytes())); + } + let bindings = + imports::find_go_package_policy_imports(parsed.iter().map(|(t, s)| (t, s.as_slice()))); + if !bindings.is_empty() { + result.insert(dir, bindings); + } + } + result +} diff --git a/tests/scanner_enforcement_points.rs b/tests/scanner_enforcement_points.rs index 877ac00..6b0d7b2 100644 --- a/tests/scanner_enforcement_points.rs +++ b/tests/scanner_enforcement_points.rs @@ -226,6 +226,88 @@ func check() { ); } +#[test] +fn go_package_propagation_reroutes_cross_file_consumer() { + // OCP-shaped: one file in `package database` imports `authz` and wires + // its constructor onto a struct field; a sibling consumer file with no + // imports of its own calls the field via the receiver. Per-file scan + // sees nothing in the consumer. Package-scoped propagation must surface + // `accessFactory` as a policy binding so the consumer's call counts as + // an enforcement point, not an embedded finding. + let dir = tempdir().unwrap(); + let pkg_dir = dir.path().join("internal").join("database"); + fs::create_dir_all(&pkg_dir).unwrap(); + fs::write( + pkg_dir.join("database.go"), + r#"package database + +import "github.com/example/authz" + +type Database struct { + accessFactory func() any +} + +func New() *Database { + return &Database{accessFactory: authz.NewAccess} +} +"#, + ) + .unwrap(); + fs::write( + pkg_dir.join("bundle_status.go"), + r#"package database + +func (d *Database) checkBundle() { + _ = d.accessFactory().WithPrincipal("bob").WithResource("bundles") +} +"#, + ) + .unwrap(); + + let config = ZiftConfig::default(); + let loaded_rules = rules::load_rules(None, &config).expect("embedded rules load"); + let args = ScanArgs { + path: dir.path().to_path_buf(), + ..ScanArgs::default() + }; + + let result = scanner::scan(dir.path(), &loaded_rules, &args, &config).unwrap(); + + assert!( + result.enforcement_points >= 1, + "expected the consumer's d.accessFactory().WithPrincipal(...) chain to \ + count as an enforcement point via cross-file propagation; got {} (findings: {:?})", + result.enforcement_points, + result + .findings + .iter() + .map(|f| ( + f.pattern_rule.clone(), + f.file.display().to_string(), + f.line_start + )) + .collect::>(), + ); + assert!( + !result + .findings + .iter() + .any(|f| f.file.ends_with("bundle_status.go") + && f.pattern_rule.as_deref() == Some("go-access-descriptor-builder")), + "consumer file leaked an embedded finding — package-scoped propagation \ + should have rerouted the call: {:?}", + result + .findings + .iter() + .map(|f| ( + f.pattern_rule.clone(), + f.file.display().to_string(), + f.line_start + )) + .collect::>(), + ); +} + #[test] fn enforcement_points_increments_for_python_authz_import() { // Use a `check_*_permission` shape so it actually trips a structural rule From 7c8959d6a389a31ac85854090518136e648bea8a Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 13 May 2026 21:56:50 -0400 Subject: [PATCH 3/4] refactor(scanner): address review feedback on cross-file policy bindings - Upgrade the policy-implementation-path skip log from debug to warn so accidental bypasses are visible (the indicator set includes broad terms like `enforce`/`opa`/`policy` that can match legitimate consumer directories). Document the false-positive risk in the rustdoc for is_policy_implementation_path. - Reorder find_go_package_policy_imports to collect bindings first and short-circuit on empty before walking propagation edges, matching the per-file find_policy_imports path and avoiding wasted edge extraction on policy-free packages. - Note in build_go_package_bindings that same-directory does not imply same Go package (`_test.go` and build constraints can split files), and explain why the union is safe under today's monotonic propagation. --- src/scanner/imports.rs | 29 ++++++++++++++++++++++------- src/scanner/mod.rs | 14 ++++++++++++-- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/src/scanner/imports.rs b/src/scanner/imports.rs index ab9854b..a88d682 100644 --- a/src/scanner/imports.rs +++ b/src/scanner/imports.rs @@ -36,6 +36,14 @@ fn is_policy_path(source: &str) -> bool { /// be either the implementation or a consumer; if it really is the /// implementation, the user can exclude it via config. Bypassing here is /// reserved for the unambiguous "in a policy directory" shape. +/// +/// False-positive risk: the indicator set is shared with import-path matching +/// and includes broad terms (`enforce`, `opa`, `policy`). Consumer directories +/// like `services/enforcement/` or any path with `opa` as a substring will be +/// silently skipped — no findings, no enforcement points. The scanner surfaces +/// a `warn!` per skipped file so accidental bypasses are visible in logs; if a +/// legitimate consumer directory is being dropped, the user can either rename +/// it or exclude it via config (and we can narrow the indicator set later). pub fn is_policy_implementation_path(rel_path: &Path) -> bool { let Some(parent) = rel_path.parent() else { return false; @@ -172,10 +180,10 @@ pub fn find_policy_imports( } /// Compute policy bindings for a Go package by treating every `.go` file in -/// the package directory as one propagation domain. Mirrors -/// `find_policy_imports` for the single-file case but unions the local -/// imports and propagation edges across the whole package before -/// propagating to fixed point. +/// the package directory as one propagation domain. Same shape as +/// `find_policy_imports` for the single-file case — collect imports, then +/// (if any) collect propagation edges and run to fixed point — but with +/// imports and edges unioned across every file in the package. /// /// Why: Go's idiomatic DI shape wires a policy primitive in one file and /// calls it from another. The OCP corpus repo is the motivating case — @@ -191,15 +199,22 @@ pub fn find_go_package_policy_imports<'a, I>(files: I) -> HashSet where I: IntoIterator, { + // Materialize so we can do two passes — bindings first, then edges + // only when there's something to propagate. + let files: Vec<(&tree_sitter::Tree, &[u8])> = files.into_iter().collect(); + let mut bindings: HashSet = HashSet::new(); - let mut edges: Vec<(String, String)> = Vec::new(); - for (tree, source) in files { + for (tree, source) in &files { bindings.extend(find_go_policy_imports(tree, source)); - edges.extend(extract_propagation_edges(tree, source, Language::Go)); } if bindings.is_empty() { return bindings; } + + let mut edges: Vec<(String, String)> = Vec::new(); + for (tree, source) in &files { + edges.extend(extract_propagation_edges(tree, source, Language::Go)); + } propagate_to_fixed_point(&mut bindings, &edges); bindings } diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index 678ed25..dec35d6 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -133,8 +133,9 @@ pub fn scan( // enforcement points; they don't participate in the externalization // metric at all. if imports::is_policy_implementation_path(rel_path) { - tracing::debug!( - "skipping policy implementation file: {}", + tracing::warn!( + "skipping policy implementation file: {} (matched policy-indicator directory; \ + exclude via config if this is consumer code)", rel_path.display(), ); continue; @@ -232,6 +233,15 @@ pub fn scan( /// number of `.go` files in real repos is bounded) and the simpler control /// flow is worth it; if profiling later flags this, the obvious next step /// is to pre-parse once and pass the trees through. +/// +/// Same-directory ≠ same Go package: `_test.go` files can declare +/// `package foo_test` alongside `package foo`, and build constraints can +/// gate files to different OS/arch combos. We deliberately union bindings +/// across all of them. The union is monotonic for consumer detection +/// (extra bindings only reduce false embedded findings; they can't turn a +/// real finding into a false enforcement point), so this is safe today. +/// Revisit if propagation ever gains non-monotonic logic (scoring, +/// subtraction, etc.). fn build_go_package_bindings( root: &Path, files: &[discovery::DiscoveredFile], From eaedb66ee71cb712a1ebf6733ce9a78f8af0fb5a Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 13 May 2026 22:10:05 -0400 Subject: [PATCH 4/4] refactor(scanner): skip policy-impl files before read/parse Move is_policy_implementation_path() ahead of the read + tree-sitter parse so we don't pay I/O or parse cost for files we immediately drop. The check only needs rel_path (derived from file.path), so it's safe to run first. Behavior unchanged. --- src/scanner/mod.rs | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index dec35d6..43b7a04 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -98,6 +98,25 @@ pub fn scan( std::collections::HashSet::new(); for file in &files { + let rel_path = file.path.strip_prefix(root).unwrap_or(&file.path); + + // Files that live *inside* a policy-engine implementation directory + // (`internal/authz/**`, `pkg/policy/**`, etc.) are themselves the + // policy engine, not consumers of one. Structural rules flag the + // embedded-authz shape, so running them here is guaranteed noise. + // Skip the file entirely — these don't count as findings or as + // enforcement points; they don't participate in the externalization + // metric at all. Done before read/parse so we don't pay I/O or + // tree-sitter cost for files we'll immediately drop. + if imports::is_policy_implementation_path(rel_path) { + tracing::warn!( + "skipping policy implementation file: {} (matched policy-indicator directory; \ + exclude via config if this is consumer code)", + rel_path.display(), + ); + continue; + } + let source = match std::fs::read_to_string(&file.path) { Ok(s) => s, Err(e) => { @@ -123,24 +142,6 @@ pub fn scan( tracing::debug!("parse errors in {}, scanning anyway", file.path.display()); } - let rel_path = file.path.strip_prefix(root).unwrap_or(&file.path); - - // Files that live *inside* a policy-engine implementation directory - // (`internal/authz/**`, `pkg/policy/**`, etc.) are themselves the - // policy engine, not consumers of one. Structural rules flag the - // embedded-authz shape, so running them here is guaranteed noise. - // Skip the file entirely — these don't count as findings or as - // enforcement points; they don't participate in the externalization - // metric at all. - if imports::is_policy_implementation_path(rel_path) { - tracing::warn!( - "skipping policy implementation file: {} (matched policy-indicator directory; \ - exclude via config if this is consumer code)", - rel_path.display(), - ); - continue; - } - // Check for policy-engine imports. For Go we use the package-level // binding set computed in the pre-pass (a superset of this file's // local bindings, including any propagated from sibling files). For