From a12113f32a0cef688eb4bcbc8c9c0abea6912799 Mon Sep 17 00:00:00 2001 From: Brad Anderson <brad@enforceauth.com> Date: Wed, 6 May 2026 13:57:45 -0400 Subject: [PATCH 1/7] docs: add issue-checking step to /pr command The /pr command was missing a step to check for related open issues, so PRs created via the command were not getting Fixes/Closes references and issues had to be closed manually. Port the missing step from spicy/ea. --- .agents/commands/pr.md | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/.agents/commands/pr.md b/.agents/commands/pr.md index 488d1e0..aa97720 100644 --- a/.agents/commands/pr.md +++ b/.agents/commands/pr.md @@ -11,22 +11,36 @@ When activated, create a pull request for the current branch: - Ensure we're not on `main` (abort if so) - Run `git log main..HEAD --oneline` to see commits to include -2. **Push the branch** (if not already pushed): +2. **Move completed plan to done** (if applicable): + - Check `plans/todo/` for a plan file related to this branch (match by issue number or feature name) + - If found, check whether all checklist items (`- [ ]`) are complete (`- [x]`) + - If all complete: `mkdir -p plans/done && git mv plans/todo/<plan-file> plans/done/` + - Commit: `git commit -m "docs: move completed plan to plans/done/"` + - If not all complete, leave it in `plans/todo/` + +3. **Push the branch** (if not already pushed): - Run `git push -u origin <branch-name>` -3. **Generate PR title and body**: +4. **Check for related issues**: - Look at the branch name for issue numbers (e.g., `fix/896-buffer-import` references #896) - Check commit messages for issue references - Run `gh issue list --state open --limit 20` to see recent open issues that might be related - If the PR resolves an issue, note it for the body + +5. 
**Generate PR title and body**: - Title: Use conventional commit format based on the primary change (e.g., `feat: add tree-sitter Java pattern matching`) - Keep the title under 72 characters - Body should include: - **Summary**: Brief description of what this PR does - **Changes**: Bullet list of key changes - **Testing**: How to test the changes (e.g., `cargo test`, manual CLI usage) + - **Issue references**: Add `Fixes #XXX` or `Closes #XXX` for any issues this PR resolves (these will auto-close the issues when merged) -4. **Create the PR**: +6. **Create the PR**: ```bash gh pr create --title "<title>" --body "<body>" --base main --assignee @me ``` -5. **Report the PR URL** to the user +7. **Report the PR URL** to the user If the user provides arguments (e.g., `/pr "Custom title"`), use that as the PR title instead of generating one. From 9346d44d5e36a211776abf2764db0affd718f897 Mon Sep 17 00:00:00 2001 From: Brad Anderson <brad@enforceauth.com> Date: Wed, 6 May 2026 14:07:32 -0400 Subject: [PATCH 2/7] refactor: collapse Finding stub fields into policy_outputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase B (#71) part 1. Replace the parallel `Finding::rego_stub` / `Finding::cedar_stub` fields with a single `policy_outputs: Vec<PolicyOutput>` keyed by `PolicyEngine`, so adding an Nth engine no longer means adding an Nth optional field. Backward compatibility is preserved on the read side via a `FindingShim` (`#[serde(from = ...)]`): legacy findings JSON with `rego_stub` / `cedar_stub` continues to deserialize and the legacy fields fold into `policy_outputs`. The serialize side emits only `policy_outputs` — downstream consumers reading the legacy keys will need to update. `PolicyEngine` moves from `cli` to `types` (it's a domain type, not a CLI concern) and gains `Serialize` / `Deserialize`. `cli` re-exports it so existing imports keep working. 
--- src/cedar/grouping.rs | 9 +- src/cli.rs | 15 +- src/commands/extract.rs | 28 ++-- src/deep/candidate.rs | 6 +- src/deep/context.rs | 3 +- src/deep/finding.rs | 7 +- src/deep/merge.rs | 3 +- src/deep/prompt.rs | 3 +- src/mcp/tools.rs | 3 +- src/output/json.rs | 3 +- src/output/text.rs | 3 +- src/rego/grouping.rs | 15 +- src/scanner/matcher.rs | 58 ++++---- src/types.rs | 213 ++++++++++++++++++++++++++- tests/deep_http_integration.rs | 3 +- tests/deep_subprocess_integration.rs | 3 +- 16 files changed, 275 insertions(+), 100 deletions(-) diff --git a/src/cedar/grouping.rs b/src/cedar/grouping.rs index d7ab140..5f20352 100644 --- a/src/cedar/grouping.rs +++ b/src/cedar/grouping.rs @@ -6,7 +6,7 @@ use std::collections::BTreeMap; use std::path::{Path, PathBuf}; -use crate::types::Finding; +use crate::types::{Finding, PolicyEngine}; use super::templates; @@ -130,8 +130,8 @@ fn build_cedar_content(source_file: &Path, findings: &[&Finding]) -> String { }; lines.push(format!("// Original: {}", truncated.trim())); - let stub = match &finding.cedar_stub { - Some(s) => s.clone(), + let stub = match finding.policy_output(PolicyEngine::Cedar) { + Some(s) => s.to_string(), None => templates::generate_default_stub(finding.category, &finding.code_snippet), }; let wrapped = templates::apply_confidence_wrapping(&stub, finding.confidence); @@ -161,8 +161,7 @@ mod tests { confidence: Confidence::High, description: "test".into(), pattern_rule: Some("test-rule".into()), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } diff --git a/src/cli.rs b/src/cli.rs index ec2784d..21b00e5 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -165,20 +165,7 @@ pub struct ExtractArgs { pub min_confidence: Option<Confidence>, } -#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] -pub enum PolicyEngine { - Rego, - Cedar, -} - -impl std::fmt::Display for PolicyEngine { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> 
std::fmt::Result { - match self { - PolicyEngine::Rego => write!(f, "rego"), - PolicyEngine::Cedar => write!(f, "cedar"), - } - } -} +pub use crate::types::PolicyEngine; // -- Report -- diff --git a/src/commands/extract.rs b/src/commands/extract.rs index 79027c2..a140e75 100644 --- a/src/commands/extract.rs +++ b/src/commands/extract.rs @@ -3,10 +3,10 @@ use std::path::Path; use serde::Deserialize; -use crate::cli::{ExtractArgs, PolicyEngine}; +use crate::cli::ExtractArgs; use crate::config::ZiftConfig; use crate::error::{Result, ZiftError}; -use crate::types::Finding; +use crate::types::{Finding, PolicyEngine}; /// Minimal struct for deserializing scan output — we only need the findings. #[derive(Deserialize)] @@ -56,11 +56,9 @@ fn extract_rego(findings: &mut [Finding], policy_prefix: &str, output_dir: &Path use crate::rego::{self, templates}; for finding in findings.iter_mut() { - if finding.rego_stub.is_none() { - finding.rego_stub = Some(templates::generate_default_stub( - finding.category, - &finding.code_snippet, - )); + if finding.policy_output(PolicyEngine::Rego).is_none() { + let stub = templates::generate_default_stub(finding.category, &finding.code_snippet); + finding.set_policy_output(PolicyEngine::Rego, stub); } } @@ -117,16 +115,14 @@ fn extract_rego(findings: &mut [Finding], policy_prefix: &str, output_dir: &Path fn extract_cedar(findings: &mut [Finding], policy_prefix: &str, output_dir: &Path) -> Result<()> { use crate::cedar::{self, templates}; - // Mirror of the Rego pre-fill: if a finding doesn't carry a - // cedar_stub yet (e.g. it came from an older scan, or its rule has no - // `cedar_template` block), synthesize one from the category default. - // This keeps Cedar coverage at 100% — no rule produces zero output. + // Mirror of the Rego pre-fill: if a finding has no Cedar policy_output + // yet (e.g. it came from an older scan, or its rule has no Cedar + // template), synthesize one from the category default. 
This keeps Cedar + // coverage at 100% — no rule produces zero output. for finding in findings.iter_mut() { - if finding.cedar_stub.is_none() { - finding.cedar_stub = Some(templates::generate_default_stub( - finding.category, - &finding.code_snippet, - )); + if finding.policy_output(PolicyEngine::Cedar).is_none() { + let stub = templates::generate_default_stub(finding.category, &finding.code_snippet); + finding.set_policy_output(PolicyEngine::Cedar, stub); } } diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs index 173188f..d9508aa 100644 --- a/src/deep/candidate.rs +++ b/src/deep/candidate.rs @@ -395,8 +395,7 @@ mod tests { confidence, description: String::new(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } @@ -786,8 +785,7 @@ mod tests { confidence: Confidence::Low, description: "x".into(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, }; diff --git a/src/deep/context.rs b/src/deep/context.rs index 4ac0bf8..cc0ee38 100644 --- a/src/deep/context.rs +++ b/src/deep/context.rs @@ -212,8 +212,7 @@ mod tests { confidence: Confidence::Low, description: String::new(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } diff --git a/src/deep/finding.rs b/src/deep/finding.rs index beedbea..cd114b3 100644 --- a/src/deep/finding.rs +++ b/src/deep/finding.rs @@ -122,8 +122,8 @@ pub fn into_finding( // `semantic-rbac`) so a consumer grouping by `pattern_rule` sees that // this finding is the model's verdict, not the structural rule's. pattern_rule: Some(rule_id), - rego_stub: None, // structural-only; semantic findings have no rego template - cedar_stub: None, + // Structural-only; semantic findings have no policy templates. 
+ policy_outputs: vec![], pass: ScanPass::Semantic, // Surface follows the source file, not the pass — same path // heuristic as structural findings so a deep-pass `web/src/foo.ts` @@ -244,8 +244,7 @@ mod tests { confidence: Confidence::Low, description: "matched custom rule".into(), pattern_rule: pattern_rule.map(String::from), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } diff --git a/src/deep/merge.rs b/src/deep/merge.rs index d572eb7..8c74ff1 100644 --- a/src/deep/merge.rs +++ b/src/deep/merge.rs @@ -77,8 +77,7 @@ mod tests { confidence, description: String::new(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass, surface: Surface::Backend, } diff --git a/src/deep/prompt.rs b/src/deep/prompt.rs index 1fa0eab..5eed017 100644 --- a/src/deep/prompt.rs +++ b/src/deep/prompt.rs @@ -334,8 +334,7 @@ mod tests { confidence: Confidence::Low, description: "matched custom rule".into(), pattern_rule: Some("ts-custom-1".into()), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } diff --git a/src/mcp/tools.rs b/src/mcp/tools.rs index eea8ab4..5c4df5f 100644 --- a/src/mcp/tools.rs +++ b/src/mcp/tools.rs @@ -774,8 +774,7 @@ fn analyze_snippet(_ctx: &ServerContext, args: &Value) -> Result<Value, String> confidence: s.confidence, description: s.description.clone(), pattern_rule: s.pattern_rule.clone(), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::classify(&PathBuf::from(&parsed.file)), }); diff --git a/src/output/json.rs b/src/output/json.rs index 069abeb..af68171 100644 --- a/src/output/json.rs +++ b/src/output/json.rs @@ -126,8 +126,7 @@ mod tests { confidence: Confidence::High, description: "embedded role check".into(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: 
ScanPass::Structural, surface: Surface::Backend, } diff --git a/src/output/text.rs b/src/output/text.rs index a004df4..914c85a 100644 --- a/src/output/text.rs +++ b/src/output/text.rs @@ -139,8 +139,7 @@ mod tests { confidence, description: "embedded role check".into(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } diff --git a/src/rego/grouping.rs b/src/rego/grouping.rs index 7625d52..c2fe6a1 100644 --- a/src/rego/grouping.rs +++ b/src/rego/grouping.rs @@ -1,7 +1,7 @@ use std::collections::BTreeMap; use std::path::{Path, PathBuf}; -use crate::types::Finding; +use crate::types::{Finding, PolicyEngine}; use super::templates; @@ -148,8 +148,8 @@ fn build_rego_content(package_name: &str, source_file: &Path, findings: &[&Findi lines.push(format!("# Original: {}", truncated.trim())); // Get the Rego stub - let stub = match &finding.rego_stub { - Some(s) => s.clone(), + let stub = match finding.policy_output(PolicyEngine::Rego) { + Some(s) => s.to_string(), None => templates::generate_default_stub(finding.category, &finding.code_snippet), }; @@ -232,8 +232,7 @@ mod tests { confidence: Confidence::High, description: "test".into(), pattern_rule: Some("test-rule".into()), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, }]; @@ -260,8 +259,7 @@ mod tests { confidence: Confidence::High, description: "test".into(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, }, @@ -276,8 +274,7 @@ mod tests { confidence: Confidence::Medium, description: "test".into(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, }, diff --git a/src/scanner/matcher.rs b/src/scanner/matcher.rs index 0f753d7..204e8c1 100644 --- a/src/scanner/matcher.rs +++ 
b/src/scanner/matcher.rs @@ -8,7 +8,7 @@ use tree_sitter::{Query, QueryCursor, Tree}; use crate::error::{Result, ZiftError}; use crate::rules::{CrossPredicate, PatternRule, Predicate}; -use crate::types::{Confidence, Finding, Language, ScanPass, Surface}; +use crate::types::{Confidence, Finding, Language, PolicyEngine, PolicyOutput, ScanPass, Surface}; pub struct CompiledRule<'a> { pub rule: &'a PatternRule, @@ -144,6 +144,30 @@ pub fn execute_query( &code_snippet, ); + let mut policy_outputs = Vec::new(); + if let Some(tmpl) = compiled.rule.rego_template.as_ref() { + let mut owned: HashMap<String, String> = captures + .iter() + .map(|(k, v)| (k.to_string(), v.clone())) + .collect(); + add_template_derived_values(&mut owned); + policy_outputs.push(PolicyOutput { + engine: PolicyEngine::Rego, + content: crate::rego::render_template(tmpl, &owned), + }); + } + if let Some(tmpl) = compiled.rule.cedar_template.as_ref() { + let mut owned: HashMap<String, String> = captures + .iter() + .map(|(k, v)| (k.to_string(), v.clone())) + .collect(); + add_cedar_template_derived_values(&mut owned); + policy_outputs.push(PolicyOutput { + engine: PolicyEngine::Cedar, + content: crate::cedar::render_template(tmpl, &owned), + }); + } + findings.push(Finding { id, file: file_path.to_path_buf(), @@ -155,22 +179,7 @@ pub fn execute_query( confidence: compiled.rule.confidence, description: compiled.rule.description.clone(), pattern_rule: Some(compiled.rule.id.clone()), - rego_stub: compiled.rule.rego_template.as_ref().map(|tmpl| { - let mut owned: HashMap<String, String> = captures - .iter() - .map(|(k, v)| (k.to_string(), v.clone())) - .collect(); - add_template_derived_values(&mut owned); - crate::rego::render_template(tmpl, &owned) - }), - cedar_stub: compiled.rule.cedar_template.as_ref().map(|tmpl| { - let mut owned: HashMap<String, String> = captures - .iter() - .map(|(k, v)| (k.to_string(), v.clone())) - .collect(); - add_cedar_template_derived_values(&mut owned); - 
crate::cedar::render_template(tmpl, &owned) - }), + policy_outputs, pass: ScanPass::Structural, surface: Surface::classify(file_path), }); @@ -400,7 +409,7 @@ public IActionResult Delete(int id) => Ok();"#, ); assert_eq!(findings.len(), 1); - let cedar = findings[0].cedar_stub.as_deref().unwrap(); + let cedar = findings[0].policy_output(PolicyEngine::Cedar).unwrap(); assert!( cedar.contains(r#"principal.role in ["Admin", "Manager"]"#), "cedar should split ASP.NET comma-separated roles into a set; got: {cedar}" @@ -416,7 +425,7 @@ public IActionResult Delete(int id) => Ok();"#, ); assert_eq!(findings.len(), 1); - let rego = findings[0].rego_stub.as_deref().unwrap(); + let rego = findings[0].policy_output(PolicyEngine::Rego).unwrap(); assert!( rego.contains(r#"input.user.role in {"Admin", "Manager"}"#), "rego should split ASP.NET comma-separated roles; got: {rego}" @@ -433,7 +442,7 @@ public IActionResult Delete(int id) => Ok();"#, ); assert_eq!(findings.len(), 1); - let rego = findings[0].rego_stub.as_deref().unwrap(); + let rego = findings[0].policy_output(PolicyEngine::Rego).unwrap(); assert!( rego.contains(r#"input.user.claims["scope"] == "users.delete""#), "rego should require the claim value; got: {rego}" @@ -449,7 +458,7 @@ public IActionResult Reports() => Ok();"#, ); assert_eq!(findings.len(), 1); - let rego = findings[0].rego_stub.as_deref().unwrap(); + let rego = findings[0].policy_output(PolicyEngine::Rego).unwrap(); assert!( rego.contains(r#"input.policy == "CanReadReports""#), "rego should include the shorthand policy name; got: {rego}" @@ -492,12 +501,12 @@ public IActionResult Reports() => Ok();"#, assert_eq!(findings.len(), 1); assert_eq!(findings[0].category, crate::types::AuthCategory::Rbac); - let rego = findings[0].rego_stub.as_deref().unwrap(); + let rego = findings[0].policy_output(PolicyEngine::Rego).unwrap(); assert!( rego.contains(r#""manager" in input.user.roles"#), "rego should use a role membership check; got: {rego}" ); - let cedar = 
findings[0].cedar_stub.as_deref().unwrap(); + let cedar = findings[0].policy_output(PolicyEngine::Cedar).unwrap(); assert!( cedar.contains(r#"principal.roles.contains("manager")"#), "cedar should use a role membership check; got: {cedar}" @@ -1418,8 +1427,7 @@ match = ".*" confidence: Confidence::High, description: "test".into(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, }; diff --git a/src/types.rs b/src/types.rs index 8609e66..4af95d7 100644 --- a/src/types.rs +++ b/src/types.rs @@ -4,7 +4,30 @@ use std::sync::OnceLock; use regex::Regex; use serde::{Deserialize, Serialize}; +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, clap::ValueEnum)] +#[serde(rename_all = "lowercase")] +pub enum PolicyEngine { + Rego, + Cedar, +} + +impl std::fmt::Display for PolicyEngine { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + PolicyEngine::Rego => write!(f, "rego"), + PolicyEngine::Cedar => write!(f, "cedar"), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct PolicyOutput { + pub engine: PolicyEngine, + pub content: String, +} + #[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(from = "FindingShim")] pub struct Finding { pub id: String, pub file: PathBuf, @@ -16,14 +39,14 @@ pub struct Finding { pub confidence: Confidence, pub description: String, pub pattern_rule: Option<String>, - pub rego_stub: Option<String>, - /// Cedar policy stub. Populated when `--engine cedar` is used during - /// extract, or when a deep-mode response carries one. Kept parallel to - /// `rego_stub` (rather than collapsed into a `policy_outputs` collection) - /// so persisted findings JSON stays backward-compatible — older consumers - /// see an extra optional field they can ignore. + /// Generated policy stubs, one per engine. 
Replaces the parallel + /// `rego_stub`/`cedar_stub` fields that existed pre-Phase-B (#71). + /// Deserialization is tolerant of legacy findings: a `FindingShim` + /// reads `rego_stub` / `cedar_stub` if present and folds them into + /// `policy_outputs` so persisted findings files keep loading. + /// Serialization writes only `policy_outputs`. #[serde(default)] - pub cedar_stub: Option<String>, + pub policy_outputs: Vec<PolicyOutput>, pub pass: ScanPass, /// Where in a typical app this finding lives — frontend (UI/client) or /// backend (server/API). Inferred from the file path via simple @@ -39,6 +62,97 @@ pub struct Finding { pub surface: Surface, } +impl Finding { + /// Look up the generated policy content for an engine, if any. + pub fn policy_output(&self, engine: PolicyEngine) -> Option<&str> { + self.policy_outputs + .iter() + .find(|p| p.engine == engine) + .map(|p| p.content.as_str()) + } + + /// Insert or replace the policy output for an engine. + pub fn set_policy_output(&mut self, engine: PolicyEngine, content: String) { + if let Some(existing) = self.policy_outputs.iter_mut().find(|p| p.engine == engine) { + existing.content = content; + } else { + self.policy_outputs.push(PolicyOutput { engine, content }); + } + } +} + +/// Deserialization shim for [`Finding`]. Accepts both the current +/// `policy_outputs` array and the legacy parallel `rego_stub` / `cedar_stub` +/// fields so existing persisted findings JSON keeps loading after the Phase B +/// refactor (#71). On the read side legacy fields are folded into +/// `policy_outputs`; the serialize side never emits them. 
+#[derive(Deserialize)] +struct FindingShim { + id: String, + file: PathBuf, + line_start: usize, + line_end: usize, + code_snippet: String, + language: Language, + category: AuthCategory, + confidence: Confidence, + description: String, + pattern_rule: Option<String>, + #[serde(default)] + rego_stub: Option<String>, + #[serde(default)] + cedar_stub: Option<String>, + #[serde(default)] + policy_outputs: Vec<PolicyOutput>, + pass: ScanPass, + #[serde(default)] + surface: Surface, +} + +impl From<FindingShim> for Finding { + fn from(s: FindingShim) -> Self { + let mut policy_outputs = s.policy_outputs; + // Fold legacy fields into policy_outputs only when the new field + // doesn't already carry an entry for that engine — explicit + // `policy_outputs` wins on conflict. + if let Some(content) = s.rego_stub + && !policy_outputs + .iter() + .any(|p| p.engine == PolicyEngine::Rego) + { + policy_outputs.push(PolicyOutput { + engine: PolicyEngine::Rego, + content, + }); + } + if let Some(content) = s.cedar_stub + && !policy_outputs + .iter() + .any(|p| p.engine == PolicyEngine::Cedar) + { + policy_outputs.push(PolicyOutput { + engine: PolicyEngine::Cedar, + content, + }); + } + Finding { + id: s.id, + file: s.file, + line_start: s.line_start, + line_end: s.line_end, + code_snippet: s.code_snippet, + language: s.language, + category: s.category, + confidence: s.confidence, + description: s.description, + pattern_rule: s.pattern_rule, + policy_outputs, + pass: s.pass, + surface: s.surface, + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, clap::ValueEnum)] #[serde(rename_all = "lowercase")] pub enum Surface { @@ -389,4 +503,89 @@ mod tests { let f: Finding = serde_json::from_str(no_surface).unwrap(); assert_eq!(f.surface, Surface::Backend); } + + #[test] + fn legacy_stub_fields_fold_into_policy_outputs() { + // Pre-#71 findings JSON used parallel `rego_stub` and `cedar_stub` + // fields. 
The Phase B shim must accept both shapes and surface them + // through `policy_outputs` so consumers loading old findings files + // see the generated policies in the new place. + let legacy = r#"{ + "id": "x", + "file": "a.ts", + "line_start": 1, + "line_end": 1, + "code_snippet": "", + "language": "typescript", + "category": "rbac", + "confidence": "low", + "description": "", + "pattern_rule": null, + "rego_stub": "package x\nallow := true", + "cedar_stub": "permit(principal, action, resource);", + "pass": "structural" + }"#; + let f: Finding = serde_json::from_str(legacy).unwrap(); + assert_eq!( + f.policy_output(PolicyEngine::Rego), + Some("package x\nallow := true") + ); + assert_eq!( + f.policy_output(PolicyEngine::Cedar), + Some("permit(principal, action, resource);") + ); + } + + #[test] + fn explicit_policy_outputs_wins_over_legacy_fields() { + // If a producer wrote both shapes (e.g. mid-migration), the explicit + // `policy_outputs` entry takes precedence — matches the From impl + // contract that the new field is authoritative. + let mixed = r#"{ + "id": "x", + "file": "a.ts", + "line_start": 1, + "line_end": 1, + "code_snippet": "", + "language": "typescript", + "category": "rbac", + "confidence": "low", + "description": "", + "pattern_rule": null, + "rego_stub": "legacy", + "policy_outputs": [{"engine": "rego", "content": "current"}], + "pass": "structural" + }"#; + let f: Finding = serde_json::from_str(mixed).unwrap(); + assert_eq!(f.policy_output(PolicyEngine::Rego), Some("current")); + } + + #[test] + fn finding_serializes_only_policy_outputs() { + // After the refactor we never write `rego_stub` / `cedar_stub` on + // serialize — only the `policy_outputs` array. Existing downstream + // consumers reading the legacy keys are documented to need an + // update (see the changelog for #71). 
+ let f: Finding = serde_json::from_str( + r#"{ + "id": "x", + "file": "a.ts", + "line_start": 1, + "line_end": 1, + "code_snippet": "", + "language": "typescript", + "category": "rbac", + "confidence": "low", + "description": "", + "pattern_rule": null, + "rego_stub": "package x", + "pass": "structural" + }"#, + ) + .unwrap(); + let json = serde_json::to_string(&f).unwrap(); + assert!(!json.contains("rego_stub")); + assert!(!json.contains("cedar_stub")); + assert!(json.contains("policy_outputs")); + } } diff --git a/tests/deep_http_integration.rs b/tests/deep_http_integration.rs index 59679d7..3f0354c 100644 --- a/tests/deep_http_integration.rs +++ b/tests/deep_http_integration.rs @@ -477,8 +477,7 @@ fn structural_finding(file: &str, line: usize) -> Finding { confidence: Confidence::Low, description: "matched custom rule".into(), pattern_rule: Some("ts-custom".into()), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } diff --git a/tests/deep_subprocess_integration.rs b/tests/deep_subprocess_integration.rs index 462417f..fa78401 100644 --- a/tests/deep_subprocess_integration.rs +++ b/tests/deep_subprocess_integration.rs @@ -57,8 +57,7 @@ fn structural_finding(file: &str, line: usize) -> Finding { confidence: Confidence::Low, description: "matched custom rule".into(), pattern_rule: Some("ts-custom".into()), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } From 191fd4005a94cbc48f3a53cc92cad8843f5cf437 Mon Sep 17 00:00:00 2001 From: Brad Anderson <brad@enforceauth.com> Date: Wed, 6 May 2026 14:11:59 -0400 Subject: [PATCH 3/7] refactor: collapse rule TOML templates into policy_templates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase B (#71) part 2. 
Replace `PatternRule::rego_template` / `PatternRule::cedar_template` with a single `policy_templates: Vec<PolicyTemplate>` collection and a `template_for(engine)` helper, keyed off the canonical `PolicyEngine` enum. Rule TOML parsing accepts both shapes: - Legacy: `[rule.rego_template]` / `[rule.cedar_template]` blocks - New: `[[rule.policy_templates]]` array entries with `engine` + `template` fields Legacy blocks fold into `policy_templates` at parse time; explicit `[[rule.policy_templates]]` entries win on conflict, matching the `Finding` shim contract. No rule files change in this commit — that migration lands separately so the parser change can stand on its own. MCP `get_rule` and the rule-resource JSON now expose templates as `policy_templates: [{engine, template}]`; the rule-list summary exposes `template_engines: [...]`. Downstream consumers reading the old keys will need to update. `mcp::tools::PolicyEngineArg` was a duplicate of `types::PolicyEngine`; collapse onto the canonical enum now that it carries serde derives. 
--- src/commands/rules.rs | 32 +++++----- src/mcp/resources.rs | 7 +- src/mcp/tools.rs | 64 ++++++++++--------- src/rules/mod.rs | 142 ++++++++++++++++++++++++++++++++++++++--- src/scanner/matcher.rs | 4 +- 5 files changed, 192 insertions(+), 57 deletions(-) diff --git a/src/commands/rules.rs b/src/commands/rules.rs index 8d2730d..44f7ff0 100644 --- a/src/commands/rules.rs +++ b/src/commands/rules.rs @@ -50,21 +50,23 @@ pub fn execute(args: RulesArgs, config: ZiftConfig) -> Result<()> { } } } - // Validate Rego template if present - if let Some(ref tmpl) = rule.rego_template { - let result = crate::rego::validator::validate_template(tmpl); - if !result.valid { - let err = result.error.unwrap_or_default(); - eprintln!("FAIL {} rego_template: {err}", rule.id); - errors += 1; - } - } - // Validate Cedar template if present - if let Some(ref tmpl) = rule.cedar_template { - let result = crate::cedar::validator::validate_template(tmpl); - if !result.valid { - let err = result.error.unwrap_or_default(); - eprintln!("FAIL {} cedar_template: {err}", rule.id); + // Validate each generated policy template against its + // engine's parser. Engine-specific dispatch is one place + // because PolicyEngine is a closed enum. 
+ for tmpl in &rule.policy_templates { + let (label, valid, error) = match tmpl.engine { + crate::types::PolicyEngine::Rego => { + let r = crate::rego::validator::validate_template(&tmpl.template); + ("rego_template", r.valid, r.error) + } + crate::types::PolicyEngine::Cedar => { + let r = crate::cedar::validator::validate_template(&tmpl.template); + ("cedar_template", r.valid, r.error) + } + }; + if !valid { + let err = error.unwrap_or_default(); + eprintln!("FAIL {} {label}: {err}", rule.id); errors += 1; } } diff --git a/src/mcp/resources.rs b/src/mcp/resources.rs index 0a423c9..10d6891 100644 --- a/src/mcp/resources.rs +++ b/src/mcp/resources.rs @@ -105,6 +105,11 @@ pub fn read_resource(ctx: &ServerContext, uri: &str) -> Option<ResourceContent> } fn rule_to_json(rule: &PatternRule) -> serde_json::Value { + let policy_templates: Vec<serde_json::Value> = rule + .policy_templates + .iter() + .map(|t| json!({"engine": t.engine, "template": t.template})) + .collect(); json!({ "id": rule.id, "languages": rule.languages, @@ -112,7 +117,7 @@ fn rule_to_json(rule: &PatternRule) -> serde_json::Value { "confidence": rule.confidence, "description": rule.description, "query": rule.query_source, - "rego_template": rule.rego_template, + "policy_templates": policy_templates, }) } diff --git a/src/mcp/tools.rs b/src/mcp/tools.rs index 5c4df5f..a5c6040 100644 --- a/src/mcp/tools.rs +++ b/src/mcp/tools.rs @@ -26,7 +26,7 @@ use crate::rego::templates::{apply_confidence_wrapping, generate_default_stub, r use crate::rego::validator::validate_rego; use crate::rules::PatternRule; use crate::scanner; -use crate::types::{AuthCategory, Confidence, Finding, Language, ScanPass, Surface}; +use crate::types::{AuthCategory, Confidence, Finding, Language, PolicyEngine, ScanPass, Surface}; const DEFAULT_MAX_PROMPT_CHARS: usize = 16_000; @@ -272,14 +272,15 @@ fn list_rules(ctx: &ServerContext, args: &Value) -> Result<Value, String> { } fn rule_summary(r: &PatternRule) -> Value { + let 
template_engines: Vec<&PolicyEngine> = + r.policy_templates.iter().map(|t| &t.engine).collect(); json!({ "id": r.id, "languages": r.languages, "category": r.category, "confidence": r.confidence, "description": r.description, - "has_rego_template": r.rego_template.is_some(), - "has_cedar_template": r.cedar_template.is_some(), + "template_engines": template_engines, }) } @@ -352,6 +353,12 @@ fn get_rule(ctx: &ServerContext, args: &Value) -> Result<Value, String> { }) .collect(); + let policy_templates: Vec<Value> = rule + .policy_templates + .iter() + .map(|t| json!({"engine": t.engine, "template": t.template})) + .collect(); + Ok(json!({ "id": rule.id, "languages": rule.languages, @@ -362,8 +369,7 @@ fn get_rule(ctx: &ServerContext, args: &Value) -> Result<Value, String> { "query": rule.query_source, "predicates": predicates, "cross_predicates": cross_predicates, - "rego_template": rule.rego_template, - "cedar_template": rule.cedar_template, + "policy_templates": policy_templates, "tests": tests, })) } @@ -374,7 +380,7 @@ fn suggest_rego_descriptor() -> ToolDescriptor { ToolDescriptor { name: "suggest_rego", description: "Suggest a Rego policy stub for a finding. If a rule_id with \ - a rego_template is supplied, the template is rendered against \ + a Rego template is supplied, the template is rendered against \ string literals extracted from the snippet. Otherwise a \ category-default stub is generated. The stub is wrapped in \ confidence-appropriate guidance (commented for low, TODO \ @@ -392,7 +398,7 @@ fn suggest_rego_descriptor() -> ToolDescriptor { "rule_id": { "type": "string", "description": "Optional. If supplied and the rule has a \ - rego_template, that template wins over the default." + Rego template, that template wins over the default." 
} }, "required": ["category", "confidence", "code_snippet"], @@ -415,7 +421,7 @@ fn suggest_rego(ctx: &ServerContext, args: &Value) -> Result<Value, String> { let rendered = if let Some(rid) = parsed.rule_id.as_deref() { let rule = ctx.rules.iter().find(|r| r.id == rid); - match rule.and_then(|r| r.rego_template.as_deref()) { + match rule.and_then(|r| r.template_for(PolicyEngine::Rego)) { Some(tmpl) => { // Render the rule's template with literals extracted from the // snippet. We use an empty vars map keyed by capture name — @@ -512,13 +518,6 @@ fn validate_rego_tool(args: &Value) -> Result<Value, String> { // learning per-engine tool names. The original Rego tools stay live as // pinned aliases so existing prompts and integrations keep working. -#[derive(Debug, Clone, Copy, Deserialize)] -#[serde(rename_all = "lowercase")] -enum PolicyEngineArg { - Rego, - Cedar, -} - fn suggest_policy_descriptor() -> ToolDescriptor { ToolDescriptor { name: "suggest_policy", @@ -559,7 +558,7 @@ fn suggest_policy_descriptor() -> ToolDescriptor { #[derive(Debug, Deserialize)] struct SuggestPolicyArgs { #[serde(default)] - engine: Option<PolicyEngineArg>, + engine: Option<PolicyEngine>, category: AuthCategory, confidence: Confidence, code_snippet: String, @@ -569,7 +568,7 @@ struct SuggestPolicyArgs { fn suggest_policy(ctx: &ServerContext, args: &Value) -> Result<Value, String> { let parsed: SuggestPolicyArgs = parse_args(args, "suggest_policy")?; - let engine = parsed.engine.unwrap_or(PolicyEngineArg::Rego); + let engine = parsed.engine.unwrap_or(PolicyEngine::Rego); let rule = parsed .rule_id @@ -577,14 +576,14 @@ fn suggest_policy(ctx: &ServerContext, args: &Value) -> Result<Value, String> { .and_then(|rid| ctx.rules.iter().find(|r| r.id == rid)); let rendered = match engine { - PolicyEngineArg::Rego => match rule.and_then(|r| r.rego_template.as_deref()) { + PolicyEngine::Rego => match rule.and_then(|r| r.template_for(PolicyEngine::Rego)) { Some(tmpl) => { let vars = 
build_template_vars(&parsed.code_snippet); render_template(tmpl, &vars) } None => generate_default_stub(parsed.category, &parsed.code_snippet), }, - PolicyEngineArg::Cedar => match rule.and_then(|r| r.cedar_template.as_deref()) { + PolicyEngine::Cedar => match rule.and_then(|r| r.template_for(PolicyEngine::Cedar)) { Some(tmpl) => { let vars = build_template_vars(&parsed.code_snippet); cedar::render_template(tmpl, &vars) @@ -594,15 +593,15 @@ fn suggest_policy(ctx: &ServerContext, args: &Value) -> Result<Value, String> { }; let wrapped = match engine { - PolicyEngineArg::Rego => apply_confidence_wrapping(&rendered, parsed.confidence), - PolicyEngineArg::Cedar => { + PolicyEngine::Rego => apply_confidence_wrapping(&rendered, parsed.confidence), + PolicyEngine::Cedar => { cedar::templates::apply_confidence_wrapping(&rendered, parsed.confidence) } }; let key = match engine { - PolicyEngineArg::Rego => "rego", - PolicyEngineArg::Cedar => "cedar", + PolicyEngine::Rego => "rego", + PolicyEngine::Cedar => "cedar", }; Ok(json!({ "engine": key, @@ -638,26 +637,26 @@ fn validate_policy_descriptor() -> ToolDescriptor { #[derive(Debug, Deserialize)] struct ValidatePolicyArgs { #[serde(default)] - engine: Option<PolicyEngineArg>, + engine: Option<PolicyEngine>, policy: String, } fn validate_policy_tool(args: &Value) -> Result<Value, String> { let parsed: ValidatePolicyArgs = parse_args(args, "validate_policy")?; - let engine = parsed.engine.unwrap_or(PolicyEngineArg::Rego); + let engine = parsed.engine.unwrap_or(PolicyEngine::Rego); let (valid, error) = match engine { - PolicyEngineArg::Rego => { + PolicyEngine::Rego => { let r = validate_rego(&parsed.policy); (r.valid, r.error) } - PolicyEngineArg::Cedar => { + PolicyEngine::Cedar => { let r = cedar::validator::validate_cedar(&parsed.policy); (r.valid, r.error) } }; let key = match engine { - PolicyEngineArg::Rego => "rego", - PolicyEngineArg::Cedar => "cedar", + PolicyEngine::Rego => "rego", + PolicyEngine::Cedar => "cedar", 
}; Ok(json!({ "engine": key, @@ -950,7 +949,12 @@ function check(user: { role: string }) { }; assert_eq!(payload["id"], "ts-role-check-conditional"); assert!(payload["query"].as_str().unwrap().contains("if_statement")); - assert!(payload["rego_template"].is_string()); + // post-#71: templates surface via the `policy_templates` array. + let templates = payload["policy_templates"].as_array().unwrap(); + assert!( + templates.iter().any(|t| t["engine"] == "rego"), + "expected a rego policy template; got: {templates:?}" + ); // cross_predicates is always present (empty array if the rule has none). assert!(payload["cross_predicates"].is_array()); } diff --git a/src/rules/mod.rs b/src/rules/mod.rs index 45965bf..fd7d629 100644 --- a/src/rules/mod.rs +++ b/src/rules/mod.rs @@ -4,7 +4,7 @@ use std::path::Path; use crate::config::ZiftConfig; use crate::error::{Result, ZiftError}; -use crate::types::{AuthCategory, Confidence, Language}; +use crate::types::{AuthCategory, Confidence, Language, PolicyEngine}; use serde::Deserialize; @@ -19,11 +19,31 @@ pub struct PatternRule { pub query_source: String, pub predicates: Vec<(String, Predicate)>, pub cross_predicates: Vec<CrossPredicate>, - pub rego_template: Option<String>, - pub cedar_template: Option<String>, + /// Generated policy templates, one entry per engine. Replaces the + /// parallel `rego_template` / `cedar_template` fields that existed + /// pre-Phase-B (#71). The TOML parser accepts both the new + /// `[[rule.policy_templates]]` array form and the legacy + /// `[rule.rego_template]` / `[rule.cedar_template]` blocks. + pub policy_templates: Vec<PolicyTemplate>, pub tests: Vec<RuleTest>, } +#[derive(Debug, Clone)] +pub struct PolicyTemplate { + pub engine: PolicyEngine, + pub template: String, +} + +impl PatternRule { + /// Look up the rendered template for a policy engine, if any. 
+ pub fn template_for(&self, engine: PolicyEngine) -> Option<&str> { + self.policy_templates + .iter() + .find(|t| t.engine == engine) + .map(|t| t.template.as_str()) + } +} + #[derive(Debug, Clone)] pub enum Predicate { Match(regex::Regex), @@ -108,12 +128,23 @@ struct RuleToml { predicates: std::collections::HashMap<String, PredicateToml>, #[serde(default)] cross_predicates: Vec<CrossPredicateToml>, + /// Legacy single-engine template blocks. Read-tolerant: parsing folds + /// these into `policy_templates` at the [`PatternRule`] level. New rules + /// should use `[[rule.policy_templates]]` instead. rego_template: Option<RegoTemplateToml>, cedar_template: Option<CedarTemplateToml>, #[serde(default)] + policy_templates: Vec<PolicyTemplateToml>, + #[serde(default)] tests: Vec<RuleTestToml>, } +#[derive(Debug, Deserialize)] +struct PolicyTemplateToml { + engine: PolicyEngine, + template: String, +} + #[derive(Debug, Deserialize)] struct PredicateToml { #[serde(rename = "match")] @@ -260,6 +291,38 @@ fn parse_rule(toml_str: &str, source: &str) -> Result<PatternRule> { cross_predicates.push(parsed); } + let mut policy_templates: Vec<PolicyTemplate> = r + .policy_templates + .into_iter() + .map(|t| PolicyTemplate { + engine: t.engine, + template: t.template, + }) + .collect(); + // Fold legacy single-engine template blocks into the new collection. + // Explicit `[[rule.policy_templates]]` entries win on conflict — same + // contract as the Finding shim. 
+ if let Some(t) = r.rego_template + && !policy_templates + .iter() + .any(|p| p.engine == PolicyEngine::Rego) + { + policy_templates.push(PolicyTemplate { + engine: PolicyEngine::Rego, + template: t.template, + }); + } + if let Some(t) = r.cedar_template + && !policy_templates + .iter() + .any(|p| p.engine == PolicyEngine::Cedar) + { + policy_templates.push(PolicyTemplate { + engine: PolicyEngine::Cedar, + template: t.template, + }); + } + Ok(PatternRule { id: r.id, languages: r.languages, @@ -270,8 +333,7 @@ fn parse_rule(toml_str: &str, source: &str) -> Result<PatternRule> { query_source: r.query, predicates, cross_predicates, - rego_template: r.rego_template.map(|t| t.template), - cedar_template: r.cedar_template.map(|t| t.template), + policy_templates, tests: r .tests .into_iter() @@ -469,6 +531,70 @@ match = ".*" ); } + #[test] + fn legacy_template_blocks_fold_into_policy_templates() { + // Pre-#71 rules used `[rule.rego_template]` / `[rule.cedar_template]` + // single-engine blocks. The Phase B parser must accept these and + // surface them through `policy_templates` so external rule trees + // keep loading without forced migration. 
+ let legacy = r#" +[rule] +id = "test-legacy-templates" +languages = ["typescript"] +category = "rbac" +confidence = "high" +description = "uses legacy template blocks" +query = """ +(if_statement) @match +""" + +[rule.rego_template] +template = "package x\nallow := true" + +[rule.cedar_template] +template = "permit(principal, action, resource);" +"#; + let rule = parse_rule(legacy, "test").unwrap(); + assert_eq!( + rule.template_for(PolicyEngine::Rego), + Some("package x\nallow := true") + ); + assert_eq!( + rule.template_for(PolicyEngine::Cedar), + Some("permit(principal, action, resource);") + ); + } + + #[test] + fn policy_templates_array_form_parses() { + let new_form = r#" +[rule] +id = "test-new-templates" +languages = ["typescript"] +category = "rbac" +confidence = "high" +description = "uses new policy_templates array" +query = """ +(if_statement) @match +""" + +[[rule.policy_templates]] +engine = "rego" +template = "package x" + +[[rule.policy_templates]] +engine = "cedar" +template = "permit(principal, action, resource);" +"#; + let rule = parse_rule(new_form, "test").unwrap(); + assert_eq!(rule.policy_templates.len(), 2); + assert_eq!(rule.template_for(PolicyEngine::Rego), Some("package x")); + assert_eq!( + rule.template_for(PolicyEngine::Cedar), + Some("permit(principal, action, resource);") + ); + } + #[test] fn merge_overrides_by_id() { let r1 = PatternRule { @@ -481,8 +607,7 @@ match = ".*" query_source: "".into(), predicates: vec![], cross_predicates: vec![], - rego_template: None, - cedar_template: None, + policy_templates: vec![], tests: vec![], }; let r2 = PatternRule { @@ -495,8 +620,7 @@ match = ".*" query_source: "".into(), predicates: vec![], cross_predicates: vec![], - rego_template: None, - cedar_template: None, + policy_templates: vec![], tests: vec![], }; diff --git a/src/scanner/matcher.rs b/src/scanner/matcher.rs index 204e8c1..f1f8d05 100644 --- a/src/scanner/matcher.rs +++ b/src/scanner/matcher.rs @@ -145,7 +145,7 @@ pub fn 
execute_query( ); let mut policy_outputs = Vec::new(); - if let Some(tmpl) = compiled.rule.rego_template.as_ref() { + if let Some(tmpl) = compiled.rule.template_for(PolicyEngine::Rego) { let mut owned: HashMap<String, String> = captures .iter() .map(|(k, v)| (k.to_string(), v.clone())) @@ -156,7 +156,7 @@ pub fn execute_query( content: crate::rego::render_template(tmpl, &owned), }); } - if let Some(tmpl) = compiled.rule.cedar_template.as_ref() { + if let Some(tmpl) = compiled.rule.template_for(PolicyEngine::Cedar) { let mut owned: HashMap<String, String> = captures .iter() .map(|(k, v)| (k.to_string(), v.clone())) From b9497c6e4f1ddac49089807eb0d240f849a288ad Mon Sep 17 00:00:00 2001 From: Brad Anderson <brad@enforceauth.com> Date: Wed, 6 May 2026 14:16:31 -0400 Subject: [PATCH 4/7] refactor: extract PolicyGenerator trait MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase B (#71) part 3. Introduce a `PolicyGenerator` trait in `src/policy.rs` carrying the operations every engine needs along the extract and MCP paths: `engine`, `validate`, `render_template`, `default_stub`, `wrap_by_confidence`, and `group_and_generate`. Add `RegoGenerator` and `CedarGenerator` implementations as thin adapters over the existing engine modules, plus a `generator_for(engine)` factory. Collapse parallel pipelines onto the trait: - `extract::execute` becomes generic over `&dyn PolicyGenerator`. The two near-identical `extract_rego` / `extract_cedar` functions collapse into a single `run_extract` that pre-fills missing per-engine outputs, groups, validates, and writes — engine-specific code lives only inside the trait impls. - `mcp::tools::suggest_policy` and `validate_policy` dispatch through the trait instead of carrying their own engine `match` arms. Update CEDAR_SUPPORT.md decision log to record Phase B landing. 
--- docs/CEDAR_SUPPORT.md | 1 + src/cedar/generator.rs | 53 +++++++++++++++++ src/cedar/mod.rs | 2 + src/commands/extract.rs | 125 +++++++++++++--------------------------- src/commands/rules.rs | 14 +++-- src/lib.rs | 1 + src/mcp/tools.rs | 43 ++++---------- src/policy.rs | 81 ++++++++++++++++++++++++++ src/rego/generator.rs | 54 +++++++++++++++++ src/rego/mod.rs | 2 + 10 files changed, 252 insertions(+), 124 deletions(-) create mode 100644 src/cedar/generator.rs create mode 100644 src/policy.rs create mode 100644 src/rego/generator.rs diff --git a/docs/CEDAR_SUPPORT.md b/docs/CEDAR_SUPPORT.md index bbe7a4a..b233103 100644 --- a/docs/CEDAR_SUPPORT.md +++ b/docs/CEDAR_SUPPORT.md @@ -135,3 +135,4 @@ land on its own merits, not on a wait-for-feedback gate. - **2026-05-03** — memo drafted alongside the user-facing rename from "Rego for OPA" to "Policy as Code (PaC)." No code changes proposed yet; doc captures the investigation while context is fresh. - **2026-05-06** — Phase A landed (`feat: add Cedar as a second policy engine`). Phase B's "wait for users" gate and "JSON-schema break needs a major-version bump" gate dropped: the `*_stub` → `policy_outputs` migration is in Phase B's scope and ships with a deserialization shim, so it doesn't need a coordinated version cut. +- **2026-05-06** — Phase B landed (#71). `Finding::rego_stub` / `cedar_stub` collapsed into `policy_outputs: Vec<PolicyOutput>` (legacy fields fold in via a `FindingShim`). `PatternRule::rego_template` / `cedar_template` collapsed into `policy_templates: Vec<PolicyTemplate>` (legacy `[rule.rego_template]` / `[rule.cedar_template]` blocks parse alongside the new `[[rule.policy_templates]]` array). `PolicyGenerator` trait introduced in `src/policy.rs` with `RegoGenerator` / `CedarGenerator` impls; `extract::execute` is now generic over `&dyn PolicyGenerator`, and the MCP `suggest_policy` / `validate_policy` tools dispatch through the trait. 
JSON output shape changed: `rego_stub` / `cedar_stub` replaced by `policy_outputs`; MCP `get_rule` / `list_rules` now expose `policy_templates` and `template_engines` instead of `rego_template` / `has_rego_template`. diff --git a/src/cedar/generator.rs b/src/cedar/generator.rs new file mode 100644 index 0000000..70c804e --- /dev/null +++ b/src/cedar/generator.rs @@ -0,0 +1,53 @@ +//! [`PolicyGenerator`] implementation for Cedar. Thin adapter over the +//! engine-specific `templates`, `validator`, and `grouping` modules. + +use std::collections::HashMap; +use std::path::Path; + +use crate::policy::{PolicyFile, PolicyGenerator, ValidationResult}; +use crate::types::{AuthCategory, Confidence, Finding, PolicyEngine}; + +pub struct CedarGenerator; + +impl PolicyGenerator for CedarGenerator { + fn engine(&self) -> PolicyEngine { + PolicyEngine::Cedar + } + + fn validate(&self, policy: &str) -> ValidationResult { + let r = super::validator::validate_cedar(policy); + ValidationResult { + valid: r.valid, + error: r.error, + } + } + + fn render_template(&self, template: &str, vars: &HashMap<String, String>) -> String { + super::templates::render_template(template, vars) + } + + fn default_stub(&self, category: AuthCategory, snippet: &str) -> String { + super::templates::generate_default_stub(category, snippet) + } + + fn wrap_by_confidence(&self, body: &str, confidence: Confidence) -> String { + super::templates::apply_confidence_wrapping(body, confidence) + } + + fn group_and_generate( + &self, + findings: &[Finding], + policy_prefix: &str, + output_dir: &Path, + ) -> Vec<PolicyFile> { + super::grouping::group_findings(findings, policy_prefix, output_dir) + .into_iter() + .map(|f| PolicyFile { + label: f.label, + output_path: f.output_path, + content: f.content, + finding_count: f.finding_count, + }) + .collect() + } +} diff --git a/src/cedar/mod.rs b/src/cedar/mod.rs index 37e46a8..e921c3a 100644 --- a/src/cedar/mod.rs +++ b/src/cedar/mod.rs @@ -1,6 +1,8 @@ +pub mod 
generator; pub mod grouping; pub mod templates; pub mod validator; +pub use generator::CedarGenerator; pub use grouping::group_findings; pub use templates::render_template; diff --git a/src/commands/extract.rs b/src/commands/extract.rs index a140e75..bbf0155 100644 --- a/src/commands/extract.rs +++ b/src/commands/extract.rs @@ -6,7 +6,8 @@ use serde::Deserialize; use crate::cli::ExtractArgs; use crate::config::ZiftConfig; use crate::error::{Result, ZiftError}; -use crate::types::{Finding, PolicyEngine}; +use crate::policy::{PolicyGenerator, generator_for}; +use crate::types::Finding; /// Minimal struct for deserializing scan output — we only need the findings. #[derive(Deserialize)] @@ -46,88 +47,40 @@ pub fn execute(args: ExtractArgs, config: ZiftConfig) -> Result<()> { return Ok(()); } - match args.engine { - PolicyEngine::Rego => extract_rego(&mut findings, policy_prefix, &output_dir), - PolicyEngine::Cedar => extract_cedar(&mut findings, policy_prefix, &output_dir), - } -} - -fn extract_rego(findings: &mut [Finding], policy_prefix: &str, output_dir: &Path) -> Result<()> { - use crate::rego::{self, templates}; - - for finding in findings.iter_mut() { - if finding.policy_output(PolicyEngine::Rego).is_none() { - let stub = templates::generate_default_stub(finding.category, &finding.code_snippet); - finding.set_policy_output(PolicyEngine::Rego, stub); - } - } - - let rego_files = rego::group_findings(findings, policy_prefix, output_dir); - - // Canonicalize once before writing any files. Each generated file shares - // the same `output_dir`, so canonicalizing per-file inside the loop was - // redundant work and a stray `output_dir` rename mid-loop would break - // anyway. 
- std::fs::create_dir_all(output_dir)?; - let canonical_output_dir = output_dir.canonicalize().map_err(|e| { - ZiftError::General(format!( - "failed to resolve output dir '{}': {e}", - output_dir.display() - )) - })?; - - let mut total_files = 0; - let mut validation_warnings = 0; - for rego_file in ®o_files { - let validation = rego::validator::validate_rego(®o_file.content); - let status = if validation.valid { "OK" } else { "WARN" }; - - write_policy_file( - ®o_file.output_path, - ®o_file.content, - &canonical_output_dir, - )?; - total_files += 1; - eprintln!( - " [{status}] {} ({} findings) → {}", - rego_file.package_name, - rego_file.finding_count, - rego_file.output_path.display(), - ); - if let Some(err) = validation.error { - eprintln!(" ⚠ Rego parse warning: {err}"); - validation_warnings += 1; - } - } - - eprintln!( - "\nGenerated {total_files} Rego files from {} findings.", - findings.len(), - ); - if validation_warnings > 0 { - eprintln!( - "{validation_warnings} file(s) have Rego syntax warnings — review before deploying.", - ); - } - Ok(()) + let generator = generator_for(args.engine); + run_extract( + generator.as_ref(), + &mut findings, + policy_prefix, + &output_dir, + ) } -fn extract_cedar(findings: &mut [Finding], policy_prefix: &str, output_dir: &Path) -> Result<()> { - use crate::cedar::{self, templates}; - - // Mirror of the Rego pre-fill: if a finding has no Cedar policy_output - // yet (e.g. it came from an older scan, or its rule has no Cedar - // template), synthesize one from the category default. This keeps Cedar - // coverage at 100% — no rule produces zero output. +/// Engine-agnostic extract pipeline. Pre-fills any missing per-engine +/// policy output on each finding (so Cedar/Rego coverage stays at 100% +/// even when a rule has no template), groups findings into output files, +/// validates each file with the engine's parser, and writes them under a +/// canonicalised `output_dir`. 
+fn run_extract( + generator: &dyn PolicyGenerator, + findings: &mut [Finding], + policy_prefix: &str, + output_dir: &Path, +) -> Result<()> { + let engine = generator.engine(); for finding in findings.iter_mut() { - if finding.policy_output(PolicyEngine::Cedar).is_none() { - let stub = templates::generate_default_stub(finding.category, &finding.code_snippet); - finding.set_policy_output(PolicyEngine::Cedar, stub); + if finding.policy_output(engine).is_none() { + let stub = generator.default_stub(finding.category, &finding.code_snippet); + finding.set_policy_output(engine, stub); } } - let cedar_files = cedar::group_findings(findings, policy_prefix, output_dir); + let policy_files = generator.group_and_generate(findings, policy_prefix, output_dir); + // Canonicalize once before writing any files. Each generated file + // shares the same `output_dir`, so canonicalizing per-file inside + // the loop was redundant work and a stray `output_dir` rename + // mid-loop would break anyway. std::fs::create_dir_all(output_dir)?; let canonical_output_dir = output_dir.canonicalize().map_err(|e| { ZiftError::General(format!( @@ -138,35 +91,35 @@ fn extract_cedar(findings: &mut [Finding], policy_prefix: &str, output_dir: &Pat let mut total_files = 0; let mut validation_warnings = 0; - for cedar_file in &cedar_files { - let validation = cedar::validator::validate_cedar(&cedar_file.content); + for policy_file in &policy_files { + let validation = generator.validate(&policy_file.content); let status = if validation.valid { "OK" } else { "WARN" }; write_policy_file( - &cedar_file.output_path, - &cedar_file.content, + &policy_file.output_path, + &policy_file.content, &canonical_output_dir, )?; total_files += 1; eprintln!( " [{status}] {} ({} findings) → {}", - cedar_file.label, - cedar_file.finding_count, - cedar_file.output_path.display(), + policy_file.label, + policy_file.finding_count, + policy_file.output_path.display(), ); if let Some(err) = validation.error { - eprintln!(" ⚠ 
Cedar parse warning: {err}"); + eprintln!(" ⚠ {engine} parse warning: {err}"); validation_warnings += 1; } } eprintln!( - "\nGenerated {total_files} Cedar files from {} findings.", + "\nGenerated {total_files} {engine} files from {} findings.", findings.len(), ); if validation_warnings > 0 { eprintln!( - "{validation_warnings} file(s) have Cedar syntax warnings — review before deploying.", + "{validation_warnings} file(s) have {engine} syntax warnings — review before deploying.", ); } Ok(()) diff --git a/src/commands/rules.rs b/src/commands/rules.rs index 44f7ff0..fce3d19 100644 --- a/src/commands/rules.rs +++ b/src/commands/rules.rs @@ -51,22 +51,24 @@ pub fn execute(args: RulesArgs, config: ZiftConfig) -> Result<()> { } } // Validate each generated policy template against its - // engine's parser. Engine-specific dispatch is one place - // because PolicyEngine is a closed enum. + // engine's parser. The template-level validators wrap each + // body in a minimal module before parsing, which is engine + // shape — keep the dispatch local rather than pushing a + // template-flavored variant onto the PolicyGenerator trait. 
for tmpl in &rule.policy_templates { - let (label, valid, error) = match tmpl.engine { + let (valid, error) = match tmpl.engine { crate::types::PolicyEngine::Rego => { let r = crate::rego::validator::validate_template(&tmpl.template); - ("rego_template", r.valid, r.error) + (r.valid, r.error) } crate::types::PolicyEngine::Cedar => { let r = crate::cedar::validator::validate_template(&tmpl.template); - ("cedar_template", r.valid, r.error) + (r.valid, r.error) } }; if !valid { let err = error.unwrap_or_default(); - eprintln!("FAIL {} {label}: {err}", rule.id); + eprintln!("FAIL {} {} template: {err}", rule.id, tmpl.engine); errors += 1; } } diff --git a/src/lib.rs b/src/lib.rs index 5d5f8d2..8dfcf79 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,6 +19,7 @@ pub mod cedar; pub mod cli; pub mod error; +pub mod policy; pub mod rego; pub mod rules; pub mod types; diff --git a/src/mcp/tools.rs b/src/mcp/tools.rs index a5c6040..331f0c9 100644 --- a/src/mcp/tools.rs +++ b/src/mcp/tools.rs @@ -15,13 +15,13 @@ use std::path::{Path, PathBuf}; use serde::Deserialize; use serde_json::{Value, json}; -use crate::cedar; use crate::cli::ScanArgs; use crate::deep::candidate::{Candidate, CandidateKind}; use crate::deep::context::expand_region; use crate::deep::prompt::{PromptInputs, render}; use crate::mcp::protocol::{ToolDescriptor, ToolsCallResult, ToolsListResult}; use crate::mcp::server::ServerContext; +use crate::policy::generator_for; use crate::rego::templates::{apply_confidence_wrapping, generate_default_stub, render_template}; use crate::rego::validator::validate_rego; use crate::rules::PatternRule; @@ -569,36 +569,23 @@ struct SuggestPolicyArgs { fn suggest_policy(ctx: &ServerContext, args: &Value) -> Result<Value, String> { let parsed: SuggestPolicyArgs = parse_args(args, "suggest_policy")?; let engine = parsed.engine.unwrap_or(PolicyEngine::Rego); + let generator = generator_for(engine); let rule = parsed .rule_id .as_deref() .and_then(|rid| ctx.rules.iter().find(|r| r.id 
== rid)); - let rendered = match engine { - PolicyEngine::Rego => match rule.and_then(|r| r.template_for(PolicyEngine::Rego)) { - Some(tmpl) => { - let vars = build_template_vars(&parsed.code_snippet); - render_template(tmpl, &vars) - } - None => generate_default_stub(parsed.category, &parsed.code_snippet), - }, - PolicyEngine::Cedar => match rule.and_then(|r| r.template_for(PolicyEngine::Cedar)) { - Some(tmpl) => { - let vars = build_template_vars(&parsed.code_snippet); - cedar::render_template(tmpl, &vars) - } - None => cedar::templates::generate_default_stub(parsed.category, &parsed.code_snippet), - }, - }; - - let wrapped = match engine { - PolicyEngine::Rego => apply_confidence_wrapping(&rendered, parsed.confidence), - PolicyEngine::Cedar => { - cedar::templates::apply_confidence_wrapping(&rendered, parsed.confidence) + let rendered = match rule.and_then(|r| r.template_for(engine)) { + Some(tmpl) => { + let vars = build_template_vars(&parsed.code_snippet); + generator.render_template(tmpl, &vars) } + None => generator.default_stub(parsed.category, &parsed.code_snippet), }; + let wrapped = generator.wrap_by_confidence(&rendered, parsed.confidence); + let key = match engine { PolicyEngine::Rego => "rego", PolicyEngine::Cedar => "cedar", @@ -644,16 +631,8 @@ struct ValidatePolicyArgs { fn validate_policy_tool(args: &Value) -> Result<Value, String> { let parsed: ValidatePolicyArgs = parse_args(args, "validate_policy")?; let engine = parsed.engine.unwrap_or(PolicyEngine::Rego); - let (valid, error) = match engine { - PolicyEngine::Rego => { - let r = validate_rego(&parsed.policy); - (r.valid, r.error) - } - PolicyEngine::Cedar => { - let r = cedar::validator::validate_cedar(&parsed.policy); - (r.valid, r.error) - } - }; + let result = generator_for(engine).validate(&parsed.policy); + let (valid, error) = (result.valid, result.error); let key = match engine { PolicyEngine::Rego => "rego", PolicyEngine::Cedar => "cedar", diff --git a/src/policy.rs b/src/policy.rs new 
file mode 100644 index 0000000..7d14bc8 --- /dev/null +++ b/src/policy.rs @@ -0,0 +1,81 @@ +//! Engine-agnostic policy generation. The [`PolicyGenerator`] trait +//! collapses the parallel Rego/Cedar pipelines that grew out of Phase A +//! (#27) into a single dispatch surface keyed off [`PolicyEngine`]. +//! +//! Generators are stateless and cheap to construct, so callers obtain one +//! per engine via [`generator_for`] and pass it through as `&dyn +//! PolicyGenerator`. The trait carries the operations every engine needs +//! along the extract and MCP paths: validate a finished policy, render a +//! captured template, generate a category-default stub, wrap a stub for +//! its confidence level, and group findings into per-source-file output +//! units. + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use crate::types::{AuthCategory, Confidence, Finding, PolicyEngine}; + +/// One generated output file produced by a [`PolicyGenerator`]. The +/// `label` is the engine's human-readable identifier — Rego packages use +/// dotted names (`app.api.orders`); Cedar files reuse the same dotted +/// shape purely for log-line consistency since Cedar has no package +/// keyword. The body of the file is `content`; `finding_count` is the +/// number of findings folded into it. +pub struct PolicyFile { + pub label: String, + pub output_path: PathBuf, + pub content: String, + pub finding_count: usize, +} + +/// Result of validating a generated policy string. Engines parse the +/// content with their respective parsers (`regorus` for Rego, `cedar-policy` +/// for Cedar) and report the first parse error, if any. +#[derive(Debug)] +pub struct ValidationResult { + pub valid: bool, + pub error: Option<String>, +} + +pub trait PolicyGenerator { + /// Which engine this generator produces output for. + fn engine(&self) -> PolicyEngine; + + /// Validate a complete policy string. 
The engine-specific parser is + /// the source of truth — callers don't need to know which one ran. + fn validate(&self, policy: &str) -> ValidationResult; + + /// Render a `{{var}}`-templated policy body against captures. The + /// caller is responsible for adding any engine-specific derived + /// variables (e.g. role-set expansions) before calling. + fn render_template(&self, template: &str, vars: &HashMap<String, String>) -> String; + + /// Generate the category's default stub when a finding has no rule + /// template available. + fn default_stub(&self, category: AuthCategory, snippet: &str) -> String; + + /// Wrap a generated stub in confidence-appropriate guidance — fully + /// commented for `Low`, TODO-prefixed for `Medium`, raw for `High`. + /// The exact comment syntax is engine-specific. + fn wrap_by_confidence(&self, body: &str, confidence: Confidence) -> String; + + /// Group findings into per-source-file output units. The exact + /// per-engine layout (Rego packages vs flat Cedar files) lives in + /// the engine modules. + fn group_and_generate( + &self, + findings: &[Finding], + policy_prefix: &str, + output_dir: &Path, + ) -> Vec<PolicyFile>; +} + +/// Obtain the generator for a policy engine. Generators are zero-sized +/// and constructed on demand so dispatch sites can stay generic over +/// `&dyn PolicyGenerator` without thinking about ownership. +pub fn generator_for(engine: PolicyEngine) -> Box<dyn PolicyGenerator> { + match engine { + PolicyEngine::Rego => Box::new(crate::rego::RegoGenerator), + PolicyEngine::Cedar => Box::new(crate::cedar::CedarGenerator), + } +} diff --git a/src/rego/generator.rs b/src/rego/generator.rs new file mode 100644 index 0000000..aa96fe2 --- /dev/null +++ b/src/rego/generator.rs @@ -0,0 +1,54 @@ +//! [`PolicyGenerator`] implementation for Rego/OPA. Thin adapter over the +//! engine-specific `templates`, `validator`, and `grouping` modules so +//! each layer can stay focused on its own concern. 
+ +use std::collections::HashMap; +use std::path::Path; + +use crate::policy::{PolicyFile, PolicyGenerator, ValidationResult}; +use crate::types::{AuthCategory, Confidence, Finding, PolicyEngine}; + +pub struct RegoGenerator; + +impl PolicyGenerator for RegoGenerator { + fn engine(&self) -> PolicyEngine { + PolicyEngine::Rego + } + + fn validate(&self, policy: &str) -> ValidationResult { + let r = super::validator::validate_rego(policy); + ValidationResult { + valid: r.valid, + error: r.error, + } + } + + fn render_template(&self, template: &str, vars: &HashMap<String, String>) -> String { + super::templates::render_template(template, vars) + } + + fn default_stub(&self, category: AuthCategory, snippet: &str) -> String { + super::templates::generate_default_stub(category, snippet) + } + + fn wrap_by_confidence(&self, body: &str, confidence: Confidence) -> String { + super::templates::apply_confidence_wrapping(body, confidence) + } + + fn group_and_generate( + &self, + findings: &[Finding], + policy_prefix: &str, + output_dir: &Path, + ) -> Vec<PolicyFile> { + super::grouping::group_findings(findings, policy_prefix, output_dir) + .into_iter() + .map(|f| PolicyFile { + label: f.package_name, + output_path: f.output_path, + content: f.content, + finding_count: f.finding_count, + }) + .collect() + } +} diff --git a/src/rego/mod.rs b/src/rego/mod.rs index 37e46a8..9fa9734 100644 --- a/src/rego/mod.rs +++ b/src/rego/mod.rs @@ -1,6 +1,8 @@ +pub mod generator; pub mod grouping; pub mod templates; pub mod validator; +pub use generator::RegoGenerator; pub use grouping::group_findings; pub use templates::render_template; From 8bca94021a28b35ec435852c492cc651ca9967b0 Mon Sep 17 00:00:00 2001 From: Brad Anderson <brad@enforceauth.com> Date: Wed, 6 May 2026 19:52:38 -0400 Subject: [PATCH 5/7] refactor: tighten PolicyGenerator dispatch and dedupe engine entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review nits on the 
PolicyGenerator extraction (#71): - Restore capitalized engine name in extract output by adding PolicyEngine::human_name() alongside as_str(); Display now delegates to as_str() so the lowercase canonical form stays the single source. - Drop the dead default-stub fallback in rego/cedar grouping — the extract pipeline already pre-fills policy_output for every finding, so grouping can treat its presence as an invariant. Tests updated to mirror the pre-fill. - Replace match arms that derived "rego"/"cedar" string keys in MCP tools with engine.as_str(). - Dedupe policy_outputs (FindingShim) and policy_templates (parse_rule) by engine on construction so duplicate explicit entries can't sneak past first-match lookups. - generator_for now returns &'static dyn PolicyGenerator backed by module-level statics, dropping the per-call Box allocation. --- src/cedar/grouping.rs | 24 ++++++++++++++++++------ src/commands/extract.rs | 10 +++++----- src/mcp/tools.rs | 16 ++++------------ src/policy.rs | 13 ++++++++----- src/rego/grouping.rs | 35 +++++++++++++++++++++++++---------- src/rules/mod.rs | 22 ++++++++++++++-------- src/types.rs | 39 ++++++++++++++++++++++++++++++++++----- 7 files changed, 108 insertions(+), 51 deletions(-) diff --git a/src/cedar/grouping.rs b/src/cedar/grouping.rs index 5f20352..79ac2be 100644 --- a/src/cedar/grouping.rs +++ b/src/cedar/grouping.rs @@ -130,11 +130,15 @@ fn build_cedar_content(source_file: &Path, findings: &[&Finding]) -> String { }; lines.push(format!("// Original: {}", truncated.trim())); - let stub = match finding.policy_output(PolicyEngine::Cedar) { - Some(s) => s.to_string(), - None => templates::generate_default_stub(finding.category, &finding.code_snippet), + // The extract pipeline pre-fills `policy_output(Cedar)` for every + // finding before calling into grouping (see + // `commands::extract::run_extract`), so we treat its presence as + // an invariant and skip findings without one rather than silently + // synthesizing a default — 
that would mask a missing pre-fill. + let Some(stub) = finding.policy_output(PolicyEngine::Cedar) else { + continue; }; - let wrapped = templates::apply_confidence_wrapping(&stub, finding.confidence); + let wrapped = templates::apply_confidence_wrapping(stub, finding.confidence); for line in wrapped.lines() { lines.push(line.to_string()); } @@ -149,8 +153,11 @@ mod tests { use super::*; use crate::types::*; + /// Mirrors the pre-fill `commands::extract::run_extract` performs + /// before invoking `group_findings`: every finding lands here with a + /// rendered Cedar stub already attached. fn finding(file: &str, snippet: &str, category: AuthCategory) -> Finding { - Finding { + let mut f = Finding { id: "x".into(), file: PathBuf::from(file), line_start: 10, @@ -164,7 +171,12 @@ mod tests { policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, - } + }; + f.set_policy_output( + PolicyEngine::Cedar, + templates::generate_default_stub(f.category, &f.code_snippet), + ); + f } #[test] diff --git a/src/commands/extract.rs b/src/commands/extract.rs index bbf0155..f0f7134 100644 --- a/src/commands/extract.rs +++ b/src/commands/extract.rs @@ -47,9 +47,8 @@ pub fn execute(args: ExtractArgs, config: ZiftConfig) -> Result<()> { return Ok(()); } - let generator = generator_for(args.engine); run_extract( - generator.as_ref(), + generator_for(args.engine), &mut findings, policy_prefix, &output_dir, @@ -68,6 +67,7 @@ fn run_extract( output_dir: &Path, ) -> Result<()> { let engine = generator.engine(); + let engine_name = engine.human_name(); for finding in findings.iter_mut() { if finding.policy_output(engine).is_none() { let stub = generator.default_stub(finding.category, &finding.code_snippet); @@ -108,18 +108,18 @@ fn run_extract( policy_file.output_path.display(), ); if let Some(err) = validation.error { - eprintln!(" ⚠ {engine} parse warning: {err}"); + eprintln!(" ⚠ {engine_name} parse warning: {err}"); validation_warnings += 1; } } eprintln!( - 
"\nGenerated {total_files} {engine} files from {} findings.", + "\nGenerated {total_files} {engine_name} files from {} findings.", findings.len(), ); if validation_warnings > 0 { eprintln!( - "{validation_warnings} file(s) have {engine} syntax warnings — review before deploying.", + "{validation_warnings} file(s) have {engine_name} syntax warnings — review before deploying.", ); } Ok(()) diff --git a/src/mcp/tools.rs b/src/mcp/tools.rs index 331f0c9..8c75c33 100644 --- a/src/mcp/tools.rs +++ b/src/mcp/tools.rs @@ -586,10 +586,7 @@ fn suggest_policy(ctx: &ServerContext, args: &Value) -> Result<Value, String> { let wrapped = generator.wrap_by_confidence(&rendered, parsed.confidence); - let key = match engine { - PolicyEngine::Rego => "rego", - PolicyEngine::Cedar => "cedar", - }; + let key = engine.as_str(); Ok(json!({ "engine": key, "policy": wrapped, @@ -632,15 +629,10 @@ fn validate_policy_tool(args: &Value) -> Result<Value, String> { let parsed: ValidatePolicyArgs = parse_args(args, "validate_policy")?; let engine = parsed.engine.unwrap_or(PolicyEngine::Rego); let result = generator_for(engine).validate(&parsed.policy); - let (valid, error) = (result.valid, result.error); - let key = match engine { - PolicyEngine::Rego => "rego", - PolicyEngine::Cedar => "cedar", - }; Ok(json!({ - "engine": key, - "valid": valid, - "error": error, + "engine": engine.as_str(), + "valid": result.valid, + "error": result.error, })) } diff --git a/src/policy.rs b/src/policy.rs index 7d14bc8..f6b3618 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -71,11 +71,14 @@ pub trait PolicyGenerator { } /// Obtain the generator for a policy engine. Generators are zero-sized -/// and constructed on demand so dispatch sites can stay generic over -/// `&dyn PolicyGenerator` without thinking about ownership. -pub fn generator_for(engine: PolicyEngine) -> Box<dyn PolicyGenerator> { +/// unit structs, so we hand out `&'static` references to module-level +/// statics and avoid per-call allocation. 
Dispatch sites stay generic +/// over `&dyn PolicyGenerator`. +pub fn generator_for(engine: PolicyEngine) -> &'static dyn PolicyGenerator { + static REGO: crate::rego::RegoGenerator = crate::rego::RegoGenerator; + static CEDAR: crate::cedar::CedarGenerator = crate::cedar::CedarGenerator; match engine { - PolicyEngine::Rego => Box::new(crate::rego::RegoGenerator), - PolicyEngine::Cedar => Box::new(crate::cedar::CedarGenerator), + PolicyEngine::Rego => &REGO, + PolicyEngine::Cedar => &CEDAR, + } } diff --git a/src/rego/grouping.rs b/src/rego/grouping.rs index c2fe6a1..1c16140 100644 --- a/src/rego/grouping.rs +++ b/src/rego/grouping.rs @@ -147,11 +147,15 @@ fn build_rego_content(package_name: &str, source_file: &Path, findings: &[&Findi }; lines.push(format!("# Original: {}", truncated.trim())); - // Get the Rego stub - let stub = match finding.policy_output(PolicyEngine::Rego) { - Some(s) => s.to_string(), - None => templates::generate_default_stub(finding.category, &finding.code_snippet), + // The extract pipeline pre-fills `policy_output(Rego)` for every + // finding before calling into grouping (see + // `commands::extract::run_extract`), so we treat its presence as + // an invariant and skip findings without one rather than silently + // synthesizing a default — that would mask a missing pre-fill. + let Some(stub) = finding.policy_output(PolicyEngine::Rego) else { + continue; }; + let stub = stub.to_string(); // Apply confidence wrapping let wrapped = templates::apply_confidence_wrapping(&stub, finding.confidence); @@ -219,9 +223,20 @@ mod tests { assert_eq!(path, PathBuf::from("./policies/api/orders.rego")); } + /// Mirrors the pre-fill `commands::extract::run_extract` performs + /// before invoking `group_findings`: every finding lands here with a + /// rendered Rego stub already attached. 
+ fn with_default_stub(mut f: Finding) -> Finding { + f.set_policy_output( + PolicyEngine::Rego, + templates::generate_default_stub(f.category, &f.code_snippet), + ); + f + } + #[test] fn group_findings_single_file() { - let findings = vec![Finding { + let findings = vec![with_default_stub(Finding { id: "abc".into(), file: PathBuf::from("src/api/orders.ts"), line_start: 10, @@ -235,7 +250,7 @@ mod tests { policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, - }]; + })]; let files = group_findings(&findings, "app", Path::new("./policies")); assert_eq!(files.len(), 1); @@ -248,7 +263,7 @@ mod tests { #[test] fn group_findings_multiple_files() { let findings = vec![ - Finding { + with_default_stub(Finding { id: "a".into(), file: PathBuf::from("src/api/orders.ts"), line_start: 10, @@ -262,8 +277,8 @@ mod tests { policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, - }, - Finding { + }), + with_default_stub(Finding { id: "b".into(), file: PathBuf::from("src/api/users.ts"), line_start: 5, @@ -277,7 +292,7 @@ mod tests { policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, - }, + }), ]; let files = group_findings(&findings, "app", Path::new("./policies")); diff --git a/src/rules/mod.rs b/src/rules/mod.rs index fd7d629..15f180a 100644 --- a/src/rules/mod.rs +++ b/src/rules/mod.rs @@ -291,14 +291,20 @@ fn parse_rule(toml_str: &str, source: &str) -> Result<PatternRule> { cross_predicates.push(parsed); } - let mut policy_templates: Vec<PolicyTemplate> = r - .policy_templates - .into_iter() - .map(|t| PolicyTemplate { - engine: t.engine, - template: t.template, - }) - .collect(); + // Dedupe by engine, keeping the first occurrence. `template_for` + // returns the first match, so hand-written TOML with two + // `[[rule.policy_templates]]` blocks for the same engine would + // silently use one of them — drop the rest at parse time so the + // in-memory shape can't surprise later code. 
+ let mut policy_templates: Vec<PolicyTemplate> = Vec::with_capacity(r.policy_templates.len()); + for t in r.policy_templates { + if !policy_templates.iter().any(|p| p.engine == t.engine) { + policy_templates.push(PolicyTemplate { + engine: t.engine, + template: t.template, + }); + } + } // Fold legacy single-engine template blocks into the new collection. // Explicit `[[rule.policy_templates]]` entries win on conflict — same // contract as the Finding shim. diff --git a/src/types.rs b/src/types.rs index 4af95d7..778bd14 100644 --- a/src/types.rs +++ b/src/types.rs @@ -11,15 +11,34 @@ pub enum PolicyEngine { Cedar, } -impl std::fmt::Display for PolicyEngine { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl PolicyEngine { + /// Lowercase canonical identifier used in JSON keys, CLI flags, and + /// log lines. Stable across the public surface — matches the serde + /// representation. + pub fn as_str(&self) -> &'static str { + match self { + PolicyEngine::Rego => "rego", + PolicyEngine::Cedar => "cedar", + } + } + + /// Human-readable name for prose-style output (e.g. "Generated 3 + /// Rego files"). Distinct from [`Self::as_str`] so user-facing + /// messages stay capitalized. + pub fn human_name(&self) -> &'static str { match self { - PolicyEngine::Rego => write!(f, "rego"), - PolicyEngine::Cedar => write!(f, "cedar"), + PolicyEngine::Rego => "Rego", + PolicyEngine::Cedar => "Cedar", } } } +impl std::fmt::Display for PolicyEngine { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct PolicyOutput { pub engine: PolicyEngine, @@ -111,7 +130,17 @@ struct FindingShim { impl From<FindingShim> for Finding { fn from(s: FindingShim) -> Self { - let mut policy_outputs = s.policy_outputs; + // Dedupe explicit entries by engine, keeping the first occurrence. 
+ // Hand-written or mid-migration JSON could carry duplicates; the + // lookup helpers (`policy_output`) silently return the first match, + // so dropping the rest at parse time keeps the in-memory shape + // honest. + let mut policy_outputs: Vec<PolicyOutput> = Vec::with_capacity(s.policy_outputs.len()); + for po in s.policy_outputs { + if !policy_outputs.iter().any(|p| p.engine == po.engine) { + policy_outputs.push(po); + } + } // Fold legacy fields into policy_outputs only when the new field // doesn't already carry an entry for that engine — explicit // `policy_outputs` wins on conflict. From 6627b8016ef7132d353676be8581bdd5216c3ac8 Mon Sep 17 00:00:00 2001 From: Brad Anderson <brad@enforceauth.com> Date: Wed, 6 May 2026 20:40:06 -0400 Subject: [PATCH 6/7] fix: address PR #79 review feedback - src/cedar/grouping.rs, src/rego/grouping.rs: warn (instead of silently skip) when a finding lacks the engine's policy_output, so the pre-fill invariant is observable rather than masked. - src/types.rs: dedupe duplicate engine entries in `set_policy_output` via retain+push so the vector holds at most one PolicyOutput per engine even for in-memory-built Findings. - src/commands/extract.rs: reject existing symlinks at the leaf of `write_policy_file` to close a follow-the-link escape past the canonical-parent containment check. - src/mcp/tools.rs: collapse `suggest_rego` and `validate_rego` into thin aliases that forward to `suggest_policy` / `validate_policy_tool` pinned to Rego, eliminating the parallel dispatch path. Extend `build_template_vars` with `roles_set` / `cedar_roles_set` and CSV normalisation so MCP rule_id-driven suggestions render the same as the structural scanner path for rules like ASP.NET Authorize(Roles="Admin,Manager"). - src/mcp/resources.rs: assert `policy_templates` is a JSON array in `read_rule_returns_definition` to guard against regressions on the renamed field. 
- src/lib.rs: add the `policy` module to the crate-level Stable public API rustdoc list. - .agents/commands/pr.md: tighten the plan-file matching algorithm (token derivation, no-match / multi-match / no-checklist cases). --- .agents/commands/pr.md | 13 +++--- src/cedar/grouping.rs | 6 +++ src/commands/extract.rs | 11 ++++++ src/lib.rs | 1 + src/mcp/resources.rs | 4 ++ src/mcp/tools.rs | 87 +++++++++++++++++++++-------------------- src/rego/grouping.rs | 6 +++ src/types.rs | 12 +++--- 8 files changed, 86 insertions(+), 54 deletions(-) diff --git a/.agents/commands/pr.md b/.agents/commands/pr.md index aa97720..de4fe97 100644 --- a/.agents/commands/pr.md +++ b/.agents/commands/pr.md @@ -12,11 +12,14 @@ When activated, create a pull request for the current branch: - Run `git log main..HEAD --oneline` to see commits to include 2. **Move completed plan to done** (if applicable): - - Check `plans/todo/` for a plan file related to this branch (match by issue number or feature name) - - If found, check whether all checklist items (`- [ ]`) are complete (`- [x]`) - - If all complete: `mkdir -p plans/done && git mv plans/todo/<file> plans/done/<file>` - - Commit: `git commit -m "docs: move completed plan to plans/done/"` - - If not all complete, leave it in `plans/todo/` + - Extract an issue/feature token from the branch name (e.g., `fix/896-buffer-import` → `896`; otherwise the kebab-case feature slug after the type prefix) + - Search `plans/todo/` for files whose **filename** contains the token, and as a fallback files whose **content** contains it + - If **no match**: skip this step + - If **multiple matches**: list them and skip — require human disambiguation rather than guessing + - If **exactly one match**: inspect its checklist items + - If the file has zero `- [ ]` / `- [x]` items, treat it as "no checklist" and skip the move + - If any `- [ ]` items remain unchecked, leave it in `plans/todo/` + - Only when every checklist item is `- [x]`: `mkdir -p plans/done && git 
mv plans/todo/<file> plans/done/<file>` and commit `docs: move completed plan to plans/done/` 3. **Push the branch** (if not already pushed): - Run `git push -u origin <branch-name>` diff --git a/src/cedar/grouping.rs b/src/cedar/grouping.rs index 79ac2be..aa87b9b 100644 --- a/src/cedar/grouping.rs +++ b/src/cedar/grouping.rs @@ -136,6 +136,12 @@ fn build_cedar_content(source_file: &Path, findings: &[&Finding]) -> String { // an invariant and skip findings without one rather than silently // synthesizing a default — that would mask a missing pre-fill. let Some(stub) = finding.policy_output(PolicyEngine::Cedar) else { + tracing::warn!( + finding_id = %finding.id, + file = %finding.file.display(), + line = finding.line_start, + "skipping finding without Cedar policy output; header counts will overstate coverage" + ); continue; }; let wrapped = templates::apply_confidence_wrapping(stub, finding.confidence); diff --git a/src/commands/extract.rs b/src/commands/extract.rs index f0f7134..f2e5c07 100644 --- a/src/commands/extract.rs +++ b/src/commands/extract.rs @@ -151,6 +151,17 @@ fn write_policy_file(output_path: &Path, content: &str, canonical_output_dir: &P canonical_output_dir.display() ))); } + // Reject existing symlinks at the leaf — `fs::write` would follow them + // and escape the canonicalised parent containment. Use `symlink_metadata` + // so we inspect the link itself rather than its target. + if let Ok(meta) = std::fs::symlink_metadata(output_path) + && meta.file_type().is_symlink() + { + return Err(ZiftError::General(format!( + "refusing to write through symlink at output path '{}'", + output_path.display() + ))); + } std::fs::write(output_path, content)?; Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index 8dfcf79..b8497f4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,7 @@ //! - [`error`] — `ZiftError` and `Result<T>` //! - [`types`] — core data types (`Finding`, `Language`, `AuthCategory`, …) //! - [`rules`] — rule loading (read-only) +//! 
- [`policy`] — engine-agnostic [`policy::PolicyGenerator`] trait and dispatch //! - [`rego`] — Rego/OPA policy generation; `rego::validator` is the stable surface //! - [`cedar`] — Cedar policy generation; `cedar::validator` is the stable surface //! - [`run`] — binary entry point diff --git a/src/mcp/resources.rs b/src/mcp/resources.rs index 10d6891..7fff147 100644 --- a/src/mcp/resources.rs +++ b/src/mcp/resources.rs @@ -290,6 +290,10 @@ mod tests { let parsed: serde_json::Value = serde_json::from_str(&r.text).unwrap(); assert_eq!(parsed["id"], "ts-role-check-conditional"); assert!(parsed["query"].is_string()); + assert!( + parsed["policy_templates"].is_array(), + "policy_templates must be a JSON array (renamed field regression guard)" + ); } #[test] diff --git a/src/mcp/tools.rs b/src/mcp/tools.rs index 8c75c33..d39fcbd 100644 --- a/src/mcp/tools.rs +++ b/src/mcp/tools.rs @@ -22,8 +22,6 @@ use crate::deep::prompt::{PromptInputs, render}; use crate::mcp::protocol::{ToolDescriptor, ToolsCallResult, ToolsListResult}; use crate::mcp::server::ServerContext; use crate::policy::generator_for; -use crate::rego::templates::{apply_confidence_wrapping, generate_default_stub, render_template}; -use crate::rego::validator::validate_rego; use crate::rules::PatternRule; use crate::scanner; use crate::types::{AuthCategory, Confidence, Finding, Language, PolicyEngine, ScanPass, Surface}; @@ -407,37 +405,16 @@ fn suggest_rego_descriptor() -> ToolDescriptor { } } -#[derive(Debug, Deserialize)] -struct SuggestRegoArgs { - category: AuthCategory, - confidence: Confidence, - code_snippet: String, - #[serde(default)] - rule_id: Option<String>, -} - fn suggest_rego(ctx: &ServerContext, args: &Value) -> Result<Value, String> { - let parsed: SuggestRegoArgs = parse_args(args, "suggest_rego")?; - - let rendered = if let Some(rid) = parsed.rule_id.as_deref() { - let rule = ctx.rules.iter().find(|r| r.id == rid); - match rule.and_then(|r| r.template_for(PolicyEngine::Rego)) { - Some(tmpl) 
=> { - // Render the rule's template with literals extracted from the - // snippet. We use an empty vars map keyed by capture name — - // the model fills these in during deep mode; here we mirror - // the structural-pass behavior for ad-hoc agent calls. - let vars = build_template_vars(&parsed.code_snippet); - render_template(tmpl, &vars) - } - None => generate_default_stub(parsed.category, &parsed.code_snippet), - } - } else { - generate_default_stub(parsed.category, &parsed.code_snippet) - }; - - let wrapped = apply_confidence_wrapping(&rendered, parsed.confidence); - Ok(json!({"rego": wrapped})) + // Thin alias: forward to `suggest_policy` pinned to Rego so the two + // codepaths can't drift. `suggest_policy` already echoes the engine's + // key (`"rego"`) alongside `"policy"`, so existing clients reading + // `payload["rego"]` keep working. + let mut forwarded = args.clone(); + if let Value::Object(map) = &mut forwarded { + map.insert("engine".to_string(), json!("rego")); + } + suggest_policy(ctx, &forwarded) } fn build_template_vars(snippet: &str) -> std::collections::HashMap<String, String> { @@ -474,10 +451,36 @@ fn build_template_vars(snippet: &str) -> std::collections::HashMap<String, Strin ); vars.insert("roles".to_string(), set.clone()); vars.insert("plans".to_string(), set); + + // CSV-normalised item lists for `{{roles_set}}` (Rego) and + // `{{cedar_roles_set}}` (Cedar). Mirrors the scanner path's + // `comma_separated_quoted_items` so rules like ASP.NET + // `Authorize(Roles="Admin,Manager")` render correctly when an + // agent calls `suggest_policy` with a `rule_id` + raw snippet. + let items = csv_normalised_quoted_items(&literals); + vars.insert("roles_set".to_string(), items.clone()); + vars.insert("cedar_roles_set".to_string(), items); } vars } +/// Comma-split each literal, trim, drop empties, quote, and re-join. 
Matches +/// the scanner's `comma_separated_quoted_items` so the structural and MCP +/// suggest paths produce identical `roles_set` / `cedar_roles_set` output. +fn csv_normalised_quoted_items(literals: &[String]) -> String { + literals + .iter() + .flat_map(|lit| { + lit.split(',') + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string) + }) + .map(|s| format!("\"{s}\"")) + .collect::<Vec<_>>() + .join(", ") +} + // -- validate_rego ------------------------------------------------------- fn validate_rego_descriptor() -> ToolDescriptor { @@ -497,18 +500,16 @@ fn validate_rego_descriptor() -> ToolDescriptor { } } -#[derive(Debug, Deserialize)] -struct ValidateRegoArgs { - policy: String, -} - fn validate_rego_tool(args: &Value) -> Result<Value, String> { - let parsed: ValidateRegoArgs = parse_args(args, "validate_rego")?; - let result = validate_rego(&parsed.policy); - Ok(json!({ - "valid": result.valid, - "error": result.error, - })) + // Thin alias: forward to `validate_policy_tool` pinned to Rego. + // `validate_policy_tool` returns `{engine, valid, error}`; the extra + // `engine` field is additive and existing clients reading `valid`/ + // `error` keep working. + let mut forwarded = args.clone(); + if let Value::Object(map) = &mut forwarded { + map.insert("engine".to_string(), json!("rego")); + } + validate_policy_tool(&forwarded) } // -- suggest_policy / validate_policy ----------------------------------- diff --git a/src/rego/grouping.rs b/src/rego/grouping.rs index 1c16140..01bfe1c 100644 --- a/src/rego/grouping.rs +++ b/src/rego/grouping.rs @@ -153,6 +153,12 @@ fn build_rego_content(package_name: &str, source_file: &Path, findings: &[&Findi // an invariant and skip findings without one rather than silently // synthesizing a default — that would mask a missing pre-fill. 
let Some(stub) = finding.policy_output(PolicyEngine::Rego) else { + tracing::warn!( + finding_id = %finding.id, + file = %finding.file.display(), + line = finding.line_start, + "skipping finding without Rego policy output; header counts will overstate coverage" + ); continue; }; let stub = stub.to_string(); diff --git a/src/types.rs b/src/types.rs index 778bd14..068cf7b 100644 --- a/src/types.rs +++ b/src/types.rs @@ -90,13 +90,13 @@ impl Finding { .map(|p| p.content.as_str()) } - /// Insert or replace the policy output for an engine. + /// Insert or replace the policy output for an engine. Removes any + /// existing entries for the engine first so the vector holds at most + /// one [`PolicyOutput`] per [`PolicyEngine`], even if a Finding was + /// constructed in-memory with duplicates. pub fn set_policy_output(&mut self, engine: PolicyEngine, content: String) { - if let Some(existing) = self.policy_outputs.iter_mut().find(|p| p.engine == engine) { - existing.content = content; - } else { - self.policy_outputs.push(PolicyOutput { engine, content }); - } + self.policy_outputs.retain(|p| p.engine != engine); + self.policy_outputs.push(PolicyOutput { engine, content }); } } From fc3779dd07743519d262f170e865b6487ec41565 Mon Sep 17 00:00:00 2001 From: Brad Anderson <brad@enforceauth.com> Date: Wed, 6 May 2026 20:52:55 -0400 Subject: [PATCH 7/7] docs: clarify /pr token extraction and plan search rules --- .agents/commands/pr.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.agents/commands/pr.md b/.agents/commands/pr.md index de4fe97..6093c57 100644 --- a/.agents/commands/pr.md +++ b/.agents/commands/pr.md @@ -12,8 +12,11 @@ When activated, create a pull request for the current branch: - Run `git log main..HEAD --oneline` to see commits to include 2. 
**Move completed plan to done** (if applicable): - - Extract an issue/feature token from the branch name (e.g., `fix/896-buffer-import` → `896`; otherwise the kebab-case feature slug after the type prefix) - - Search `plans/todo/` for files whose **filename** contains the token, and as a fallback files whose **content** contains it + - Extract an issue/feature token from the branch name using the first rule that matches: + - If the branch (or the portion after a `type/` prefix) starts with digits, use the leading numeric sequence (e.g., `fix/896-buffer-import` → `896`, `123-foo` → `123`) + - Otherwise, if the branch is `type/slug`, use the full kebab-case slug after the slash (e.g., `feature/add-new-widget` → `add-new-widget`) + - Otherwise, use the full branch name (e.g., `fix-typo` → `fix-typo`) + - Search `plans/todo/` for files whose **filename** contains the token (case-insensitive substring match). Do not fall back to scanning file contents. - If **no match**: skip this step - If **multiple matches**: list them and skip — require human disambiguation rather than guessing - If **exactly one match**: inspect its checklist items