diff --git a/.agents/commands/pr.md b/.agents/commands/pr.md index 488d1e0..6093c57 100644 --- a/.agents/commands/pr.md +++ b/.agents/commands/pr.md @@ -11,22 +11,42 @@ When activated, create a pull request for the current branch: - Ensure we're not on `main` (abort if so) - Run `git log main..HEAD --oneline` to see commits to include -2. **Push the branch** (if not already pushed): +2. **Move completed plan to done** (if applicable): + - Extract an issue/feature token from the branch name using the first rule that matches: + - If the branch (or the portion after a `type/` prefix) starts with digits, use the leading numeric sequence (e.g., `fix/896-buffer-import` → `896`, `123-foo` → `123`) + - Otherwise, if the branch is `type/slug`, use the full kebab-case slug after the slash (e.g., `feature/add-new-widget` → `add-new-widget`) + - Otherwise, use the full branch name (e.g., `fix-typo` → `fix-typo`) + - Search `plans/todo/` for files whose **filename** contains the token (case-insensitive substring match). Do not fall back to scanning file contents. + - If **no match**: skip this step + - If **multiple matches**: list them and skip — require human disambiguation rather than guessing + - If **exactly one match**: inspect its checklist items + - If the file has zero `- [ ]` / `- [x]` items, treat it as "no checklist" and skip the move + - If any `- [ ]` items remain unchecked, leave it in `plans/todo/` + - Only when every checklist item is `- [x]`: `mkdir -p plans/done && git mv plans/todo/<file> plans/done/` and commit `docs: move completed plan to plans/done/` + +3. **Push the branch** (if not already pushed): - Run `git push -u origin <branch-name>` -3. **Generate PR title and body**: +4. 
**Check for related issues**: + - Look at the branch name for issue numbers (e.g., `fix/896-buffer-import` references #896) + - Check commit messages for issue references + - Run `gh issue list --state open --limit 20` to see recent open issues that might be related + - If the PR resolves an issue, note it for the body + +5. **Generate PR title and body**: - Title: Use conventional commit format based on the primary change (e.g., `feat: add tree-sitter Java pattern matching`) - Keep the title under 72 characters - Body should include: - **Summary**: Brief description of what this PR does - **Changes**: Bullet list of key changes - **Testing**: How to test the changes (e.g., `cargo test`, manual CLI usage) + - **Issue references**: Add `Fixes #XXX` or `Closes #XXX` for any issues this PR resolves (these will auto-close the issues when merged) -4. **Create the PR**: +6. **Create the PR**: ```bash gh pr create --title "<title>" --body "<body>" --base main --assignee @me ``` -5. **Report the PR URL** to the user +7. **Report the PR URL** to the user If the user provides arguments (e.g., `/pr "Custom title"`), use that as the PR title instead of generating one. diff --git a/docs/CEDAR_SUPPORT.md b/docs/CEDAR_SUPPORT.md index bbe7a4a..b233103 100644 --- a/docs/CEDAR_SUPPORT.md +++ b/docs/CEDAR_SUPPORT.md @@ -135,3 +135,4 @@ land on its own merits, not on a wait-for-feedback gate. - **2026-05-03** — memo drafted alongside the user-facing rename from "Rego for OPA" to "Policy as Code (PaC)." No code changes proposed yet; doc captures the investigation while context is fresh. - **2026-05-06** — Phase A landed (`feat: add Cedar as a second policy engine`). Phase B's "wait for users" gate and "JSON-schema break needs a major-version bump" gate dropped: the `*_stub` → `policy_outputs` migration is in Phase B's scope and ships with a deserialization shim, so it doesn't need a coordinated version cut. +- **2026-05-06** — Phase B landed (#71). 
`Finding::rego_stub` / `cedar_stub` collapsed into `policy_outputs: Vec<PolicyOutput>` (legacy fields fold in via a `FindingShim`). `PatternRule::rego_template` / `cedar_template` collapsed into `policy_templates: Vec<PolicyTemplate>` (legacy `[rule.rego_template]` / `[rule.cedar_template]` blocks parse alongside the new `[[rule.policy_templates]]` array). `PolicyGenerator` trait introduced in `src/policy.rs` with `RegoGenerator` / `CedarGenerator` impls; `extract::execute` is now generic over `&dyn PolicyGenerator`, and the MCP `suggest_policy` / `validate_policy` tools dispatch through the trait. JSON output shape changed: `rego_stub` / `cedar_stub` replaced by `policy_outputs`; MCP `get_rule` / `list_rules` now expose `policy_templates` and `template_engines` instead of `rego_template` / `has_rego_template`. diff --git a/src/cedar/generator.rs b/src/cedar/generator.rs new file mode 100644 index 0000000..70c804e --- /dev/null +++ b/src/cedar/generator.rs @@ -0,0 +1,53 @@ +//! [`PolicyGenerator`] implementation for Cedar. Thin adapter over the +//! engine-specific `templates`, `validator`, and `grouping` modules. 
+ +use std::collections::HashMap; +use std::path::Path; + +use crate::policy::{PolicyFile, PolicyGenerator, ValidationResult}; +use crate::types::{AuthCategory, Confidence, Finding, PolicyEngine}; + +pub struct CedarGenerator; + +impl PolicyGenerator for CedarGenerator { + fn engine(&self) -> PolicyEngine { + PolicyEngine::Cedar + } + + fn validate(&self, policy: &str) -> ValidationResult { + let r = super::validator::validate_cedar(policy); + ValidationResult { + valid: r.valid, + error: r.error, + } + } + + fn render_template(&self, template: &str, vars: &HashMap<String, String>) -> String { + super::templates::render_template(template, vars) + } + + fn default_stub(&self, category: AuthCategory, snippet: &str) -> String { + super::templates::generate_default_stub(category, snippet) + } + + fn wrap_by_confidence(&self, body: &str, confidence: Confidence) -> String { + super::templates::apply_confidence_wrapping(body, confidence) + } + + fn group_and_generate( + &self, + findings: &[Finding], + policy_prefix: &str, + output_dir: &Path, + ) -> Vec<PolicyFile> { + super::grouping::group_findings(findings, policy_prefix, output_dir) + .into_iter() + .map(|f| PolicyFile { + label: f.label, + output_path: f.output_path, + content: f.content, + finding_count: f.finding_count, + }) + .collect() + } +} diff --git a/src/cedar/grouping.rs b/src/cedar/grouping.rs index d7ab140..aa87b9b 100644 --- a/src/cedar/grouping.rs +++ b/src/cedar/grouping.rs @@ -6,7 +6,7 @@ use std::collections::BTreeMap; use std::path::{Path, PathBuf}; -use crate::types::Finding; +use crate::types::{Finding, PolicyEngine}; use super::templates; @@ -130,11 +130,21 @@ fn build_cedar_content(source_file: &Path, findings: &[&Finding]) -> String { }; lines.push(format!("// Original: {}", truncated.trim())); - let stub = match &finding.cedar_stub { - Some(s) => s.clone(), - None => templates::generate_default_stub(finding.category, &finding.code_snippet), + // The extract pipeline pre-fills 
`policy_output(Cedar)` for every + // finding before calling into grouping (see + // `commands::extract::run_extract`), so we treat its presence as + // an invariant and skip findings without one rather than silently + // synthesizing a default — that would mask a missing pre-fill. + let Some(stub) = finding.policy_output(PolicyEngine::Cedar) else { + tracing::warn!( + finding_id = %finding.id, + file = %finding.file.display(), + line = finding.line_start, + "skipping finding without Cedar policy output; header counts will overstate coverage" + ); + continue; }; - let wrapped = templates::apply_confidence_wrapping(&stub, finding.confidence); + let wrapped = templates::apply_confidence_wrapping(stub, finding.confidence); for line in wrapped.lines() { lines.push(line.to_string()); } @@ -149,8 +159,11 @@ mod tests { use super::*; use crate::types::*; + /// Mirrors the pre-fill `commands::extract::run_extract` performs + /// before invoking `group_findings`: every finding lands here with a + /// rendered Cedar stub already attached. 
fn finding(file: &str, snippet: &str, category: AuthCategory) -> Finding { - Finding { + let mut f = Finding { id: "x".into(), file: PathBuf::from(file), line_start: 10, @@ -161,11 +174,15 @@ mod tests { confidence: Confidence::High, description: "test".into(), pattern_rule: Some("test-rule".into()), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, - } + }; + f.set_policy_output( + PolicyEngine::Cedar, + templates::generate_default_stub(f.category, &f.code_snippet), + ); + f } #[test] diff --git a/src/cedar/mod.rs b/src/cedar/mod.rs index 37e46a8..e921c3a 100644 --- a/src/cedar/mod.rs +++ b/src/cedar/mod.rs @@ -1,6 +1,8 @@ +pub mod generator; pub mod grouping; pub mod templates; pub mod validator; +pub use generator::CedarGenerator; pub use grouping::group_findings; pub use templates::render_template; diff --git a/src/cli.rs b/src/cli.rs index ec2784d..21b00e5 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -165,20 +165,7 @@ pub struct ExtractArgs { pub min_confidence: Option<Confidence>, } -#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] -pub enum PolicyEngine { - Rego, - Cedar, -} - -impl std::fmt::Display for PolicyEngine { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - PolicyEngine::Rego => write!(f, "rego"), - PolicyEngine::Cedar => write!(f, "cedar"), - } - } -} +pub use crate::types::PolicyEngine; // -- Report -- diff --git a/src/commands/extract.rs b/src/commands/extract.rs index 79027c2..f2e5c07 100644 --- a/src/commands/extract.rs +++ b/src/commands/extract.rs @@ -3,9 +3,10 @@ use std::path::Path; use serde::Deserialize; -use crate::cli::{ExtractArgs, PolicyEngine}; +use crate::cli::ExtractArgs; use crate::config::ZiftConfig; use crate::error::{Result, ZiftError}; +use crate::policy::{PolicyGenerator, generator_for}; use crate::types::Finding; /// Minimal struct for deserializing scan output — we only need the findings. 
@@ -46,92 +47,40 @@ pub fn execute(args: ExtractArgs, config: ZiftConfig) -> Result<()> { return Ok(()); } - match args.engine { - PolicyEngine::Rego => extract_rego(&mut findings, policy_prefix, &output_dir), - PolicyEngine::Cedar => extract_cedar(&mut findings, policy_prefix, &output_dir), - } -} - -fn extract_rego(findings: &mut [Finding], policy_prefix: &str, output_dir: &Path) -> Result<()> { - use crate::rego::{self, templates}; - - for finding in findings.iter_mut() { - if finding.rego_stub.is_none() { - finding.rego_stub = Some(templates::generate_default_stub( - finding.category, - &finding.code_snippet, - )); - } - } - - let rego_files = rego::group_findings(findings, policy_prefix, output_dir); - - // Canonicalize once before writing any files. Each generated file shares - // the same `output_dir`, so canonicalizing per-file inside the loop was - // redundant work and a stray `output_dir` rename mid-loop would break - // anyway. - std::fs::create_dir_all(output_dir)?; - let canonical_output_dir = output_dir.canonicalize().map_err(|e| { - ZiftError::General(format!( - "failed to resolve output dir '{}': {e}", - output_dir.display() - )) - })?; - - let mut total_files = 0; - let mut validation_warnings = 0; - for rego_file in ®o_files { - let validation = rego::validator::validate_rego(®o_file.content); - let status = if validation.valid { "OK" } else { "WARN" }; - - write_policy_file( - ®o_file.output_path, - ®o_file.content, - &canonical_output_dir, - )?; - total_files += 1; - eprintln!( - " [{status}] {} ({} findings) → {}", - rego_file.package_name, - rego_file.finding_count, - rego_file.output_path.display(), - ); - if let Some(err) = validation.error { - eprintln!(" ⚠ Rego parse warning: {err}"); - validation_warnings += 1; - } - } - - eprintln!( - "\nGenerated {total_files} Rego files from {} findings.", - findings.len(), - ); - if validation_warnings > 0 { - eprintln!( - "{validation_warnings} file(s) have Rego syntax warnings — review before 
deploying.", - ); - } - Ok(()) + run_extract( + generator_for(args.engine), + &mut findings, + policy_prefix, + &output_dir, + ) } -fn extract_cedar(findings: &mut [Finding], policy_prefix: &str, output_dir: &Path) -> Result<()> { - use crate::cedar::{self, templates}; - - // Mirror of the Rego pre-fill: if a finding doesn't carry a - // cedar_stub yet (e.g. it came from an older scan, or its rule has no - // `cedar_template` block), synthesize one from the category default. - // This keeps Cedar coverage at 100% — no rule produces zero output. +/// Engine-agnostic extract pipeline. Pre-fills any missing per-engine +/// policy output on each finding (so Cedar/Rego coverage stays at 100% +/// even when a rule has no template), groups findings into output files, +/// validates each file with the engine's parser, and writes them under a +/// canonicalised `output_dir`. +fn run_extract( + generator: &dyn PolicyGenerator, + findings: &mut [Finding], + policy_prefix: &str, + output_dir: &Path, +) -> Result<()> { + let engine = generator.engine(); + let engine_name = engine.human_name(); for finding in findings.iter_mut() { - if finding.cedar_stub.is_none() { - finding.cedar_stub = Some(templates::generate_default_stub( - finding.category, - &finding.code_snippet, - )); + if finding.policy_output(engine).is_none() { + let stub = generator.default_stub(finding.category, &finding.code_snippet); + finding.set_policy_output(engine, stub); } } - let cedar_files = cedar::group_findings(findings, policy_prefix, output_dir); + let policy_files = generator.group_and_generate(findings, policy_prefix, output_dir); + // Canonicalize once before writing any files. Each generated file + // shares the same `output_dir`, so canonicalizing per-file inside + // the loop was redundant work and a stray `output_dir` rename + // mid-loop would break anyway. 
std::fs::create_dir_all(output_dir)?; let canonical_output_dir = output_dir.canonicalize().map_err(|e| { ZiftError::General(format!( @@ -142,35 +91,35 @@ fn extract_cedar(findings: &mut [Finding], policy_prefix: &str, output_dir: &Pat let mut total_files = 0; let mut validation_warnings = 0; - for cedar_file in &cedar_files { - let validation = cedar::validator::validate_cedar(&cedar_file.content); + for policy_file in &policy_files { + let validation = generator.validate(&policy_file.content); let status = if validation.valid { "OK" } else { "WARN" }; write_policy_file( - &cedar_file.output_path, - &cedar_file.content, + &policy_file.output_path, + &policy_file.content, &canonical_output_dir, )?; total_files += 1; eprintln!( " [{status}] {} ({} findings) → {}", - cedar_file.label, - cedar_file.finding_count, - cedar_file.output_path.display(), + policy_file.label, + policy_file.finding_count, + policy_file.output_path.display(), ); if let Some(err) = validation.error { - eprintln!(" ⚠ Cedar parse warning: {err}"); + eprintln!(" ⚠ {engine_name} parse warning: {err}"); validation_warnings += 1; } } eprintln!( - "\nGenerated {total_files} Cedar files from {} findings.", + "\nGenerated {total_files} {engine_name} files from {} findings.", findings.len(), ); if validation_warnings > 0 { eprintln!( - "{validation_warnings} file(s) have Cedar syntax warnings — review before deploying.", + "{validation_warnings} file(s) have {engine_name} syntax warnings — review before deploying.", ); } Ok(()) @@ -202,6 +151,17 @@ fn write_policy_file(output_path: &Path, content: &str, canonical_output_dir: &P canonical_output_dir.display() ))); } + // Reject existing symlinks at the leaf — `fs::write` would follow them + // and escape the canonicalised parent containment. Use `symlink_metadata` + // so we inspect the link itself rather than its target. 
+ if let Ok(meta) = std::fs::symlink_metadata(output_path) + && meta.file_type().is_symlink() + { + return Err(ZiftError::General(format!( + "refusing to write through symlink at output path '{}'", + output_path.display() + ))); + } std::fs::write(output_path, content)?; Ok(()) } diff --git a/src/commands/rules.rs b/src/commands/rules.rs index 8d2730d..fce3d19 100644 --- a/src/commands/rules.rs +++ b/src/commands/rules.rs @@ -50,21 +50,25 @@ pub fn execute(args: RulesArgs, config: ZiftConfig) -> Result<()> { } } } - // Validate Rego template if present - if let Some(ref tmpl) = rule.rego_template { - let result = crate::rego::validator::validate_template(tmpl); - if !result.valid { - let err = result.error.unwrap_or_default(); - eprintln!("FAIL {} rego_template: {err}", rule.id); - errors += 1; - } - } - // Validate Cedar template if present - if let Some(ref tmpl) = rule.cedar_template { - let result = crate::cedar::validator::validate_template(tmpl); - if !result.valid { - let err = result.error.unwrap_or_default(); - eprintln!("FAIL {} cedar_template: {err}", rule.id); + // Validate each generated policy template against its + // engine's parser. The template-level validators wrap each + // body in a minimal module before parsing, which is engine + // shape — keep the dispatch local rather than pushing a + // template-flavored variant onto the PolicyGenerator trait. 
+ for tmpl in &rule.policy_templates { + let (valid, error) = match tmpl.engine { + crate::types::PolicyEngine::Rego => { + let r = crate::rego::validator::validate_template(&tmpl.template); + (r.valid, r.error) + } + crate::types::PolicyEngine::Cedar => { + let r = crate::cedar::validator::validate_template(&tmpl.template); + (r.valid, r.error) + } + }; + if !valid { + let err = error.unwrap_or_default(); + eprintln!("FAIL {} {} template: {err}", rule.id, tmpl.engine); errors += 1; } } diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs index 173188f..d9508aa 100644 --- a/src/deep/candidate.rs +++ b/src/deep/candidate.rs @@ -395,8 +395,7 @@ mod tests { confidence, description: String::new(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } @@ -786,8 +785,7 @@ mod tests { confidence: Confidence::Low, description: "x".into(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, }; diff --git a/src/deep/context.rs b/src/deep/context.rs index 4ac0bf8..cc0ee38 100644 --- a/src/deep/context.rs +++ b/src/deep/context.rs @@ -212,8 +212,7 @@ mod tests { confidence: Confidence::Low, description: String::new(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } diff --git a/src/deep/finding.rs b/src/deep/finding.rs index beedbea..cd114b3 100644 --- a/src/deep/finding.rs +++ b/src/deep/finding.rs @@ -122,8 +122,8 @@ pub fn into_finding( // `semantic-rbac`) so a consumer grouping by `pattern_rule` sees that // this finding is the model's verdict, not the structural rule's. pattern_rule: Some(rule_id), - rego_stub: None, // structural-only; semantic findings have no rego template - cedar_stub: None, + // Structural-only; semantic findings have no policy templates. 
+ policy_outputs: vec![], pass: ScanPass::Semantic, // Surface follows the source file, not the pass — same path // heuristic as structural findings so a deep-pass `web/src/foo.ts` @@ -244,8 +244,7 @@ mod tests { confidence: Confidence::Low, description: "matched custom rule".into(), pattern_rule: pattern_rule.map(String::from), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } diff --git a/src/deep/merge.rs b/src/deep/merge.rs index d572eb7..8c74ff1 100644 --- a/src/deep/merge.rs +++ b/src/deep/merge.rs @@ -77,8 +77,7 @@ mod tests { confidence, description: String::new(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass, surface: Surface::Backend, } diff --git a/src/deep/prompt.rs b/src/deep/prompt.rs index 1fa0eab..5eed017 100644 --- a/src/deep/prompt.rs +++ b/src/deep/prompt.rs @@ -334,8 +334,7 @@ mod tests { confidence: Confidence::Low, description: "matched custom rule".into(), pattern_rule: Some("ts-custom-1".into()), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } diff --git a/src/lib.rs b/src/lib.rs index 5d5f8d2..b8497f4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,7 @@ //! - [`error`] — `ZiftError` and `Result<T>` //! - [`types`] — core data types (`Finding`, `Language`, `AuthCategory`, …) //! - [`rules`] — rule loading (read-only) +//! - [`policy`] — engine-agnostic [`policy::PolicyGenerator`] trait and dispatch //! - [`rego`] — Rego/OPA policy generation; `rego::validator` is the stable surface //! - [`cedar`] — Cedar policy generation; `cedar::validator` is the stable surface //! 
- [`run`] — binary entry point @@ -19,6 +20,7 @@ pub mod cedar; pub mod cli; pub mod error; +pub mod policy; pub mod rego; pub mod rules; pub mod types; diff --git a/src/mcp/resources.rs b/src/mcp/resources.rs index 0a423c9..7fff147 100644 --- a/src/mcp/resources.rs +++ b/src/mcp/resources.rs @@ -105,6 +105,11 @@ pub fn read_resource(ctx: &ServerContext, uri: &str) -> Option<ResourceContent> } fn rule_to_json(rule: &PatternRule) -> serde_json::Value { + let policy_templates: Vec<serde_json::Value> = rule + .policy_templates + .iter() + .map(|t| json!({"engine": t.engine, "template": t.template})) + .collect(); json!({ "id": rule.id, "languages": rule.languages, @@ -112,7 +117,7 @@ fn rule_to_json(rule: &PatternRule) -> serde_json::Value { "confidence": rule.confidence, "description": rule.description, "query": rule.query_source, - "rego_template": rule.rego_template, + "policy_templates": policy_templates, }) } @@ -285,6 +290,10 @@ mod tests { let parsed: serde_json::Value = serde_json::from_str(&r.text).unwrap(); assert_eq!(parsed["id"], "ts-role-check-conditional"); assert!(parsed["query"].is_string()); + assert!( + parsed["policy_templates"].is_array(), + "policy_templates must be a JSON array (renamed field regression guard)" + ); } #[test] diff --git a/src/mcp/tools.rs b/src/mcp/tools.rs index eea8ab4..d39fcbd 100644 --- a/src/mcp/tools.rs +++ b/src/mcp/tools.rs @@ -15,18 +15,16 @@ use std::path::{Path, PathBuf}; use serde::Deserialize; use serde_json::{Value, json}; -use crate::cedar; use crate::cli::ScanArgs; use crate::deep::candidate::{Candidate, CandidateKind}; use crate::deep::context::expand_region; use crate::deep::prompt::{PromptInputs, render}; use crate::mcp::protocol::{ToolDescriptor, ToolsCallResult, ToolsListResult}; use crate::mcp::server::ServerContext; -use crate::rego::templates::{apply_confidence_wrapping, generate_default_stub, render_template}; -use crate::rego::validator::validate_rego; +use crate::policy::generator_for; use 
crate::rules::PatternRule; use crate::scanner; -use crate::types::{AuthCategory, Confidence, Finding, Language, ScanPass, Surface}; +use crate::types::{AuthCategory, Confidence, Finding, Language, PolicyEngine, ScanPass, Surface}; const DEFAULT_MAX_PROMPT_CHARS: usize = 16_000; @@ -272,14 +270,15 @@ fn list_rules(ctx: &ServerContext, args: &Value) -> Result<Value, String> { } fn rule_summary(r: &PatternRule) -> Value { + let template_engines: Vec<&PolicyEngine> = + r.policy_templates.iter().map(|t| &t.engine).collect(); json!({ "id": r.id, "languages": r.languages, "category": r.category, "confidence": r.confidence, "description": r.description, - "has_rego_template": r.rego_template.is_some(), - "has_cedar_template": r.cedar_template.is_some(), + "template_engines": template_engines, }) } @@ -352,6 +351,12 @@ fn get_rule(ctx: &ServerContext, args: &Value) -> Result<Value, String> { }) .collect(); + let policy_templates: Vec<Value> = rule + .policy_templates + .iter() + .map(|t| json!({"engine": t.engine, "template": t.template})) + .collect(); + Ok(json!({ "id": rule.id, "languages": rule.languages, @@ -362,8 +367,7 @@ fn get_rule(ctx: &ServerContext, args: &Value) -> Result<Value, String> { "query": rule.query_source, "predicates": predicates, "cross_predicates": cross_predicates, - "rego_template": rule.rego_template, - "cedar_template": rule.cedar_template, + "policy_templates": policy_templates, "tests": tests, })) } @@ -374,7 +378,7 @@ fn suggest_rego_descriptor() -> ToolDescriptor { ToolDescriptor { name: "suggest_rego", description: "Suggest a Rego policy stub for a finding. If a rule_id with \ - a rego_template is supplied, the template is rendered against \ + a Rego template is supplied, the template is rendered against \ string literals extracted from the snippet. Otherwise a \ category-default stub is generated. 
The stub is wrapped in \ confidence-appropriate guidance (commented for low, TODO \ @@ -392,7 +396,7 @@ fn suggest_rego_descriptor() -> ToolDescriptor { "rule_id": { "type": "string", "description": "Optional. If supplied and the rule has a \ - rego_template, that template wins over the default." + Rego template, that template wins over the default." } }, "required": ["category", "confidence", "code_snippet"], @@ -401,37 +405,16 @@ fn suggest_rego_descriptor() -> ToolDescriptor { } } -#[derive(Debug, Deserialize)] -struct SuggestRegoArgs { - category: AuthCategory, - confidence: Confidence, - code_snippet: String, - #[serde(default)] - rule_id: Option<String>, -} - fn suggest_rego(ctx: &ServerContext, args: &Value) -> Result<Value, String> { - let parsed: SuggestRegoArgs = parse_args(args, "suggest_rego")?; - - let rendered = if let Some(rid) = parsed.rule_id.as_deref() { - let rule = ctx.rules.iter().find(|r| r.id == rid); - match rule.and_then(|r| r.rego_template.as_deref()) { - Some(tmpl) => { - // Render the rule's template with literals extracted from the - // snippet. We use an empty vars map keyed by capture name — - // the model fills these in during deep mode; here we mirror - // the structural-pass behavior for ad-hoc agent calls. - let vars = build_template_vars(&parsed.code_snippet); - render_template(tmpl, &vars) - } - None => generate_default_stub(parsed.category, &parsed.code_snippet), - } - } else { - generate_default_stub(parsed.category, &parsed.code_snippet) - }; - - let wrapped = apply_confidence_wrapping(&rendered, parsed.confidence); - Ok(json!({"rego": wrapped})) + // Thin alias: forward to `suggest_policy` pinned to Rego so the two + // codepaths can't drift. `suggest_policy` already echoes the engine's + // key (`"rego"`) alongside `"policy"`, so existing clients reading + // `payload["rego"]` keep working. 
+ let mut forwarded = args.clone(); + if let Value::Object(map) = &mut forwarded { + map.insert("engine".to_string(), json!("rego")); + } + suggest_policy(ctx, &forwarded) } fn build_template_vars(snippet: &str) -> std::collections::HashMap<String, String> { @@ -468,10 +451,36 @@ fn build_template_vars(snippet: &str) -> std::collections::HashMap<String, Strin ); vars.insert("roles".to_string(), set.clone()); vars.insert("plans".to_string(), set); + + // CSV-normalised item lists for `{{roles_set}}` (Rego) and + // `{{cedar_roles_set}}` (Cedar). Mirrors the scanner path's + // `comma_separated_quoted_items` so rules like ASP.NET + // `Authorize(Roles="Admin,Manager")` render correctly when an + // agent calls `suggest_policy` with a `rule_id` + raw snippet. + let items = csv_normalised_quoted_items(&literals); + vars.insert("roles_set".to_string(), items.clone()); + vars.insert("cedar_roles_set".to_string(), items); } vars } +/// Comma-split each literal, trim, drop empties, quote, and re-join. Matches +/// the scanner's `comma_separated_quoted_items` so the structural and MCP +/// suggest paths produce identical `roles_set` / `cedar_roles_set` output. 
+fn csv_normalised_quoted_items(literals: &[String]) -> String { + literals + .iter() + .flat_map(|lit| { + lit.split(',') + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string) + }) + .map(|s| format!("\"{s}\"")) + .collect::<Vec<_>>() + .join(", ") +} + // -- validate_rego ------------------------------------------------------- fn validate_rego_descriptor() -> ToolDescriptor { @@ -491,18 +500,16 @@ fn validate_rego_descriptor() -> ToolDescriptor { } } -#[derive(Debug, Deserialize)] -struct ValidateRegoArgs { - policy: String, -} - fn validate_rego_tool(args: &Value) -> Result<Value, String> { - let parsed: ValidateRegoArgs = parse_args(args, "validate_rego")?; - let result = validate_rego(&parsed.policy); - Ok(json!({ - "valid": result.valid, - "error": result.error, - })) + // Thin alias: forward to `validate_policy_tool` pinned to Rego. + // `validate_policy_tool` returns `{engine, valid, error}`; the extra + // `engine` field is additive and existing clients reading `valid`/ + // `error` keep working. + let mut forwarded = args.clone(); + if let Value::Object(map) = &mut forwarded { + map.insert("engine".to_string(), json!("rego")); + } + validate_policy_tool(&forwarded) } // -- suggest_policy / validate_policy ----------------------------------- @@ -512,13 +519,6 @@ fn validate_rego_tool(args: &Value) -> Result<Value, String> { // learning per-engine tool names. The original Rego tools stay live as // pinned aliases so existing prompts and integrations keep working. 
-#[derive(Debug, Clone, Copy, Deserialize)] -#[serde(rename_all = "lowercase")] -enum PolicyEngineArg { - Rego, - Cedar, -} - fn suggest_policy_descriptor() -> ToolDescriptor { ToolDescriptor { name: "suggest_policy", @@ -559,7 +559,7 @@ fn suggest_policy_descriptor() -> ToolDescriptor { #[derive(Debug, Deserialize)] struct SuggestPolicyArgs { #[serde(default)] - engine: Option<PolicyEngineArg>, + engine: Option<PolicyEngine>, category: AuthCategory, confidence: Confidence, code_snippet: String, @@ -569,41 +569,25 @@ struct SuggestPolicyArgs { fn suggest_policy(ctx: &ServerContext, args: &Value) -> Result<Value, String> { let parsed: SuggestPolicyArgs = parse_args(args, "suggest_policy")?; - let engine = parsed.engine.unwrap_or(PolicyEngineArg::Rego); + let engine = parsed.engine.unwrap_or(PolicyEngine::Rego); + let generator = generator_for(engine); let rule = parsed .rule_id .as_deref() .and_then(|rid| ctx.rules.iter().find(|r| r.id == rid)); - let rendered = match engine { - PolicyEngineArg::Rego => match rule.and_then(|r| r.rego_template.as_deref()) { - Some(tmpl) => { - let vars = build_template_vars(&parsed.code_snippet); - render_template(tmpl, &vars) - } - None => generate_default_stub(parsed.category, &parsed.code_snippet), - }, - PolicyEngineArg::Cedar => match rule.and_then(|r| r.cedar_template.as_deref()) { - Some(tmpl) => { - let vars = build_template_vars(&parsed.code_snippet); - cedar::render_template(tmpl, &vars) - } - None => cedar::templates::generate_default_stub(parsed.category, &parsed.code_snippet), - }, - }; - - let wrapped = match engine { - PolicyEngineArg::Rego => apply_confidence_wrapping(&rendered, parsed.confidence), - PolicyEngineArg::Cedar => { - cedar::templates::apply_confidence_wrapping(&rendered, parsed.confidence) + let rendered = match rule.and_then(|r| r.template_for(engine)) { + Some(tmpl) => { + let vars = build_template_vars(&parsed.code_snippet); + generator.render_template(tmpl, &vars) } + None => 
generator.default_stub(parsed.category, &parsed.code_snippet), }; - let key = match engine { - PolicyEngineArg::Rego => "rego", - PolicyEngineArg::Cedar => "cedar", - }; + let wrapped = generator.wrap_by_confidence(&rendered, parsed.confidence); + + let key = engine.as_str(); Ok(json!({ "engine": key, "policy": wrapped, @@ -638,31 +622,18 @@ fn validate_policy_descriptor() -> ToolDescriptor { #[derive(Debug, Deserialize)] struct ValidatePolicyArgs { #[serde(default)] - engine: Option<PolicyEngineArg>, + engine: Option<PolicyEngine>, policy: String, } fn validate_policy_tool(args: &Value) -> Result<Value, String> { let parsed: ValidatePolicyArgs = parse_args(args, "validate_policy")?; - let engine = parsed.engine.unwrap_or(PolicyEngineArg::Rego); - let (valid, error) = match engine { - PolicyEngineArg::Rego => { - let r = validate_rego(&parsed.policy); - (r.valid, r.error) - } - PolicyEngineArg::Cedar => { - let r = cedar::validator::validate_cedar(&parsed.policy); - (r.valid, r.error) - } - }; - let key = match engine { - PolicyEngineArg::Rego => "rego", - PolicyEngineArg::Cedar => "cedar", - }; + let engine = parsed.engine.unwrap_or(PolicyEngine::Rego); + let result = generator_for(engine).validate(&parsed.policy); Ok(json!({ - "engine": key, - "valid": valid, - "error": error, + "engine": engine.as_str(), + "valid": result.valid, + "error": result.error, })) } @@ -774,8 +745,7 @@ fn analyze_snippet(_ctx: &ServerContext, args: &Value) -> Result<Value, String> confidence: s.confidence, description: s.description.clone(), pattern_rule: s.pattern_rule.clone(), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::classify(&PathBuf::from(&parsed.file)), }); @@ -951,7 +921,12 @@ function check(user: { role: string }) { }; assert_eq!(payload["id"], "ts-role-check-conditional"); assert!(payload["query"].as_str().unwrap().contains("if_statement")); - assert!(payload["rego_template"].is_string()); + // post-#71: 
templates surface via the `policy_templates` array. + let templates = payload["policy_templates"].as_array().unwrap(); + assert!( + templates.iter().any(|t| t["engine"] == "rego"), + "expected a rego policy template; got: {templates:?}" + ); // cross_predicates is always present (empty array if the rule has none). assert!(payload["cross_predicates"].is_array()); } diff --git a/src/output/json.rs b/src/output/json.rs index 069abeb..af68171 100644 --- a/src/output/json.rs +++ b/src/output/json.rs @@ -126,8 +126,7 @@ mod tests { confidence: Confidence::High, description: "embedded role check".into(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } diff --git a/src/output/text.rs b/src/output/text.rs index a004df4..914c85a 100644 --- a/src/output/text.rs +++ b/src/output/text.rs @@ -139,8 +139,7 @@ mod tests { confidence, description: "embedded role check".into(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } diff --git a/src/policy.rs b/src/policy.rs new file mode 100644 index 0000000..f6b3618 --- /dev/null +++ b/src/policy.rs @@ -0,0 +1,84 @@ +//! Engine-agnostic policy generation. The [`PolicyGenerator`] trait +//! collapses the parallel Rego/Cedar pipelines that grew out of Phase A +//! (#27) into a single dispatch surface keyed off [`PolicyEngine`]. +//! +//! Generators are stateless and cheap to construct, so callers obtain one +//! per engine via [`generator_for`] and pass it through as `&dyn +//! PolicyGenerator`. The trait carries the operations every engine needs +//! along the extract and MCP paths: validate a finished policy, render a +//! captured template, generate a category-default stub, wrap a stub for +//! its confidence level, and group findings into per-source-file output +//! units. 
+ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use crate::types::{AuthCategory, Confidence, Finding, PolicyEngine}; + +/// One generated output file produced by a [`PolicyGenerator`]. The +/// `label` is the engine's human-readable identifier — Rego packages use +/// dotted names (`app.api.orders`); Cedar files reuse the same dotted +/// shape purely for log-line consistency since Cedar has no package +/// keyword. The body of the file is `content`; `finding_count` is the +/// number of findings folded into it. +pub struct PolicyFile { + pub label: String, + pub output_path: PathBuf, + pub content: String, + pub finding_count: usize, +} + +/// Result of validating a generated policy string. Engines parse the +/// content with their respective parsers (`regorus` for Rego, `cedar-policy` +/// for Cedar) and report the first parse error, if any. +#[derive(Debug)] +pub struct ValidationResult { + pub valid: bool, + pub error: Option<String>, +} + +pub trait PolicyGenerator { + /// Which engine this generator produces output for. + fn engine(&self) -> PolicyEngine; + + /// Validate a complete policy string. The engine-specific parser is + /// the source of truth — callers don't need to know which one ran. + fn validate(&self, policy: &str) -> ValidationResult; + + /// Render a `{{var}}`-templated policy body against captures. The + /// caller is responsible for adding any engine-specific derived + /// variables (e.g. role-set expansions) before calling. + fn render_template(&self, template: &str, vars: &HashMap<String, String>) -> String; + + /// Generate the category's default stub when a finding has no rule + /// template available. + fn default_stub(&self, category: AuthCategory, snippet: &str) -> String; + + /// Wrap a generated stub in confidence-appropriate guidance — fully + /// commented for `Low`, TODO-prefixed for `Medium`, raw for `High`. + /// The exact comment syntax is engine-specific. 
+    fn wrap_by_confidence(&self, body: &str, confidence: Confidence) -> String;
+
+    /// Group findings into per-source-file output units. The exact
+    /// per-engine layout (Rego packages vs flat Cedar files) lives in
+    /// the engine modules.
+    fn group_and_generate(
+        &self,
+        findings: &[Finding],
+        policy_prefix: &str,
+        output_dir: &Path,
+    ) -> Vec<PolicyFile>;
+}
+
+/// Obtain the generator for a policy engine. Generators are zero-sized
+/// unit structs, so we hand out `&'static` references to module-level
+/// statics and avoid per-call allocation. Dispatch sites stay generic
+/// over `&dyn PolicyGenerator`.
+pub fn generator_for(engine: PolicyEngine) -> &'static dyn PolicyGenerator {
+    static REGO: crate::rego::RegoGenerator = crate::rego::RegoGenerator;
+    static CEDAR: crate::cedar::CedarGenerator = crate::cedar::CedarGenerator;
+    match engine {
+        PolicyEngine::Rego => &REGO,
+        PolicyEngine::Cedar => &CEDAR,
+    }
+}
diff --git a/src/rego/generator.rs b/src/rego/generator.rs
new file mode 100644
index 0000000..aa96fe2
--- /dev/null
+++ b/src/rego/generator.rs
@@ -0,0 +1,54 @@
+//! [`PolicyGenerator`] implementation for Rego/OPA. Thin adapter over the
+//! engine-specific `templates`, `validator`, and `grouping` modules so
+//! each layer can stay focused on its own concern.
+ +use std::collections::HashMap; +use std::path::Path; + +use crate::policy::{PolicyFile, PolicyGenerator, ValidationResult}; +use crate::types::{AuthCategory, Confidence, Finding, PolicyEngine}; + +pub struct RegoGenerator; + +impl PolicyGenerator for RegoGenerator { + fn engine(&self) -> PolicyEngine { + PolicyEngine::Rego + } + + fn validate(&self, policy: &str) -> ValidationResult { + let r = super::validator::validate_rego(policy); + ValidationResult { + valid: r.valid, + error: r.error, + } + } + + fn render_template(&self, template: &str, vars: &HashMap<String, String>) -> String { + super::templates::render_template(template, vars) + } + + fn default_stub(&self, category: AuthCategory, snippet: &str) -> String { + super::templates::generate_default_stub(category, snippet) + } + + fn wrap_by_confidence(&self, body: &str, confidence: Confidence) -> String { + super::templates::apply_confidence_wrapping(body, confidence) + } + + fn group_and_generate( + &self, + findings: &[Finding], + policy_prefix: &str, + output_dir: &Path, + ) -> Vec<PolicyFile> { + super::grouping::group_findings(findings, policy_prefix, output_dir) + .into_iter() + .map(|f| PolicyFile { + label: f.package_name, + output_path: f.output_path, + content: f.content, + finding_count: f.finding_count, + }) + .collect() + } +} diff --git a/src/rego/grouping.rs b/src/rego/grouping.rs index 7625d52..01bfe1c 100644 --- a/src/rego/grouping.rs +++ b/src/rego/grouping.rs @@ -1,7 +1,7 @@ use std::collections::BTreeMap; use std::path::{Path, PathBuf}; -use crate::types::Finding; +use crate::types::{Finding, PolicyEngine}; use super::templates; @@ -147,11 +147,21 @@ fn build_rego_content(package_name: &str, source_file: &Path, findings: &[&Findi }; lines.push(format!("# Original: {}", truncated.trim())); - // Get the Rego stub - let stub = match &finding.rego_stub { - Some(s) => s.clone(), - None => templates::generate_default_stub(finding.category, &finding.code_snippet), + // The extract pipeline 
pre-fills `policy_output(Rego)` for every + // finding before calling into grouping (see + // `commands::extract::run_extract`), so we treat its presence as + // an invariant and skip findings without one rather than silently + // synthesizing a default — that would mask a missing pre-fill. + let Some(stub) = finding.policy_output(PolicyEngine::Rego) else { + tracing::warn!( + finding_id = %finding.id, + file = %finding.file.display(), + line = finding.line_start, + "skipping finding without Rego policy output; header counts will overstate coverage" + ); + continue; }; + let stub = stub.to_string(); // Apply confidence wrapping let wrapped = templates::apply_confidence_wrapping(&stub, finding.confidence); @@ -219,9 +229,20 @@ mod tests { assert_eq!(path, PathBuf::from("./policies/api/orders.rego")); } + /// Mirrors the pre-fill `commands::extract::run_extract` performs + /// before invoking `group_findings`: every finding lands here with a + /// rendered Rego stub already attached. + fn with_default_stub(mut f: Finding) -> Finding { + f.set_policy_output( + PolicyEngine::Rego, + templates::generate_default_stub(f.category, &f.code_snippet), + ); + f + } + #[test] fn group_findings_single_file() { - let findings = vec![Finding { + let findings = vec![with_default_stub(Finding { id: "abc".into(), file: PathBuf::from("src/api/orders.ts"), line_start: 10, @@ -232,11 +253,10 @@ mod tests { confidence: Confidence::High, description: "test".into(), pattern_rule: Some("test-rule".into()), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, - }]; + })]; let files = group_findings(&findings, "app", Path::new("./policies")); assert_eq!(files.len(), 1); @@ -249,7 +269,7 @@ mod tests { #[test] fn group_findings_multiple_files() { let findings = vec![ - Finding { + with_default_stub(Finding { id: "a".into(), file: PathBuf::from("src/api/orders.ts"), line_start: 10, @@ -260,12 +280,11 @@ mod tests { confidence: 
Confidence::High, description: "test".into(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, - }, - Finding { + }), + with_default_stub(Finding { id: "b".into(), file: PathBuf::from("src/api/users.ts"), line_start: 5, @@ -276,11 +295,10 @@ mod tests { confidence: Confidence::Medium, description: "test".into(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, - }, + }), ]; let files = group_findings(&findings, "app", Path::new("./policies")); diff --git a/src/rego/mod.rs b/src/rego/mod.rs index 37e46a8..9fa9734 100644 --- a/src/rego/mod.rs +++ b/src/rego/mod.rs @@ -1,6 +1,8 @@ +pub mod generator; pub mod grouping; pub mod templates; pub mod validator; +pub use generator::RegoGenerator; pub use grouping::group_findings; pub use templates::render_template; diff --git a/src/rules/mod.rs b/src/rules/mod.rs index 45965bf..15f180a 100644 --- a/src/rules/mod.rs +++ b/src/rules/mod.rs @@ -4,7 +4,7 @@ use std::path::Path; use crate::config::ZiftConfig; use crate::error::{Result, ZiftError}; -use crate::types::{AuthCategory, Confidence, Language}; +use crate::types::{AuthCategory, Confidence, Language, PolicyEngine}; use serde::Deserialize; @@ -19,11 +19,31 @@ pub struct PatternRule { pub query_source: String, pub predicates: Vec<(String, Predicate)>, pub cross_predicates: Vec<CrossPredicate>, - pub rego_template: Option<String>, - pub cedar_template: Option<String>, + /// Generated policy templates, one entry per engine. Replaces the + /// parallel `rego_template` / `cedar_template` fields that existed + /// pre-Phase-B (#71). The TOML parser accepts both the new + /// `[[rule.policy_templates]]` array form and the legacy + /// `[rule.rego_template]` / `[rule.cedar_template]` blocks. 
+ pub policy_templates: Vec<PolicyTemplate>, pub tests: Vec<RuleTest>, } +#[derive(Debug, Clone)] +pub struct PolicyTemplate { + pub engine: PolicyEngine, + pub template: String, +} + +impl PatternRule { + /// Look up the rendered template for a policy engine, if any. + pub fn template_for(&self, engine: PolicyEngine) -> Option<&str> { + self.policy_templates + .iter() + .find(|t| t.engine == engine) + .map(|t| t.template.as_str()) + } +} + #[derive(Debug, Clone)] pub enum Predicate { Match(regex::Regex), @@ -108,12 +128,23 @@ struct RuleToml { predicates: std::collections::HashMap<String, PredicateToml>, #[serde(default)] cross_predicates: Vec<CrossPredicateToml>, + /// Legacy single-engine template blocks. Read-tolerant: parsing folds + /// these into `policy_templates` at the [`PatternRule`] level. New rules + /// should use `[[rule.policy_templates]]` instead. rego_template: Option<RegoTemplateToml>, cedar_template: Option<CedarTemplateToml>, #[serde(default)] + policy_templates: Vec<PolicyTemplateToml>, + #[serde(default)] tests: Vec<RuleTestToml>, } +#[derive(Debug, Deserialize)] +struct PolicyTemplateToml { + engine: PolicyEngine, + template: String, +} + #[derive(Debug, Deserialize)] struct PredicateToml { #[serde(rename = "match")] @@ -260,6 +291,44 @@ fn parse_rule(toml_str: &str, source: &str) -> Result<PatternRule> { cross_predicates.push(parsed); } + // Dedupe by engine, keeping the first occurrence. `template_for` + // returns the first match, so hand-written TOML with two + // `[[rule.policy_templates]]` blocks for the same engine would + // silently use one of them — drop the rest at parse time so the + // in-memory shape can't surprise later code. 
+ let mut policy_templates: Vec<PolicyTemplate> = Vec::with_capacity(r.policy_templates.len()); + for t in r.policy_templates { + if !policy_templates.iter().any(|p| p.engine == t.engine) { + policy_templates.push(PolicyTemplate { + engine: t.engine, + template: t.template, + }); + } + } + // Fold legacy single-engine template blocks into the new collection. + // Explicit `[[rule.policy_templates]]` entries win on conflict — same + // contract as the Finding shim. + if let Some(t) = r.rego_template + && !policy_templates + .iter() + .any(|p| p.engine == PolicyEngine::Rego) + { + policy_templates.push(PolicyTemplate { + engine: PolicyEngine::Rego, + template: t.template, + }); + } + if let Some(t) = r.cedar_template + && !policy_templates + .iter() + .any(|p| p.engine == PolicyEngine::Cedar) + { + policy_templates.push(PolicyTemplate { + engine: PolicyEngine::Cedar, + template: t.template, + }); + } + Ok(PatternRule { id: r.id, languages: r.languages, @@ -270,8 +339,7 @@ fn parse_rule(toml_str: &str, source: &str) -> Result<PatternRule> { query_source: r.query, predicates, cross_predicates, - rego_template: r.rego_template.map(|t| t.template), - cedar_template: r.cedar_template.map(|t| t.template), + policy_templates, tests: r .tests .into_iter() @@ -469,6 +537,70 @@ match = ".*" ); } + #[test] + fn legacy_template_blocks_fold_into_policy_templates() { + // Pre-#71 rules used `[rule.rego_template]` / `[rule.cedar_template]` + // single-engine blocks. The Phase B parser must accept these and + // surface them through `policy_templates` so external rule trees + // keep loading without forced migration. 
+ let legacy = r#" +[rule] +id = "test-legacy-templates" +languages = ["typescript"] +category = "rbac" +confidence = "high" +description = "uses legacy template blocks" +query = """ +(if_statement) @match +""" + +[rule.rego_template] +template = "package x\nallow := true" + +[rule.cedar_template] +template = "permit(principal, action, resource);" +"#; + let rule = parse_rule(legacy, "test").unwrap(); + assert_eq!( + rule.template_for(PolicyEngine::Rego), + Some("package x\nallow := true") + ); + assert_eq!( + rule.template_for(PolicyEngine::Cedar), + Some("permit(principal, action, resource);") + ); + } + + #[test] + fn policy_templates_array_form_parses() { + let new_form = r#" +[rule] +id = "test-new-templates" +languages = ["typescript"] +category = "rbac" +confidence = "high" +description = "uses new policy_templates array" +query = """ +(if_statement) @match +""" + +[[rule.policy_templates]] +engine = "rego" +template = "package x" + +[[rule.policy_templates]] +engine = "cedar" +template = "permit(principal, action, resource);" +"#; + let rule = parse_rule(new_form, "test").unwrap(); + assert_eq!(rule.policy_templates.len(), 2); + assert_eq!(rule.template_for(PolicyEngine::Rego), Some("package x")); + assert_eq!( + rule.template_for(PolicyEngine::Cedar), + Some("permit(principal, action, resource);") + ); + } + #[test] fn merge_overrides_by_id() { let r1 = PatternRule { @@ -481,8 +613,7 @@ match = ".*" query_source: "".into(), predicates: vec![], cross_predicates: vec![], - rego_template: None, - cedar_template: None, + policy_templates: vec![], tests: vec![], }; let r2 = PatternRule { @@ -495,8 +626,7 @@ match = ".*" query_source: "".into(), predicates: vec![], cross_predicates: vec![], - rego_template: None, - cedar_template: None, + policy_templates: vec![], tests: vec![], }; diff --git a/src/scanner/matcher.rs b/src/scanner/matcher.rs index 0f753d7..f1f8d05 100644 --- a/src/scanner/matcher.rs +++ b/src/scanner/matcher.rs @@ -8,7 +8,7 @@ use 
tree_sitter::{Query, QueryCursor, Tree}; use crate::error::{Result, ZiftError}; use crate::rules::{CrossPredicate, PatternRule, Predicate}; -use crate::types::{Confidence, Finding, Language, ScanPass, Surface}; +use crate::types::{Confidence, Finding, Language, PolicyEngine, PolicyOutput, ScanPass, Surface}; pub struct CompiledRule<'a> { pub rule: &'a PatternRule, @@ -144,6 +144,30 @@ pub fn execute_query( &code_snippet, ); + let mut policy_outputs = Vec::new(); + if let Some(tmpl) = compiled.rule.template_for(PolicyEngine::Rego) { + let mut owned: HashMap<String, String> = captures + .iter() + .map(|(k, v)| (k.to_string(), v.clone())) + .collect(); + add_template_derived_values(&mut owned); + policy_outputs.push(PolicyOutput { + engine: PolicyEngine::Rego, + content: crate::rego::render_template(tmpl, &owned), + }); + } + if let Some(tmpl) = compiled.rule.template_for(PolicyEngine::Cedar) { + let mut owned: HashMap<String, String> = captures + .iter() + .map(|(k, v)| (k.to_string(), v.clone())) + .collect(); + add_cedar_template_derived_values(&mut owned); + policy_outputs.push(PolicyOutput { + engine: PolicyEngine::Cedar, + content: crate::cedar::render_template(tmpl, &owned), + }); + } + findings.push(Finding { id, file: file_path.to_path_buf(), @@ -155,22 +179,7 @@ pub fn execute_query( confidence: compiled.rule.confidence, description: compiled.rule.description.clone(), pattern_rule: Some(compiled.rule.id.clone()), - rego_stub: compiled.rule.rego_template.as_ref().map(|tmpl| { - let mut owned: HashMap<String, String> = captures - .iter() - .map(|(k, v)| (k.to_string(), v.clone())) - .collect(); - add_template_derived_values(&mut owned); - crate::rego::render_template(tmpl, &owned) - }), - cedar_stub: compiled.rule.cedar_template.as_ref().map(|tmpl| { - let mut owned: HashMap<String, String> = captures - .iter() - .map(|(k, v)| (k.to_string(), v.clone())) - .collect(); - add_cedar_template_derived_values(&mut owned); - crate::cedar::render_template(tmpl, 
&owned) - }), + policy_outputs, pass: ScanPass::Structural, surface: Surface::classify(file_path), }); @@ -400,7 +409,7 @@ public IActionResult Delete(int id) => Ok();"#, ); assert_eq!(findings.len(), 1); - let cedar = findings[0].cedar_stub.as_deref().unwrap(); + let cedar = findings[0].policy_output(PolicyEngine::Cedar).unwrap(); assert!( cedar.contains(r#"principal.role in ["Admin", "Manager"]"#), "cedar should split ASP.NET comma-separated roles into a set; got: {cedar}" @@ -416,7 +425,7 @@ public IActionResult Delete(int id) => Ok();"#, ); assert_eq!(findings.len(), 1); - let rego = findings[0].rego_stub.as_deref().unwrap(); + let rego = findings[0].policy_output(PolicyEngine::Rego).unwrap(); assert!( rego.contains(r#"input.user.role in {"Admin", "Manager"}"#), "rego should split ASP.NET comma-separated roles; got: {rego}" @@ -433,7 +442,7 @@ public IActionResult Delete(int id) => Ok();"#, ); assert_eq!(findings.len(), 1); - let rego = findings[0].rego_stub.as_deref().unwrap(); + let rego = findings[0].policy_output(PolicyEngine::Rego).unwrap(); assert!( rego.contains(r#"input.user.claims["scope"] == "users.delete""#), "rego should require the claim value; got: {rego}" @@ -449,7 +458,7 @@ public IActionResult Reports() => Ok();"#, ); assert_eq!(findings.len(), 1); - let rego = findings[0].rego_stub.as_deref().unwrap(); + let rego = findings[0].policy_output(PolicyEngine::Rego).unwrap(); assert!( rego.contains(r#"input.policy == "CanReadReports""#), "rego should include the shorthand policy name; got: {rego}" @@ -492,12 +501,12 @@ public IActionResult Reports() => Ok();"#, assert_eq!(findings.len(), 1); assert_eq!(findings[0].category, crate::types::AuthCategory::Rbac); - let rego = findings[0].rego_stub.as_deref().unwrap(); + let rego = findings[0].policy_output(PolicyEngine::Rego).unwrap(); assert!( rego.contains(r#""manager" in input.user.roles"#), "rego should use a role membership check; got: {rego}" ); - let cedar = 
findings[0].cedar_stub.as_deref().unwrap(); + let cedar = findings[0].policy_output(PolicyEngine::Cedar).unwrap(); assert!( cedar.contains(r#"principal.roles.contains("manager")"#), "cedar should use a role membership check; got: {cedar}" @@ -1418,8 +1427,7 @@ match = ".*" confidence: Confidence::High, description: "test".into(), pattern_rule: None, - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, }; diff --git a/src/types.rs b/src/types.rs index 8609e66..068cf7b 100644 --- a/src/types.rs +++ b/src/types.rs @@ -4,7 +4,49 @@ use std::sync::OnceLock; use regex::Regex; use serde::{Deserialize, Serialize}; +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, clap::ValueEnum)] +#[serde(rename_all = "lowercase")] +pub enum PolicyEngine { + Rego, + Cedar, +} + +impl PolicyEngine { + /// Lowercase canonical identifier used in JSON keys, CLI flags, and + /// log lines. Stable across the public surface — matches the serde + /// representation. + pub fn as_str(&self) -> &'static str { + match self { + PolicyEngine::Rego => "rego", + PolicyEngine::Cedar => "cedar", + } + } + + /// Human-readable name for prose-style output (e.g. "Generated 3 + /// Rego files"). Distinct from [`Self::as_str`] so user-facing + /// messages stay capitalized. 
+ pub fn human_name(&self) -> &'static str { + match self { + PolicyEngine::Rego => "Rego", + PolicyEngine::Cedar => "Cedar", + } + } +} + +impl std::fmt::Display for PolicyEngine { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct PolicyOutput { + pub engine: PolicyEngine, + pub content: String, +} + #[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(from = "FindingShim")] pub struct Finding { pub id: String, pub file: PathBuf, @@ -16,14 +58,14 @@ pub struct Finding { pub confidence: Confidence, pub description: String, pub pattern_rule: Option<String>, - pub rego_stub: Option<String>, - /// Cedar policy stub. Populated when `--engine cedar` is used during - /// extract, or when a deep-mode response carries one. Kept parallel to - /// `rego_stub` (rather than collapsed into a `policy_outputs` collection) - /// so persisted findings JSON stays backward-compatible — older consumers - /// see an extra optional field they can ignore. + /// Generated policy stubs, one per engine. Replaces the parallel + /// `rego_stub`/`cedar_stub` fields that existed pre-Phase-B (#71). + /// Deserialization is tolerant of legacy findings: a `FindingShim` + /// reads `rego_stub` / `cedar_stub` if present and folds them into + /// `policy_outputs` so persisted findings files keep loading. + /// Serialization writes only `policy_outputs`. #[serde(default)] - pub cedar_stub: Option<String>, + pub policy_outputs: Vec<PolicyOutput>, pub pass: ScanPass, /// Where in a typical app this finding lives — frontend (UI/client) or /// backend (server/API). Inferred from the file path via simple @@ -39,6 +81,107 @@ pub struct Finding { pub surface: Surface, } +impl Finding { + /// Look up the generated policy content for an engine, if any. 
+ pub fn policy_output(&self, engine: PolicyEngine) -> Option<&str> { + self.policy_outputs + .iter() + .find(|p| p.engine == engine) + .map(|p| p.content.as_str()) + } + + /// Insert or replace the policy output for an engine. Removes any + /// existing entries for the engine first so the vector holds at most + /// one [`PolicyOutput`] per [`PolicyEngine`], even if a Finding was + /// constructed in-memory with duplicates. + pub fn set_policy_output(&mut self, engine: PolicyEngine, content: String) { + self.policy_outputs.retain(|p| p.engine != engine); + self.policy_outputs.push(PolicyOutput { engine, content }); + } +} + +/// Deserialization shim for [`Finding`]. Accepts both the current +/// `policy_outputs` array and the legacy parallel `rego_stub` / `cedar_stub` +/// fields so existing persisted findings JSON keeps loading after the Phase B +/// refactor (#71). On the read side legacy fields are folded into +/// `policy_outputs`; the serialize side never emits them. +#[derive(Deserialize)] +struct FindingShim { + id: String, + file: PathBuf, + line_start: usize, + line_end: usize, + code_snippet: String, + language: Language, + category: AuthCategory, + confidence: Confidence, + description: String, + pattern_rule: Option<String>, + #[serde(default)] + rego_stub: Option<String>, + #[serde(default)] + cedar_stub: Option<String>, + #[serde(default)] + policy_outputs: Vec<PolicyOutput>, + pass: ScanPass, + #[serde(default)] + surface: Surface, +} + +impl From<FindingShim> for Finding { + fn from(s: FindingShim) -> Self { + // Dedupe explicit entries by engine, keeping the first occurrence. + // Hand-written or mid-migration JSON could carry duplicates; the + // lookup helpers (`policy_output`) silently return the first match, + // so dropping the rest at parse time keeps the in-memory shape + // honest. 
+ let mut policy_outputs: Vec<PolicyOutput> = Vec::with_capacity(s.policy_outputs.len()); + for po in s.policy_outputs { + if !policy_outputs.iter().any(|p| p.engine == po.engine) { + policy_outputs.push(po); + } + } + // Fold legacy fields into policy_outputs only when the new field + // doesn't already carry an entry for that engine — explicit + // `policy_outputs` wins on conflict. + if let Some(content) = s.rego_stub + && !policy_outputs + .iter() + .any(|p| p.engine == PolicyEngine::Rego) + { + policy_outputs.push(PolicyOutput { + engine: PolicyEngine::Rego, + content, + }); + } + if let Some(content) = s.cedar_stub + && !policy_outputs + .iter() + .any(|p| p.engine == PolicyEngine::Cedar) + { + policy_outputs.push(PolicyOutput { + engine: PolicyEngine::Cedar, + content, + }); + } + Finding { + id: s.id, + file: s.file, + line_start: s.line_start, + line_end: s.line_end, + code_snippet: s.code_snippet, + language: s.language, + category: s.category, + confidence: s.confidence, + description: s.description, + pattern_rule: s.pattern_rule, + policy_outputs, + pass: s.pass, + surface: s.surface, + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, clap::ValueEnum)] #[serde(rename_all = "lowercase")] pub enum Surface { @@ -389,4 +532,89 @@ mod tests { let f: Finding = serde_json::from_str(no_surface).unwrap(); assert_eq!(f.surface, Surface::Backend); } + + #[test] + fn legacy_stub_fields_fold_into_policy_outputs() { + // Pre-#71 findings JSON used parallel `rego_stub` and `cedar_stub` + // fields. The Phase B shim must accept both shapes and surface them + // through `policy_outputs` so consumers loading old findings files + // see the generated policies in the new place. 
+ let legacy = r#"{ + "id": "x", + "file": "a.ts", + "line_start": 1, + "line_end": 1, + "code_snippet": "", + "language": "typescript", + "category": "rbac", + "confidence": "low", + "description": "", + "pattern_rule": null, + "rego_stub": "package x\nallow := true", + "cedar_stub": "permit(principal, action, resource);", + "pass": "structural" + }"#; + let f: Finding = serde_json::from_str(legacy).unwrap(); + assert_eq!( + f.policy_output(PolicyEngine::Rego), + Some("package x\nallow := true") + ); + assert_eq!( + f.policy_output(PolicyEngine::Cedar), + Some("permit(principal, action, resource);") + ); + } + + #[test] + fn explicit_policy_outputs_wins_over_legacy_fields() { + // If a producer wrote both shapes (e.g. mid-migration), the explicit + // `policy_outputs` entry takes precedence — matches the From impl + // contract that the new field is authoritative. + let mixed = r#"{ + "id": "x", + "file": "a.ts", + "line_start": 1, + "line_end": 1, + "code_snippet": "", + "language": "typescript", + "category": "rbac", + "confidence": "low", + "description": "", + "pattern_rule": null, + "rego_stub": "legacy", + "policy_outputs": [{"engine": "rego", "content": "current"}], + "pass": "structural" + }"#; + let f: Finding = serde_json::from_str(mixed).unwrap(); + assert_eq!(f.policy_output(PolicyEngine::Rego), Some("current")); + } + + #[test] + fn finding_serializes_only_policy_outputs() { + // After the refactor we never write `rego_stub` / `cedar_stub` on + // serialize — only the `policy_outputs` array. Existing downstream + // consumers reading the legacy keys are documented to need an + // update (see the changelog for #71). 
+ let f: Finding = serde_json::from_str( + r#"{ + "id": "x", + "file": "a.ts", + "line_start": 1, + "line_end": 1, + "code_snippet": "", + "language": "typescript", + "category": "rbac", + "confidence": "low", + "description": "", + "pattern_rule": null, + "rego_stub": "package x", + "pass": "structural" + }"#, + ) + .unwrap(); + let json = serde_json::to_string(&f).unwrap(); + assert!(!json.contains("rego_stub")); + assert!(!json.contains("cedar_stub")); + assert!(json.contains("policy_outputs")); + } } diff --git a/tests/deep_http_integration.rs b/tests/deep_http_integration.rs index 59679d7..3f0354c 100644 --- a/tests/deep_http_integration.rs +++ b/tests/deep_http_integration.rs @@ -477,8 +477,7 @@ fn structural_finding(file: &str, line: usize) -> Finding { confidence: Confidence::Low, description: "matched custom rule".into(), pattern_rule: Some("ts-custom".into()), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, } diff --git a/tests/deep_subprocess_integration.rs b/tests/deep_subprocess_integration.rs index 462417f..fa78401 100644 --- a/tests/deep_subprocess_integration.rs +++ b/tests/deep_subprocess_integration.rs @@ -57,8 +57,7 @@ fn structural_finding(file: &str, line: usize) -> Finding { confidence: Confidence::Low, description: "matched custom rule".into(), pattern_rule: Some("ts-custom".into()), - rego_stub: None, - cedar_stub: None, + policy_outputs: vec![], pass: ScanPass::Structural, surface: Surface::Backend, }